├── requirements.txt ├── tests ├── __init__.py ├── requirements.txt ├── test_superfast.py ├── test_lookup3.py ├── test_mum.py ├── test_wyhash.py ├── test_issues.py ├── test_fnv1.py ├── test_xxhash.py ├── test_highway.py ├── test_hasher.py ├── test_halftime.py ├── test_t1hash.py ├── test_farmhash.py ├── test_cityhash.py ├── test_spooky.py ├── test_fingerprint.py ├── conftest.py ├── test_metrohash.py └── test_murmur.py ├── .coveragerc ├── MANIFEST.in ├── pyproject.toml ├── setup.cfg ├── .gitignore ├── .codecov.yml ├── tox.ini ├── src ├── fnv │ ├── longlong.h │ ├── have_ulong64.c │ ├── hash_32a.c │ ├── hash_32.c │ ├── README │ ├── fnv.h │ ├── Makefile │ ├── hash_64a.c │ ├── hash_64.c │ └── fnv32.c ├── SuperFastHash.h ├── Mum.h ├── wyHash.h ├── SuperFastHash │ └── SuperFastHash.c ├── FNV1.h ├── Lookup3.h ├── xxHash.h ├── T1ha.h ├── Highway.h ├── SpookyHash.h ├── FarmHash.h ├── Halftime.h ├── MetroHash.h ├── MurmurHash.h ├── Hash.cpp ├── CityHash.h └── Hash.h ├── .gitmodules ├── .travis.yml ├── .github └── workflows │ ├── codeql-analysis.yml │ └── ci.yml ├── pyhash └── __init__.py ├── README.md ├── setup.py └── LICENSE.txt /requirements.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = pyhash 3 | omit = 4 | setup.py 5 | .eggs/* 6 | .tox/* 7 | build/* 8 | tests/* 9 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE.txt 3 | include requirements.txt 4 | recursive-include src *.h *.c *.cc 
*.cpp 5 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | build-backend = "setuptools.build_meta" 3 | requires = ["setuptools>=42", "wheel", "pybind11>=2.8.1"] 4 | -------------------------------------------------------------------------------- /tests/requirements.txt: -------------------------------------------------------------------------------- 1 | pytest >= 4.6 2 | pytest-benchmark >= 3.4 3 | pytest-cov >= 2.12 4 | codecov >= 2.1 5 | cpuid ; platform_machine == "x86_64" 6 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | license_file = LICENSE.txt 3 | 4 | [bdist_wheel] 5 | universal = 1 6 | 7 | [aliases] 8 | test = pytest 9 | 10 | [tool:pytest] 11 | norecursedirs = .eggs src build 12 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | .vscode 3 | build 4 | dist 5 | pyhash.egg-info 6 | .tox 7 | .eggs 8 | .pytest_cache 9 | .coverage 10 | 11 | setuptools* 12 | *.pyc 13 | *.pyd 14 | *.so 15 | *.pdb 16 | *.xml 17 | -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | ignore: 2 | - "src/fnv" 3 | - "src/halftime" 4 | - "src/highwayhash" 5 | - "src/lookup3" 6 | - "src/smhasher" 7 | - "src/SuperFastHash" 8 | - "src/wyhash" 9 | - "src/xxHash" 10 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py{27,39},pypy{,3} 3 | 4 | [testenv] 5 | commands = pytest 
--benchmark-disable --cov=./ -v 6 | 7 | [testenv:{py27,pypy}] 8 | deps = pytest-runner 9 | pytest-benchmark 10 | 11 | [testenv:{py39,pypy3}] 12 | deps = -r{toxinidir}/requirements.txt 13 | -r{toxinidir}/tests/requirements.txt 14 | -------------------------------------------------------------------------------- /src/fnv/longlong.h: -------------------------------------------------------------------------------- 1 | /* 2 | * DO NOT EDIT -- generated by the Makefile 3 | */ 4 | 5 | #if !defined(__LONGLONG_H__) 6 | #define __LONGLONG_H__ 7 | 8 | /* do we have/want to use a long long type? */ 9 | #define HAVE_64BIT_LONG_LONG /* yes */ 10 | 11 | /* 12 | * NO64BIT_LONG_LONG undef HAVE_64BIT_LONG_LONG 13 | */ 14 | #if defined(NO64BIT_LONG_LONG) 15 | #undef HAVE_64BIT_LONG_LONG 16 | #endif /* NO64BIT_LONG_LONG */ 17 | 18 | #endif /* !__LONGLONG_H__ */ 19 | -------------------------------------------------------------------------------- /tests/test_superfast.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pyhash 4 | 5 | 6 | def test_lookup3(hash_tester): 7 | hash_tester(hasher_type=pyhash.super_fast_hash, 8 | bytes_hash=942683319, 9 | seed_hash=777359542, 10 | unicode_hash=1430748046) 11 | 12 | 13 | @pytest.mark.benchmark(group='hash32', disable_gc=True) 14 | def test_super_fast_hash_perf(benchmark, hash_bencher): 15 | hash_bencher(benchmark, pyhash.super_fast_hash, 2804200527) 16 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "src/smhasher"] 2 | path = src/smhasher 3 | url = https://github.com/rurban/smhasher.git 4 | [submodule "src/pybind11"] 5 | path = src/pybind11 6 | url = https://github.com/pybind/pybind11.git 7 | [submodule "src/highwayhash"] 8 | path = src/highwayhash 9 | url = https://github.com/google/highwayhash.git 10 | [submodule "src/xxHash"] 11 | 
path = src/xxHash 12 | url = https://github.com/Cyan4973/xxHash.git 13 | [submodule "src/wyhash"] 14 | path = src/wyhash 15 | url = https://github.com/wangyi-fudan/wyhash.git 16 | [submodule "src/halftime"] 17 | path = src/halftime 18 | url = https://github.com/jbapple/HalftimeHash.git 19 | -------------------------------------------------------------------------------- /tests/test_lookup3.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pyhash 4 | 5 | 6 | def test_lookup3(hash_tester): 7 | hash_tester(hasher_type=pyhash.lookup3, 8 | bytes_hash=3188463954, 9 | seed_hash=478901866, 10 | unicode_hash=1380664715) 11 | 12 | 13 | def test_lookup3_big(hash_tester): 14 | hash_tester(hasher_type=pyhash.lookup3_big, 15 | bytes_hash=305759528, 16 | seed_hash=1889773852, 17 | unicode_hash=1487153094) 18 | 19 | 20 | @pytest.mark.benchmark(group='hash32', disable_gc=True) 21 | def test_lookup3_perf(benchmark, hash_bencher): 22 | hash_bencher(benchmark, pyhash.lookup3, 3792570419) 23 | -------------------------------------------------------------------------------- /tests/test_mum.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | import pyhash 6 | 7 | 8 | def test_mum64(hash_tester, is_msvc): 9 | hash_tester(hasher_type=pyhash.mum_64, 10 | bytes_hash=12122843130624056202 if is_msvc else 8715813407503360407, 11 | seed_hash=14905784849636620642 if is_msvc else 1160173209250992409, 12 | unicode_hash=366515711009433586 if is_msvc else 16548684777514844522) 13 | 14 | 15 | @pytest.mark.benchmark(group='hash64', disable_gc=True) 16 | def test_mum_hash3_perf(benchmark, hash_bencher, is_msvc): 17 | hash_bencher(benchmark, pyhash.mum_64, 18 | 16713191835145177100 if is_msvc else 5704960907050105809) 19 | -------------------------------------------------------------------------------- /src/SuperFastHash.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Hash.h" 4 | 5 | /** 6 | * http://www.azillionmonkeys.com/qed/hash.html 7 | */ 8 | 9 | extern "C" uint32_t SuperFastHash(const char *data, int len, uint32_t hash); 10 | 11 | class super_fast_hash_t : public Hasher 12 | { 13 | public: 14 | typedef Hasher __hasher_t; 15 | typedef typename __hasher_t::hash_value_t hash_value_t; 16 | typedef typename __hasher_t::seed_value_t seed_value_t; 17 | 18 | super_fast_hash_t(seed_value_t seed = 0) : __hasher_t(seed) {} 19 | 20 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const 21 | { 22 | return SuperFastHash((const char *)buf, (int)len, seed); 23 | } 24 | }; 25 | -------------------------------------------------------------------------------- /src/Mum.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Hash.h" 4 | 5 | #include "smhasher/mum.h" 6 | 7 | template 8 | class mum_hash_t : public Hasher, T> 9 | { 10 | public: 11 | typedef Hasher, T> __hasher_t; 12 | typedef typename __hasher_t::hash_value_t hash_value_t; 13 | typedef typename __hasher_t::seed_value_t seed_value_t; 14 | 15 | mum_hash_t(seed_value_t seed = 0) : __hasher_t(seed) {} 16 | 17 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 18 | }; 19 | 20 | typedef mum_hash_t mum_hash_64_t; 21 | 22 | template <> 23 | const mum_hash_64_t::hash_value_t mum_hash_64_t::operator()(void *buf, size_t len, mum_hash_64_t::seed_value_t seed) const 24 | { 25 | return mum_hash(buf, len, seed); 26 | } 27 | -------------------------------------------------------------------------------- /tests/test_wyhash.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pyhash 4 | 5 | 6 | def test_wy_32(hash_tester): 7 | hash_tester(hasher_type=pyhash.wy_32, 8 | bytes_hash=3257032231, 9 | 
seed_hash=4286430599, 10 | unicode_hash=213393459) 11 | 12 | 13 | def test_wy_64(hash_tester): 14 | hash_tester(hasher_type=pyhash.wy_64, 15 | bytes_hash=13282522921993940974, 16 | seed_hash=10112467932320148695, 17 | unicode_hash=2990017137888294531) 18 | 19 | 20 | @pytest.mark.benchmark(group='hash32', disable_gc=True) 21 | def test_wy_hash32_perf(benchmark, hash_bencher): 22 | hash_bencher(benchmark, pyhash.wy_32, 2032615721) 23 | 24 | 25 | @pytest.mark.benchmark(group='hash64', disable_gc=True) 26 | def test_wy_hash64_perf(benchmark, hash_bencher): 27 | hash_bencher(benchmark, pyhash.wy_64, 16560810041235762008) 28 | -------------------------------------------------------------------------------- /tests/test_issues.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import sys 4 | 5 | import pytest 6 | 7 | import pyhash 8 | 9 | # https://github.com/flier/pyfasthash/issues/3 10 | 11 | 12 | def test_error_return_none(): 13 | if hasattr(sys, 'getrefcount'): # skip pypy 14 | h = pyhash.fnv1_64() 15 | 16 | old_refcnt = sys.getrefcount(None) 17 | 18 | for _ in range(10000): 19 | try: 20 | h(None) 21 | 22 | pytest.fail("fail to raise exception") 23 | except TypeError: 24 | pass 25 | 26 | new_refcnt = sys.getrefcount(None) 27 | 28 | assert old_refcnt >= new_refcnt 29 | 30 | # https://github.com/flier/pyfasthash/issues/24 31 | 32 | 33 | def test_default_string_type(): 34 | hasher = pyhash.murmur3_32() 35 | 36 | assert hasher('foo') == hasher(u'foo') 37 | assert hasher('foo') != hasher(b'foo') 38 | -------------------------------------------------------------------------------- /src/wyHash.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Hash.h" 4 | 5 | #include "wyhash/wyhash.h" 6 | #include "wyhash/wyhash32.h" 7 | 8 | template 9 | class wy_hash_t : public Hasher, T> 10 | { 11 | public: 12 | typedef Hasher, T> 
__hasher_t; 13 | typedef typename __hasher_t::hash_value_t hash_value_t; 14 | typedef typename __hasher_t::seed_value_t seed_value_t; 15 | 16 | wy_hash_t(seed_value_t seed = 0) : __hasher_t(seed) {} 17 | 18 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 19 | }; 20 | 21 | typedef wy_hash_t wy_hash_32_t; 22 | typedef wy_hash_t wy_hash_64_t; 23 | 24 | template <> 25 | const wy_hash_32_t::hash_value_t wy_hash_32_t::operator()(void *buf, size_t len, wy_hash_32_t::seed_value_t seed) const 26 | { 27 | return wyhash32(buf, len, seed); 28 | } 29 | template <> 30 | const wy_hash_64_t::hash_value_t wy_hash_64_t::operator()(void *buf, size_t len, wy_hash_64_t::seed_value_t seed) const 31 | { 32 | return wyhash(buf, len, seed, _wyp); 33 | } 34 | -------------------------------------------------------------------------------- /tests/test_fnv1.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pyhash 4 | 5 | 6 | def test_fnv1_32(hash_tester): 7 | hash_tester(hasher_type=pyhash.fnv1_32, 8 | bytes_hash=3698262380, 9 | seed_hash=660137056, 10 | unicode_hash=3910690890) 11 | 12 | 13 | def test_fnv1a_32(hash_tester): 14 | hash_tester(hasher_type=pyhash.fnv1a_32, 15 | bytes_hash=1858026756, 16 | seed_hash=1357873952, 17 | unicode_hash=996945022) 18 | 19 | 20 | def test_fnv1_64(hash_tester): 21 | hash_tester(hasher_type=pyhash.fnv1_64, 22 | bytes_hash=17151984479173897804, 23 | seed_hash=6349570372626520864, 24 | unicode_hash=14017453969697934794) 25 | 26 | 27 | def test_fnv1a_64(hash_tester): 28 | hash_tester(hasher_type=pyhash.fnv1a_64, 29 | bytes_hash=11830222609977404196, 30 | seed_hash=8858165303110309728, 31 | unicode_hash=14494269412771327550) 32 | 33 | 34 | @pytest.mark.benchmark(group='hash32', disable_gc=True) 35 | def test_fnv1_32_perf(benchmark, hash_bencher): 36 | hash_bencher(benchmark, pyhash.fnv1_32, 4117514240) 37 | 38 | 39 | @pytest.mark.benchmark(group='hash32', 
disable_gc=True) 40 | def test_fnv1a_32_perf(benchmark, hash_bencher): 41 | hash_bencher(benchmark, pyhash.fnv1a_32, 1500862464) 42 | 43 | 44 | @pytest.mark.benchmark(group='hash64', disable_gc=True) 45 | def test_fnv1_64_perf(benchmark, hash_bencher): 46 | hash_bencher(benchmark, pyhash.fnv1_64, 487086381785722880) 47 | 48 | 49 | @pytest.mark.benchmark(group='hash64', disable_gc=True) 50 | def test_fnv1a_64_perf(benchmark, hash_bencher): 51 | hash_bencher(benchmark, pyhash.fnv1a_64, 13917847256464560128) 52 | -------------------------------------------------------------------------------- /src/SuperFastHash/SuperFastHash.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #ifdef WIN32 4 | typedef unsigned char uint8_t; 5 | typedef unsigned short uint16_t; 6 | typedef unsigned int uint32_t; 7 | #else 8 | #include 9 | #endif 10 | 11 | #undef get16bits 12 | #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ 13 | || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) 14 | #define get16bits(d) (*((const uint16_t *) (d))) 15 | #endif 16 | 17 | #if !defined (get16bits) 18 | #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\ 19 | +(uint32_t)(((const uint8_t *)(d))[0]) ) 20 | #endif 21 | 22 | uint32_t SuperFastHash (const char * data, int len, uint32_t hash) { 23 | uint32_t tmp; 24 | int rem; 25 | 26 | if (len <= 0 || data == NULL) return 0; 27 | 28 | rem = len & 3; 29 | len >>= 2; 30 | 31 | /* Main loop */ 32 | for (;len > 0; len--) { 33 | hash += get16bits (data); 34 | tmp = (get16bits (data+2) << 11) ^ hash; 35 | hash = (hash << 16) ^ tmp; 36 | data += 2*sizeof (uint16_t); 37 | hash += hash >> 11; 38 | } 39 | 40 | /* Handle end cases */ 41 | switch (rem) { 42 | case 3: hash += get16bits (data); 43 | hash ^= hash << 16; 44 | hash ^= data[sizeof (uint16_t)] << 18; 45 | hash += hash >> 11; 46 | break; 47 | case 2: hash += get16bits (data); 48 | hash ^= hash << 11; 49 
| hash += hash >> 17; 50 | break; 51 | case 1: hash += *data; 52 | hash ^= hash << 10; 53 | hash += hash >> 1; 54 | } 55 | 56 | /* Force "avalanching" of final 127 bits */ 57 | hash ^= hash << 3; 58 | hash += hash >> 5; 59 | hash ^= hash << 4; 60 | hash += hash >> 17; 61 | hash ^= hash << 25; 62 | hash += hash >> 6; 63 | 64 | return hash; 65 | } -------------------------------------------------------------------------------- /src/fnv/have_ulong64.c: -------------------------------------------------------------------------------- 1 | /* 2 | * have_ulong64 - Determine if we have a 64 bit unsigned long long 3 | * 4 | * usage: 5 | * have_ulong64 > longlong.h 6 | * 7 | * Not all systems have a 'long long type' so this may not compile on 8 | * your system. 9 | * 10 | * This prog outputs the define: 11 | * 12 | * HAVE_64BIT_LONG_LONG 13 | * defined ==> we have a 64 bit unsigned long long 14 | * undefined ==> we must simulate a 64 bit unsigned long long 15 | */ 16 | /* 17 | * 18 | * Please do not copyright this code. This code is in the public domain. 19 | * 20 | * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 21 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO 22 | * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR 23 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 24 | * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 25 | * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 26 | * PERFORMANCE OF THIS SOFTWARE. 27 | * 28 | * By: 29 | * chongo /\oo/\ 30 | * http://www.isthe.com/chongo/ 31 | * 32 | * Share and Enjoy! :-) 33 | */ 34 | 35 | /* 36 | * have the compiler try its hand with unsigned and signed long longs 37 | */ 38 | #if ! 
defined(NO64BIT_LONG_LONG) 39 | unsigned long long val = 1099511628211ULL; 40 | #endif /* NO64BIT_LONG_LONG */ 41 | 42 | int 43 | main(void) 44 | { 45 | /* 46 | * ensure that the length of long long val is what we expect 47 | */ 48 | #if defined(NO64BIT_LONG_LONG) 49 | printf("#undef HAVE_64BIT_LONG_LONG\t/* no */\n"); 50 | #else /* NO64BIT_LONG_LONG */ 51 | if (val == 1099511628211ULL && sizeof(val) == 8) { 52 | printf("#define HAVE_64BIT_LONG_LONG\t/* yes */\n"); 53 | } 54 | #endif /* NO64BIT_LONG_LONG */ 55 | 56 | /* exit(0); */ 57 | return 0; 58 | } 59 | -------------------------------------------------------------------------------- /src/FNV1.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Hash.h" 4 | 5 | #include "fnv/fnv.h" 6 | 7 | /** 8 | * FNV hashes are designed to be fast while maintaining a low collision rate. 9 | * The FNV speed allows one to quickly hash lots of data while maintaining 10 | * a reasonable collision rate. The high dispersion of the FNV hashes 11 | * makes them well suited for hashing nearly identical strings such as URLs, 12 | * hostnames, filenames, text, IP addresses, etc. 
13 | * 14 | * http://isthe.com/chongo/tech/comp/fnv/ 15 | * 16 | */ 17 | template 18 | class fnv_t : public Hasher, T> 19 | { 20 | public: 21 | typedef Hasher, T> __hasher_t; 22 | typedef typename __hasher_t::hash_value_t hash_value_t; 23 | typedef typename __hasher_t::seed_value_t seed_value_t; 24 | 25 | fnv_t(seed_value_t seed = 0) : __hasher_t(seed) {} 26 | 27 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 28 | }; 29 | 30 | typedef fnv_t fnv1_32_t; 31 | typedef fnv_t fnv1a_32_t; 32 | typedef fnv_t fnv1_64_t; 33 | typedef fnv_t fnv1a_64_t; 34 | 35 | template <> 36 | const fnv1_32_t::hash_value_t fnv1_32_t::operator()(void *buf, size_t len, fnv1_32_t::seed_value_t seed) const 37 | { 38 | return fnv_32_buf(buf, len, seed); 39 | } 40 | 41 | template <> 42 | const fnv1a_32_t::hash_value_t fnv1a_32_t::operator()(void *buf, size_t len, fnv1a_32_t::seed_value_t seed) const 43 | { 44 | return fnv_32a_buf(buf, len, seed); 45 | } 46 | 47 | template <> 48 | const fnv1_64_t::hash_value_t fnv1_64_t::operator()(void *buf, size_t len, fnv1_64_t::seed_value_t seed) const 49 | { 50 | return fnv_64_buf(buf, len, seed); 51 | } 52 | 53 | template <> 54 | const fnv1a_64_t::hash_value_t fnv1a_64_t::operator()(void *buf, size_t len, fnv1a_64_t::seed_value_t seed) const 55 | { 56 | return fnv_64a_buf(buf, len, seed); 57 | } 58 | -------------------------------------------------------------------------------- /tests/test_xxhash.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pyhash 4 | 5 | 6 | def test_xx_32(hash_tester): 7 | hash_tester(hasher_type=pyhash.xx_32, 8 | bytes_hash=1042293711, 9 | seed_hash=1018767936, 10 | unicode_hash=2783988247) 11 | 12 | 13 | def test_xx_64(hash_tester): 14 | hash_tester(hasher_type=pyhash.xx_64, 15 | bytes_hash=5754696928334414137, 16 | seed_hash=12934826212537126916, 17 | unicode_hash=16125048496228390453) 18 | 19 | 20 | def 
test_xxh3_64(hash_tester): 21 | hash_tester(hasher_type=pyhash.xxh3_64, 22 | bytes_hash=9511462701433476418, 23 | seed_hash=18431907721717861993, 24 | unicode_hash=9339502706477692137) 25 | 26 | 27 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 28 | def test_xxh3_128(hash_tester): 29 | hash_tester(hasher_type=pyhash.xxh3_128, 30 | bytes_hash=61773019920352653487352012421565896002, 31 | seed_hash=75077604214798731190000330999719120489, 32 | unicode_hash=253138563925068684169935446223964533993) 33 | 34 | 35 | @pytest.mark.benchmark(group='hash32', disable_gc=True) 36 | def test_xx_hash32_perf(benchmark, hash_bencher): 37 | hash_bencher(benchmark, pyhash.xx_32, 1497633363) 38 | 39 | 40 | @pytest.mark.benchmark(group='hash64', disable_gc=True) 41 | def test_xx_hash64_perf(benchmark, hash_bencher): 42 | hash_bencher(benchmark, pyhash.xx_64, 2282408585429094475) 43 | 44 | 45 | @pytest.mark.benchmark(group='hash64', disable_gc=True) 46 | def test_xxh3_hash64_perf(benchmark, hash_bencher): 47 | hash_bencher(benchmark, pyhash.xxh3_64, 5383753519105369680) 48 | 49 | 50 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 51 | @pytest.mark.benchmark(group='hash128', disable_gc=True) 52 | def test_xxh3_hash128_perf(benchmark, hash_bencher): 53 | hash_bencher(benchmark, pyhash.xxh3_128, 54 | 38410093203896075778304082117375728449) 55 | -------------------------------------------------------------------------------- /src/Lookup3.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Hash.h" 4 | 5 | /** 6 | * http://burtleburtle.net/bob/hash/doobs.html 7 | */ 8 | extern "C" 9 | { 10 | 11 | uint32_t hashword(const uint32_t *k, /* the key, an array of uint32_t values */ 12 | size_t length, /* the length of the key, in uint32_ts */ 13 | uint32_t initval); 14 | 15 | void hashword2(const uint32_t *k, /* the key, an array of uint32_t values */ 16 | 
size_t length, /* the length of the key, in uint32_ts */ 17 | uint32_t *pc, /* IN: seed OUT: primary hash value */ 18 | uint32_t *pb); /* IN: more seed OUT: secondary hash value */ 19 | 20 | uint32_t hashlittle(const void *key, size_t length, uint32_t initval); 21 | 22 | void hashlittle2(const void *key, /* the key to hash */ 23 | size_t length, /* length of the key */ 24 | uint32_t *pc, /* IN: primary initval, OUT: primary hash */ 25 | uint32_t *pb); /* IN: secondary initval, OUT: secondary hash */ 26 | 27 | uint32_t hashbig(const void *key, size_t length, uint32_t initval); 28 | } 29 | 30 | template 31 | class lookup3_t : public Hasher, uint32_t> 32 | { 33 | public: 34 | typedef Hasher, uint32_t> __hasher_t; 35 | typedef typename __hasher_t::hash_value_t hash_value_t; 36 | typedef typename __hasher_t::seed_value_t seed_value_t; 37 | 38 | lookup3_t(seed_value_t seed = 0) : __hasher_t(seed) {} 39 | 40 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 41 | }; 42 | 43 | typedef lookup3_t lookup3_little_t; 44 | typedef lookup3_t lookup3_big_t; 45 | 46 | template <> 47 | const lookup3_little_t::hash_value_t lookup3_little_t::operator()(void *buf, size_t len, lookup3_little_t::seed_value_t seed) const 48 | { 49 | return hashlittle(buf, len, seed); 50 | } 51 | 52 | template <> 53 | const lookup3_big_t::hash_value_t lookup3_big_t::operator()(void *buf, size_t len, lookup3_big_t::seed_value_t seed) const 54 | { 55 | return hashbig(buf, len, seed); 56 | } 57 | -------------------------------------------------------------------------------- /tests/test_highway.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pyhash 4 | 5 | 6 | def test_highway_64(hash_tester): 7 | hash_tester(hasher_type=pyhash.highway_64, 8 | bytes_hash=10478741295963822880, 9 | seed_hash=10160071405899912585, 10 | unicode_hash=12146505054120333431) 11 | 12 | 13 | @pytest.mark.skipif(not 
pyhash.build_with_int128, reason="requires int128 support") 14 | def test_highway_128(hash_tester): 15 | hash_tester(hasher_type=pyhash.highway_128, 16 | bytes_hash=205029416337089142837388334492957817459, 17 | seed_hash=[131606380489010322043134332560055467821, 18 | 49445702356125343135034790375575615434], 19 | unicode_hash=106097529843409528118081989705354610918) 20 | 21 | 22 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 23 | def test_highway_256(hash_tester): 24 | hash_tester(hasher_type=pyhash.highway_256, 25 | bytes_hash=81695253358482264846640254134214061745359108833184802399504321540179680608337, 26 | seed_hash=38580172915253542762608044587659714789132854339955812293479199758918380945040, 27 | unicode_hash=82825971135523989855392223965598679755378386501396024912896994597279835575349) 28 | 29 | 30 | @pytest.mark.benchmark(group='hash64', disable_gc=True) 31 | def bench_highway_64(benchmark, hash_bencher): 32 | hash_bencher(benchmark, pyhash.highway_64, 17171225769172857249) 33 | 34 | 35 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 36 | @pytest.mark.benchmark(group='hash128', disable_gc=True) 37 | def bench_highway_128(benchmark, hash_bencher): 38 | hash_bencher(benchmark, pyhash.highway_128, 39 | 263168739977411690410017013291704716368) 40 | 41 | 42 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 43 | @pytest.mark.benchmark(group='hash128', disable_gc=True) 44 | def bench_highway_256(benchmark, hash_bencher): 45 | hash_bencher(benchmark, pyhash.highway_256, 46 | 263168739977411690410017013291704716368) 47 | -------------------------------------------------------------------------------- /src/xxHash.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Hash.h" 4 | 5 | #define XXH_STATIC_LINKING_ONLY 6 | 7 | #include "xxHash/xxhash.h" 8 | 9 | template 10 | class xx_hash_t : public 
Hasher, T> 11 | { 12 | public: 13 | typedef Hasher, T> __hasher_t; 14 | typedef typename __hasher_t::hash_value_t hash_value_t; 15 | typedef typename __hasher_t::seed_value_t seed_value_t; 16 | 17 | xx_hash_t(seed_value_t seed = 0) : __hasher_t(seed) {} 18 | 19 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 20 | }; 21 | 22 | typedef xx_hash_t xx_hash_32_t; 23 | typedef xx_hash_t xx_hash_64_t; 24 | 25 | template <> 26 | const xx_hash_32_t::hash_value_t xx_hash_32_t::operator()(void *buf, size_t len, xx_hash_32_t::seed_value_t seed) const 27 | { 28 | return XXH32(buf, len, seed); 29 | } 30 | template <> 31 | const xx_hash_64_t::hash_value_t xx_hash_64_t::operator()(void *buf, size_t len, xx_hash_64_t::seed_value_t seed) const 32 | { 33 | return XXH64(buf, len, seed); 34 | } 35 | 36 | template 37 | class xxh3_hash_t : public Hasher, T> 38 | { 39 | public: 40 | typedef Hasher, T> __hasher_t; 41 | typedef typename __hasher_t::hash_value_t hash_value_t; 42 | typedef typename __hasher_t::seed_value_t seed_value_t; 43 | 44 | xxh3_hash_t(seed_value_t seed = 0) : __hasher_t(seed) {} 45 | 46 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 47 | }; 48 | 49 | typedef xxh3_hash_t xxh3_hash_64_t; 50 | #ifdef SUPPORT_INT128 51 | typedef xxh3_hash_t xxh3_hash_128_t; 52 | #endif 53 | 54 | template <> 55 | const xxh3_hash_64_t::hash_value_t xxh3_hash_64_t::operator()(void *buf, size_t len, xxh3_hash_64_t::seed_value_t seed) const 56 | { 57 | return XXH3_64bits_withSeed(buf, len, seed); 58 | } 59 | 60 | #ifdef SUPPORT_INT128 61 | template <> 62 | const xxh3_hash_128_t::hash_value_t xxh3_hash_128_t::operator()(void *buf, size_t len, xxh3_hash_128_t::seed_value_t seed) const 63 | { 64 | XXH128_hash_t hash = XXH3_128bits_withSeed(buf, len, seed); 65 | 66 | return U128_NEW(hash.low64, hash.high64); 67 | } 68 | #endif 69 | -------------------------------------------------------------------------------- /src/T1ha.h: 
-------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Hash.h" 4 | 5 | #include "smhasher/t1ha.h" 6 | 7 | enum t1_hash_a 8 | { 9 | t1ha2_atonce_a, 10 | t1ha2_atonce128_a, 11 | t1ha1_le_a, 12 | t1ha1_be_a, 13 | t1ha0_a, 14 | }; 15 | 16 | template 17 | class t1_hash_t : public Hasher, uint64_t, T> 18 | { 19 | public: 20 | typedef Hasher, uint64_t, T> __hasher_t; 21 | typedef typename __hasher_t::hash_value_t hash_value_t; 22 | typedef typename __hasher_t::seed_value_t seed_value_t; 23 | 24 | t1_hash_t(seed_value_t seed = 0) : __hasher_t(seed) {} 25 | 26 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 27 | }; 28 | 29 | typedef t1_hash_t t1ha2_atonce_t; 30 | #if defined(SUPPORT_INT128) 31 | typedef t1_hash_t t1ha2_atonce128_t; 32 | #endif 33 | typedef t1_hash_t t1ha1_le_t; 34 | typedef t1_hash_t t1ha1_be_t; 35 | typedef t1_hash_t t1ha0_t; 36 | 37 | template <> 38 | const t1ha2_atonce_t::hash_value_t t1ha2_atonce_t::operator()(void *buf, size_t len, t1ha2_atonce_t::seed_value_t seed) const 39 | { 40 | return t1ha2_atonce(buf, len, seed); 41 | } 42 | 43 | #if defined(SUPPORT_INT128) 44 | template <> 45 | const t1ha2_atonce128_t::hash_value_t t1ha2_atonce128_t::operator()(void *buf, size_t len, t1ha2_atonce128_t::seed_value_t seed) const 46 | { 47 | uint64_t hi = 0; 48 | uint64_t lo = t1ha2_atonce128(&hi, buf, len, seed); 49 | 50 | return U128_NEW(lo, hi); 51 | } 52 | #endif 53 | 54 | template <> 55 | const t1ha1_le_t::hash_value_t t1ha1_le_t::operator()(void *buf, size_t len, t1ha1_le_t::seed_value_t seed) const 56 | { 57 | return t1ha1_le(buf, len, seed); 58 | } 59 | 60 | template <> 61 | const t1ha1_be_t::hash_value_t t1ha1_be_t::operator()(void *buf, size_t len, t1ha1_be_t::seed_value_t seed) const 62 | { 63 | return t1ha1_be(buf, len, seed); 64 | } 65 | 66 | template <> 67 | const t1ha0_t::hash_value_t t1ha0_t::operator()(void *buf, size_t len, t1ha0_t::seed_value_t seed) 
const 68 | { 69 | static auto t1ha0_funcptr = t1ha0_resolve(); 70 | 71 | return t1ha0_funcptr(buf, len, seed); 72 | } 73 | -------------------------------------------------------------------------------- /src/Highway.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Hash.h" 4 | 5 | #include "highwayhash/highwayhash/highwayhash_target.h" 6 | #include "highwayhash/highwayhash/instruction_sets.h" 7 | 8 | #ifdef SUPPORT_INT128 9 | 10 | template 11 | class hightway_hash_t : public Hasher, uint256_t, T> 12 | { 13 | public: 14 | typedef Hasher, uint256_t, T> __hasher_t; 15 | typedef typename __hasher_t::hash_value_t hash_value_t; 16 | typedef typename __hasher_t::seed_value_t seed_value_t; 17 | 18 | hightway_hash_t(seed_value_t seed = 0) : __hasher_t(seed) {} 19 | 20 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 21 | }; 22 | 23 | typedef hightway_hash_t highway_hash_64_t; 24 | 25 | template <> 26 | const highway_hash_64_t::hash_value_t highway_hash_64_t::operator()(void *buf, size_t len, highway_hash_64_t::seed_value_t seed) const 27 | { 28 | highwayhash::HHResult64 result; 29 | highwayhash::InstructionSets::Run( 30 | *reinterpret_cast(&seed), 31 | reinterpret_cast(buf), len, &result); 32 | return result; 33 | } 34 | 35 | typedef hightway_hash_t highway_hash_128_t; 36 | 37 | template <> 38 | const highway_hash_128_t::hash_value_t highway_hash_128_t::operator()(void *buf, size_t len, highway_hash_128_t::seed_value_t seed) const 39 | { 40 | highwayhash::HHResult128 result; 41 | highwayhash::InstructionSets::Run( 42 | *reinterpret_cast(&seed), 43 | reinterpret_cast(buf), len, &result); 44 | return U128_NEW(result[0], result[1]); 45 | } 46 | 47 | typedef hightway_hash_t highway_hash_256_t; 48 | 49 | template <> 50 | const highway_hash_256_t::hash_value_t highway_hash_256_t::operator()(void *buf, size_t len, highway_hash_256_t::seed_value_t seed) const 51 | { 52 | 
# Reference city_64 value for the five bytes b'hello', shared by every
# buffer-like input test below.
_HELLO_BYTES_HASH = 2578220239953316063


def test_string(city_64):
    """Hash text and bytes with city_64 and compare against pinned values.

    NOTE: this module enables ``unicode_literals``, so unprefixed literals are
    text and only ``b'...'`` literals are bytes.
    """
    hasher = city_64()

    assert hasher
    assert hasattr(hasher, 'seed')

    assert hasher('hello') == 359204553733634674
    assert hasher('hello world') == 489096247858400539

    # Several positional chunks give the same value as chaining single calls,
    # each one seeded with the previous result.
    assert hasher('hello', ' ', 'world') == 416726000223957297
    chained = hasher('world', seed=hasher(' ', seed=hasher('hello')))
    assert chained == 416726000223957297

    assert hasher(b'hello') == _HELLO_BYTES_HASH

    # Empty text and empty bytes all map to the same value.
    assert hasher('') == 11160318154034397263
    assert hasher(u'') == 11160318154034397263
    assert hasher(b'') == 11160318154034397263


def test_list(city_64):
    """A plain list is rejected with a TypeError."""
    hasher = city_64()

    # The call itself must raise; the former ``assert ... == <value>`` wrapper
    # could never be evaluated and only obscured the intent.
    with pytest.raises(TypeError, match="unsupported argument type"):
        hasher(list(b'hello'))


def test_array(city_64):
    """array.array of unsigned bytes hashes like the equivalent bytes."""
    from array import array

    hasher = city_64()

    assert hasher(array('B', b'hello')) == _HELLO_BYTES_HASH


@pytest.mark.skipif(sys.version_info.major >= 3,
                    reason="buffer() only exists on Python 2")
def test_buffer(city_64):
    """Python 2 ``buffer`` objects hash like the equivalent bytes.

    Reported as skipped on Python 3 instead of silently passing without
    executing any assertion.
    """
    hasher = city_64()

    assert hasher(buffer(b'hello')) == _HELLO_BYTES_HASH  # noqa: F821


def test_bufferview(city_64):
    """memoryview input hashes like the equivalent bytes."""
    hasher = city_64()

    assert hasher(memoryview(b'hello')) == _HELLO_BYTES_HASH


def test_bytearray(city_64):
    """bytearray input hashes like the equivalent bytes."""
    hasher = city_64()

    assert hasher(bytearray(b'hello')) == _HELLO_BYTES_HASH


@pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support")
def test_error(city_64, city_128):
    """Invalid ``__call__`` invocations raise the pinned exception types.

    Two of the three expected exception types differ between Python 2 and
    Python 3; passing ``None`` raises TypeError on both.
    """
    on_py2 = sys.version_info.major < 3
    no_argument_error = TypeError if on_py2 else ValueError
    other_hasher_error = TypeError if on_py2 else RuntimeError

    with pytest.raises(no_argument_error):
        city_64.__call__()

    with pytest.raises(TypeError):
        city_64.__call__(None)

    with pytest.raises(other_hasher_error):
        city_64.__call__(city_128())
# Marker reused by every halftime test that is skipped when pyhash was built
# without int128 support.
_requires_int128 = pytest.mark.skipif(
    not pyhash.build_with_int128, reason="requires int128 support")


def test_halftime_64(hash_tester):
    """Check halftime_64 against its pinned bytes/seed/unicode values."""
    pinned = {
        'bytes_hash': 3604972081320839471,
        'seed_hash': 3471423249075386634,
        'unicode_hash': 7775058808590938809,
    }
    hash_tester(hasher_type=pyhash.halftime_64, **pinned)


@_requires_int128
def test_halftime_128(hash_tester):
    """Check halftime_128 against its pinned bytes/seed/unicode values."""
    pinned = {
        'bytes_hash': 1789925655390500234,
        'seed_hash': 1764839430745045589,
        'unicode_hash': 13450828806325116760,
    }
    hash_tester(hasher_type=pyhash.halftime_128, **pinned)


@_requires_int128
def test_halftime_256(hash_tester):
    """Check halftime_256 against its pinned bytes/seed/unicode values."""
    pinned = {
        'bytes_hash': 7217693443645459132,
        'seed_hash': 2352364554368405048,
        'unicode_hash': 3229181546426569946,
    }
    hash_tester(hasher_type=pyhash.halftime_256, **pinned)


@_requires_int128
def test_halftime_512(hash_tester):
    """Check halftime_512 against its pinned bytes/seed/unicode values."""
    pinned = {
        'bytes_hash': 11860383299129413215,
        'seed_hash': 14752946495531348805,
        'unicode_hash': 11130042267740264375,
    }
    hash_tester(hasher_type=pyhash.halftime_512, **pinned)


@pytest.mark.benchmark(group='hash64', disable_gc=True)
def test_halftime_hash64_perf(benchmark, hash_bencher):
    """Benchmark halftime_64, passing hash_bencher its pinned reference value."""
    reference = 16234903152174643107
    hash_bencher(benchmark, pyhash.halftime_64, reference)


@_requires_int128
@pytest.mark.benchmark(group='hash128', disable_gc=True)
def test_halftime_hash128_perf(benchmark, hash_bencher):
    """Benchmark halftime_128, passing hash_bencher its pinned reference value."""
    reference = 17751672293671634095
    hash_bencher(benchmark, pyhash.halftime_128, reference)


@_requires_int128
@pytest.mark.benchmark(group='hash256', disable_gc=True)
def test_halftime_hash256_perf(benchmark, hash_bencher):
    """Benchmark halftime_256, passing hash_bencher its pinned reference value."""
    reference = 17104156079727854476
    hash_bencher(benchmark, pyhash.halftime_256, reference)


@_requires_int128
@pytest.mark.benchmark(group='hash512', disable_gc=True)
def test_halftime_hash512_perf(benchmark, hash_bencher):
    """Benchmark halftime_512, passing hash_bencher its pinned reference value."""
    reference = 4920030102686204611
    hash_bencher(benchmark, pyhash.halftime_512, reference)
# Marker reused by the t1ha tests that are skipped when pyhash was built
# without int128 support.
_requires_int128 = pytest.mark.skipif(
    not pyhash.build_with_int128, reason="requires int128 support")

# Reference values for t1ha1_le; test_t1ha0 expects exactly the same numbers
# when ``cpu.aes`` is false.
_T1HA1_LE_PINNED = {
    'bytes_hash': 10616215634819799576,
    'seed_hash': 6056749954736269874,
    'unicode_hash': 18194209408316694427,
}


@_requires_int128
def test_t1ha2_atonce128(hash_tester):
    """Check t1ha2_atonce128 against its pinned 128-bit values."""
    pinned = {
        'bytes_hash': 111289500776795915835395169778666467727,
        'seed_hash': 86921791256626059574547663004160252269,
        'unicode_hash': 265347458704473149675948059533744938455,
    }
    hash_tester(hasher_type=pyhash.t1ha2_atonce128, **pinned)


def test_t1ha1_le(hash_tester):
    """Check t1ha1_le against its pinned values."""
    hash_tester(hasher_type=pyhash.t1ha1_le, **_T1HA1_LE_PINNED)


def test_t1ha1_be(hash_tester):
    """Check t1ha1_be against its pinned values."""
    pinned = {
        'bytes_hash': 7811195108528602730,
        'seed_hash': 16139937605191117723,
        'unicode_hash': 4258761466277697735,
    }
    hash_tester(hasher_type=pyhash.t1ha1_be, **pinned)


def test_t1ha0(hash_tester, cpu):
    """Check t1ha0; the expected values depend on ``cpu.aes``."""
    if cpu.aes:
        pinned = {
            'bytes_hash': 11576265462006865275,
            'seed_hash': 9383269742356701786,
            'unicode_hash': 10647421798084574537,
        }
    else:
        pinned = _T1HA1_LE_PINNED
    hash_tester(hasher_type=pyhash.t1ha0, **pinned)


@pytest.mark.benchmark(group='hash64', disable_gc=True)
def test_t1ha2_perf(benchmark, hash_bencher):
    """Benchmark pyhash.t1ha2, passing hash_bencher its pinned reference value."""
    reference = 17171225769172857249
    hash_bencher(benchmark, pyhash.t1ha2, reference)


@_requires_int128
@pytest.mark.benchmark(group='hash128', disable_gc=True)
def test_t1ha2_128_perf(benchmark, hash_bencher):
    """Benchmark pyhash.t1ha2_128, passing hash_bencher its pinned reference value."""
    reference = 263168739977411690410017013291704716368
    hash_bencher(benchmark, pyhash.t1ha2_128, reference)


@pytest.mark.benchmark(group='hash64', disable_gc=True)
def test_t1ha1_perf(benchmark, hash_bencher):
    """Benchmark pyhash.t1ha1, passing hash_bencher its pinned reference value."""
    reference = 6501324028002495964
    hash_bencher(benchmark, pyhash.t1ha1, reference)


@pytest.mark.benchmark(group='hash64', disable_gc=True)
def test_t1ha0_perf(benchmark, hash_bencher):
    """Benchmark pyhash.t1ha0; hash_bencher receives a list of three reference values."""
    references = [
        6501324028002495964,
        6970451072221114646,
        13811823941710697992,
    ]
    hash_bencher(benchmark, pyhash.t1ha0, references)
# Marker reused by the farm tests that are skipped when pyhash was built
# without int128 support.
_requires_int128 = pytest.mark.skipif(
    not pyhash.build_with_int128, reason="requires int128 support")


def test_farm_32(test_data):
    """farm_32 yields non-zero, pairwise different values for the three inputs.

    Only inequality is checked here; no exact value is pinned.
    """
    data, udata = test_data
    hasher = pyhash.farm_32()

    plain = hasher(data)
    seeded = hasher(data, seed=123)
    from_udata = hasher(udata)

    for digest in (plain, seeded, from_udata):
        assert digest != 0

    assert plain != seeded
    assert plain != from_udata
    assert seeded != from_udata


def test_farm_64(hash_tester):
    """Check farm_64 against its pinned bytes/seed/unicode values."""
    pinned = {
        'bytes_hash': 8581389452482819506,
        'seed_hash': 10025340881295800991,
        'unicode_hash': 7274603073818924232,
    }
    hash_tester(hasher_type=pyhash.farm_64, **pinned)


@_requires_int128
def test_farm_128(hash_tester):
    """Check farm_128 against its pinned bytes/seed/unicode values."""
    pinned = {
        'bytes_hash': 334882099032867325754781607143811124132,
        'seed_hash': 49442029837562385903494085441886302499,
        'unicode_hash': 251662992469041432568516527017706898625,
    }
    hash_tester(hasher_type=pyhash.farm_128, **pinned)


def test_farm_fingerprint_32(fingerprint_tester):
    """Check farm_fingerprint_32 against its pinned bytes/unicode fingerprints."""
    pinned = {
        'bytes_fingerprint': 1633095781,
        'unicode_fingerprint': 3574089775,
    }
    fingerprint_tester(fingerprinter_type=pyhash.farm_fingerprint_32, **pinned)


def test_farm_fingerprint_64(fingerprint_tester):
    """Check farm_fingerprint_64 against its pinned bytes/unicode fingerprints."""
    pinned = {
        'bytes_fingerprint': 8581389452482819506,
        'unicode_fingerprint': 7274603073818924232,
    }
    fingerprint_tester(fingerprinter_type=pyhash.farm_fingerprint_64, **pinned)


@_requires_int128
def test_farm_fingerprint_128(fingerprint_tester):
    """Check farm_fingerprint_128 against its pinned bytes/unicode fingerprints."""
    pinned = {
        'bytes_fingerprint': 334882099032867325754781607143811124132,
        'unicode_fingerprint': 251662992469041432568516527017706898625,
    }
    fingerprint_tester(fingerprinter_type=pyhash.farm_fingerprint_128, **pinned)


@pytest.mark.benchmark(group='hash32', disable_gc=True)
def test_farm_hash32_perf(benchmark, hash_bencher):
    """Benchmark farm_32; hash_bencher receives a list of two reference values."""
    references = [3712697123, 3977123615]
    hash_bencher(benchmark, pyhash.farm_32, references)


@pytest.mark.benchmark(group='hash64', disable_gc=True)
def test_farm_hash64_perf(benchmark, hash_bencher):
    """Benchmark farm_64, passing hash_bencher its pinned reference value."""
    reference = 5291657088564336415
    hash_bencher(benchmark, pyhash.farm_64, reference)


@_requires_int128
@pytest.mark.benchmark(group='hash128', disable_gc=True)
def test_farm_hash128_perf(benchmark, hash_bencher):
    """Benchmark farm_128, passing hash_bencher its pinned reference value."""
    reference = 2614362402971166945389138950146702896
    hash_bencher(benchmark, pyhash.farm_128, reference)
11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ master ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ master ] 20 | schedule: 21 | - cron: '26 16 * * 1' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'cpp', 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://git.io/codeql-language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v2 42 | with: 43 | submodules: true 44 | 45 | # Initializes the CodeQL tools for scanning. 46 | - name: Initialize CodeQL 47 | uses: github/codeql-action/init@v2 48 | with: 49 | languages: ${{ matrix.language }} 50 | # If you wish to specify custom queries, you can do so here or in a config file. 51 | # By default, queries listed here will override any specified in a config file. 52 | # Prefix the list here with "+" to use these queries and those in the config file. 53 | # queries: ./path/to/local/query, your-org/your-repo/queries@main 54 | 55 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 56 | # If this step fails, then you should remove it and run the build manually (see below) 57 | #- name: Autobuild 58 | # uses: github/codeql-action/autobuild@v2 59 | 60 | # ℹ️ Command-line programs to run using the OS shell. 
# Marker reused by the city tests that are skipped when pyhash was built
# without int128 support.
_requires_int128 = pytest.mark.skipif(
    not pyhash.build_with_int128, reason="requires int128 support")

# city_128 and city_crc_128 are checked against the very same numbers.
_CITY_128_PINNED = {
    'bytes_hash': 195179989828428219998331619914059544201,
    'seed_hash': 206755929755292977387372217469167977636,
    'unicode_hash': 211596129097514838244042408160146499227,
}
_CITY_128_PERF_REFERENCE = 254849646208103091500548480943427727100


def test_city_32(hash_tester):
    """Check city_32 against its pinned bytes/seed/unicode values."""
    pinned = {
        'bytes_hash': 1633095781,
        'seed_hash': 3687200064,
        'unicode_hash': 3574089775,
    }
    hash_tester(hasher_type=pyhash.city_32, **pinned)


def test_city_64(hash_tester):
    """Check city_64 against its pinned bytes/seed/unicode values."""
    pinned = {
        'bytes_hash': 17703940110308125106,
        'seed_hash': 8806864191580960558,
        'unicode_hash': 7557950076747784205,
    }
    hash_tester(hasher_type=pyhash.city_64, **pinned)


@_requires_int128
def test_city_128(hash_tester):
    """Check city_128 against the shared 128-bit reference values."""
    hash_tester(hasher_type=pyhash.city_128, **_CITY_128_PINNED)


@_requires_int128
def test_city_crc128(hash_tester):
    """Check city_crc_128 against the shared 128-bit reference values."""
    hash_tester(hasher_type=pyhash.city_crc_128, **_CITY_128_PINNED)


@_requires_int128
def test_city_crc256(fingerprint_tester):
    """Check city_fingerprint_256 against its pinned bytes/unicode fingerprints."""
    pinned = {
        'bytes_fingerprint':
            43374127706338803100025155483422426900760284308948611519881759972455588549698,
        'unicode_fingerprint':
            106103693879923228777324437129892107726572355760220840777228701216663718411687,
    }
    fingerprint_tester(fingerprinter_type=pyhash.city_fingerprint_256, **pinned)


@pytest.mark.benchmark(group='hash32', disable_gc=True)
def test_city_hash32_perf(benchmark, hash_bencher):
    """Benchmark city_32, passing hash_bencher its pinned reference value."""
    reference = 2824210825
    hash_bencher(benchmark, pyhash.city_32, reference)


@pytest.mark.benchmark(group='hash64', disable_gc=True)
def test_city_hash64_perf(benchmark, hash_bencher):
    """Benchmark city_64, passing hash_bencher its pinned reference value."""
    reference = 894299094737143437
    hash_bencher(benchmark, pyhash.city_64, reference)


@_requires_int128
@pytest.mark.benchmark(group='hash128', disable_gc=True)
def test_city_hash128_perf(benchmark, hash_bencher):
    """Benchmark city_128 with the shared 128-bit perf reference value."""
    hash_bencher(benchmark, pyhash.city_128, _CITY_128_PERF_REFERENCE)


@_requires_int128
@pytest.mark.benchmark(group='hash128', disable_gc=True)
def test_city_hash_crc128_perf(benchmark, hash_bencher):
    """Benchmark city_crc_128 with the shared 128-bit perf reference value."""
    hash_bencher(benchmark, pyhash.city_crc_128, _CITY_128_PERF_REFERENCE)
seed_value_t seed) const; 26 | }; 27 | 28 | typedef spooky_hash_v1_t spooky_hash_v1_32_t; 29 | typedef spooky_hash_v1_t spooky_hash_v1_64_t; 30 | 31 | template <> 32 | const spooky_hash_v1_32_t::hash_value_t spooky_hash_v1_32_t::operator()(void *buf, size_t len, spooky_hash_v1_32_t::seed_value_t seed) const 33 | { 34 | return SpookyHashV1::Hash32(buf, len, seed); 35 | } 36 | 37 | template <> 38 | const spooky_hash_v1_64_t::hash_value_t spooky_hash_v1_64_t::operator()(void *buf, size_t len, spooky_hash_v1_64_t::seed_value_t seed) const 39 | { 40 | return SpookyHashV1::Hash64(buf, len, seed); 41 | } 42 | 43 | #ifdef SUPPORT_INT128 44 | 45 | typedef spooky_hash_v1_t spooky_hash_v1_128_t; 46 | 47 | template <> 48 | const spooky_hash_v1_128_t::hash_value_t spooky_hash_v1_128_t::operator()(void *buf, size_t len, spooky_hash_v1_128_t::seed_value_t seed) const 49 | { 50 | uint64_t lo = U128_LO(seed), hi = U128_HI(seed); 51 | 52 | SpookyHashV1::Hash128(buf, len, &lo, &hi); 53 | 54 | return U128_NEW(lo, hi); 55 | } 56 | 57 | #endif 58 | 59 | template 60 | class spooky_hash_v2_t : public Hasher, T> 61 | { 62 | public: 63 | typedef Hasher, T> __hasher_t; 64 | typedef typename __hasher_t::hash_value_t hash_value_t; 65 | typedef typename __hasher_t::seed_value_t seed_value_t; 66 | 67 | spooky_hash_v2_t(seed_value_t seed = 0) : __hasher_t(seed) {} 68 | 69 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 70 | }; 71 | 72 | typedef spooky_hash_v2_t spooky_hash_v2_32_t; 73 | typedef spooky_hash_v2_t spooky_hash_v2_64_t; 74 | 75 | template <> 76 | const spooky_hash_v2_32_t::hash_value_t spooky_hash_v2_32_t::operator()(void *buf, size_t len, spooky_hash_v2_32_t::seed_value_t seed) const 77 | { 78 | return SpookyHashV1::Hash32(buf, len, seed); 79 | } 80 | 81 | template <> 82 | const spooky_hash_v2_64_t::hash_value_t spooky_hash_v2_64_t::operator()(void *buf, size_t len, spooky_hash_v2_64_t::seed_value_t seed) const 83 | { 84 | return 
# Marker reused by the spooky tests that are skipped when pyhash was built
# without int128 support.
_requires_int128 = pytest.mark.skipif(
    not pyhash.build_with_int128, reason="requires int128 support")

# NOTE(review): the v1 and v2 tests are pinned to exactly the same numbers,
# so each width has a single table shared by both versions.
_SPOOKY_32_PINNED = {
    'bytes_hash': 1882037601,
    'seed_hash': 1324274298,
    'unicode_hash': 2977967976,
}
_SPOOKY_64_PINNED = {
    'bytes_hash': 10130480990056717665,
    'seed_hash': 1598355329892273278,
    'unicode_hash': 4093159241144086376,
}
_SPOOKY_128_PINNED = {
    'bytes_hash': 241061513486538422840128476001680072033,
    'seed_hash': 315901747311404831226315334184550174199,
    'unicode_hash': 207554373952009549684886824908954283880,
}

# Reference values handed to hash_bencher, again shared by v1 and v2.
_SPOOKY_32_PERF = 2489700128
_SPOOKY_64_PERF = 8714752859576848160
_SPOOKY_128_PERF = 69975394272542483818884528997491134240


def test_spooky_v1_32(hash_tester):
    """Check spooky_v1_32 against the pinned 32-bit values."""
    hash_tester(hasher_type=pyhash.spooky_v1_32, **_SPOOKY_32_PINNED)


def test_spooky_v1_64(hash_tester):
    """Check spooky_v1_64 against the pinned 64-bit values."""
    hash_tester(hasher_type=pyhash.spooky_v1_64, **_SPOOKY_64_PINNED)


@_requires_int128
def test_spooky_v1_128(hash_tester):
    """Check spooky_v1_128 against the pinned 128-bit values."""
    hash_tester(hasher_type=pyhash.spooky_v1_128, **_SPOOKY_128_PINNED)


def test_spooky_v2_32(hash_tester):
    """Check spooky_v2_32 against the pinned 32-bit values."""
    hash_tester(hasher_type=pyhash.spooky_v2_32, **_SPOOKY_32_PINNED)


def test_spooky_v2_64(hash_tester):
    """Check spooky_v2_64 against the pinned 64-bit values."""
    hash_tester(hasher_type=pyhash.spooky_v2_64, **_SPOOKY_64_PINNED)


@_requires_int128
def test_spooky_v2_128(hash_tester):
    """Check spooky_v2_128 against the pinned 128-bit values."""
    hash_tester(hasher_type=pyhash.spooky_v2_128, **_SPOOKY_128_PINNED)


@pytest.mark.benchmark(group='hash32', disable_gc=True)
def test_spooky_v1_hash32_perf(benchmark, hash_bencher):
    """Benchmark spooky_v1_32 with the shared 32-bit reference value."""
    hash_bencher(benchmark, pyhash.spooky_v1_32, _SPOOKY_32_PERF)


@pytest.mark.benchmark(group='hash64', disable_gc=True)
def test_spooky_v1_hash64_perf(benchmark, hash_bencher):
    """Benchmark spooky_v1_64 with the shared 64-bit reference value."""
    hash_bencher(benchmark, pyhash.spooky_v1_64, _SPOOKY_64_PERF)


@_requires_int128
@pytest.mark.benchmark(group='hash128', disable_gc=True)
def test_spooky_v1_hash128_perf(benchmark, hash_bencher):
    """Benchmark spooky_v1_128 with the shared 128-bit reference value."""
    hash_bencher(benchmark, pyhash.spooky_v1_128, _SPOOKY_128_PERF)


@pytest.mark.benchmark(group='hash32', disable_gc=True)
def test_spooky_v2_hash32_perf(benchmark, hash_bencher):
    """Benchmark spooky_v2_32 with the shared 32-bit reference value."""
    hash_bencher(benchmark, pyhash.spooky_v2_32, _SPOOKY_32_PERF)


@pytest.mark.benchmark(group='hash64', disable_gc=True)
def test_spooky_v2_hash64_perf(benchmark, hash_bencher):
    """Benchmark spooky_v2_64 with the shared 64-bit reference value."""
    hash_bencher(benchmark, pyhash.spooky_v2_64, _SPOOKY_64_PERF)


@_requires_int128
@pytest.mark.benchmark(group='hash128', disable_gc=True)
def test_spooky_v2_hash128_perf(benchmark, hash_bencher):
    """Benchmark spooky_v2_128 with the shared 128-bit reference value."""
    hash_bencher(benchmark, pyhash.spooky_v2_128, _SPOOKY_128_PERF)
family of hash functions. 10 | 11 | https://github.com/google/farmhash 12 | 13 | **/ 14 | 15 | template 16 | class farm_hash_t : public Hasher, T> 17 | { 18 | public: 19 | typedef Hasher, T> __hasher_t; 20 | typedef typename __hasher_t::hash_value_t hash_value_t; 21 | typedef typename __hasher_t::seed_value_t seed_value_t; 22 | 23 | farm_hash_t(seed_value_t seed = 0) : __hasher_t(seed) {} 24 | 25 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 26 | }; 27 | 28 | typedef farm_hash_t farm_hash_32_t; 29 | typedef farm_hash_t farm_hash_64_t; 30 | #ifdef SUPPORT_INT128 31 | typedef farm_hash_t farm_hash_128_t; 32 | #endif 33 | 34 | template <> 35 | const farm_hash_32_t::hash_value_t farm_hash_32_t::operator()(void *buf, size_t len, farm_hash_32_t::seed_value_t seed) const 36 | { 37 | if (seed) 38 | { 39 | return farmhash32_with_seed((const char *)buf, len, seed); 40 | } 41 | else 42 | { 43 | return farmhash32((const char *)buf, len); 44 | } 45 | } 46 | 47 | template <> 48 | const farm_hash_64_t::hash_value_t farm_hash_64_t::operator()(void *buf, size_t len, farm_hash_64_t::seed_value_t seed) const 49 | { 50 | if (seed) 51 | { 52 | return farmhash64_with_seed((const char *)buf, len, seed); 53 | } 54 | else 55 | { 56 | return farmhash64((const char *)buf, len); 57 | } 58 | } 59 | 60 | #ifdef SUPPORT_INT128 61 | template <> 62 | const farm_hash_128_t::hash_value_t farm_hash_128_t::operator()(void *buf, size_t len, farm_hash_128_t::seed_value_t seed) const 63 | { 64 | uint128_c_t hash; 65 | 66 | if (seed) 67 | { 68 | hash = farmhash128_with_seed((const char *)buf, len, make_uint128_c_t(U128_LO(seed), U128_HI(seed))); 69 | } 70 | else 71 | { 72 | hash = farmhash128((const char *)buf, len); 73 | } 74 | 75 | return U128_NEW(uint128_c_t_low64(hash), uint128_c_t_high64(hash)); 76 | } 77 | #endif 78 | 79 | template 80 | class farm_fingerprint_t : public Fingerprinter, T> 81 | { 82 | public: 83 | typedef Fingerprinter, T> __fingerprinter_t; 84 | 
typedef typename __fingerprinter_t::fingerprint_t fingerprint_t; 85 | 86 | farm_fingerprint_t() = default; 87 | 88 | const fingerprint_t operator()(void *buf, size_t len) const; 89 | }; 90 | 91 | typedef farm_fingerprint_t farm_fingerprint_32_t; 92 | typedef farm_fingerprint_t farm_fingerprint_64_t; 93 | #ifdef SUPPORT_INT128 94 | typedef farm_fingerprint_t farm_fingerprint_128_t; 95 | #endif 96 | 97 | template <> 98 | const farm_fingerprint_32_t::fingerprint_t farm_fingerprint_32_t::operator()(void *buf, size_t len) const 99 | { 100 | return farmhash_fingerprint32((const char *)buf, len); 101 | } 102 | 103 | template <> 104 | const farm_fingerprint_64_t::fingerprint_t farm_fingerprint_64_t::operator()(void *buf, size_t len) const 105 | { 106 | return farmhash_fingerprint64((const char *)buf, len); 107 | } 108 | 109 | #ifdef SUPPORT_INT128 110 | template <> 111 | const farm_fingerprint_128_t::fingerprint_t farm_fingerprint_128_t::operator()(void *buf, size_t len) const 112 | { 113 | uint128_c_t fingerprint = farmhash_fingerprint128((const char *)buf, len); 114 | 115 | return U128_NEW(uint128_c_t_low64(fingerprint), uint128_c_t_high64(fingerprint)); 116 | } 117 | #endif 118 | -------------------------------------------------------------------------------- /tests/test_fingerprint.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import sys 4 | import platform 5 | 6 | import pytest 7 | 8 | import pyhash 9 | 10 | 11 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 12 | def test_string(city_fingerprint_256): 13 | fp = city_fingerprint_256() 14 | 15 | assert fp 16 | assert not hasattr(fp, 'seed') 17 | 18 | assert fp( 19 | 'hello') == 85269287788015250159504678996909715671377074579298442842859846348216760383456 20 | assert fp('hello', ' ', 'world') == [ 21 | 85269287788015250159504678996909715671377074579298442842859846348216760383456, 22 | 
# Marker reused by every city_fingerprint_256 test in this module.
_requires_int128 = pytest.mark.skipif(
    not pyhash.build_with_int128, reason="requires int128 support")

# Reference city_fingerprint_256 value for the five bytes b'hello', shared by
# every buffer-like input test below.
_HELLO_BYTES_FINGERPRINT = \
    32224966601437776796805147064203168097183942002141386838543322678917249904243


@_requires_int128
def test_list(city_fingerprint_256):
    """A plain list is rejected with a TypeError."""
    fp = city_fingerprint_256()

    # The call itself must raise. The former ``assert ... == <value>`` wrapper
    # could never be evaluated (and compared against a 64-bit hash literal
    # rather than a 256-bit fingerprint), so it has been dropped.
    with pytest.raises(TypeError, match="unsupported argument type"):
        fp(list(b'hello'))


@_requires_int128
def test_array(city_fingerprint_256):
    """array.array of unsigned bytes fingerprints like the equivalent bytes."""
    from array import array

    fp = city_fingerprint_256()

    assert fp(array('B', b'hello')) == _HELLO_BYTES_FINGERPRINT


@_requires_int128
@pytest.mark.skipif(sys.version_info.major >= 3,
                    reason="buffer() only exists on Python 2")
def test_buffer(city_fingerprint_256):
    """Python 2 ``buffer`` objects fingerprint like the equivalent bytes.

    Reported as skipped on Python 3 instead of silently passing without
    executing any assertion.
    """
    fp = city_fingerprint_256()

    assert fp(buffer(b'hello')) == _HELLO_BYTES_FINGERPRINT  # noqa: F821


@_requires_int128
def test_bufferview(city_fingerprint_256):
    """memoryview input fingerprints like the equivalent bytes."""
    fp = city_fingerprint_256()

    assert fp(memoryview(b'hello')) == _HELLO_BYTES_FINGERPRINT


@_requires_int128
def test_bytearray(city_fingerprint_256):
    """bytearray input fingerprints like the equivalent bytes."""
    fp = city_fingerprint_256()

    assert fp(bytearray(b'hello')) == _HELLO_BYTES_FINGERPRINT


def test_error(farm_fingerprint_32, farm_fingerprint_64):
    """Invalid ``__call__`` invocations raise the pinned exception types.

    Two of the three expected exception types differ between Python 2 and
    Python 3; passing ``None`` raises TypeError on both.
    """
    on_py2 = sys.version_info.major < 3
    no_argument_error = TypeError if on_py2 else ValueError
    other_fingerprinter_error = TypeError if on_py2 else RuntimeError

    with pytest.raises(no_argument_error):
        farm_fingerprint_32.__call__()

    with pytest.raises(TypeError):
        farm_fingerprint_32.__call__(None)

    with pytest.raises(other_fingerprinter_error):
        farm_fingerprint_32.__call__(farm_fingerprint_64())
actions/setup-python@v2 54 | with: 55 | python-version: ${{ matrix.python-version }} 56 | architecture: x64 57 | 58 | - name: Display Python version 59 | run: python -c "import sys; print(sys.version)" 60 | 61 | - name: Install requirements 62 | run: | 63 | pip install -U pip setuptools wheel 64 | pip install cpuid 65 | pip install -r requirements.txt 66 | pip install -r tests/requirements.txt 67 | 68 | - name: Build extensions 69 | run: | 70 | python setup.py build_clib build_ext --inplace 71 | 72 | - name: Run tests 73 | run: | 74 | pytest -v --benchmark-disable --log-cli-level=info -k 'not highway' 75 | 76 | coverage: 77 | name: Coverage report 78 | runs-on: ubuntu-latest 79 | needs: test 80 | steps: 81 | - name: Install dependencies 82 | run: | 83 | sudo apt-get update 84 | sudo apt-get install -yq lcov 85 | 86 | - uses: actions/checkout@v2 87 | with: 88 | submodules: true 89 | 90 | - name: Setup python 91 | uses: actions/setup-python@v2 92 | with: 93 | python-version: 3.x 94 | 95 | - name: Display Python version 96 | run: python -c "import sys; print(sys.version)" 97 | 98 | - name: Install requirements 99 | run: | 100 | pip install -r tests/requirements.txt 101 | 102 | - name: Install pyhash package with coverage 103 | env: 104 | CFLAGS: -coverage 105 | run: | 106 | pip install -v . 107 | 108 | - name: Run tests with coverage 109 | run: | 110 | pytest -k 'not highway' --benchmark-disable --cov=./ --cov-report=xml 111 | 112 | - name: Generate coverage report 113 | run: | 114 | lcov --capture --directory . --output-file coverage.info 115 | lcov --remove coverage.info '/usr/*' "${PWD}/src/fnv/*" "${PWD}/src/lookup3/*" "${PWD}/src/SuperFastHash/*" "${PWD}/src/smhasher/*" "${PWD}/src/pybind11/*" -o coverage_filtered.info 116 | genhtml --legend --title "${{ github.sha }}" --prefix src coverage_filtered.info --output-directory . 117 | 118 | - name: Upload coverage to Codecov 119 | uses: codecov/codecov-action@v2 120 | with: 121 | directory: . 
122 | env_vars: OS,PYTHON 123 | flags: unittests 124 | fail_ci_if_error: true 125 | verbose: true 126 | -------------------------------------------------------------------------------- /pyhash/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | from __future__ import unicode_literals 4 | 5 | import sys 6 | import inspect 7 | 8 | import _pyhash 9 | 10 | __is_little_endian__ = sys.byteorder == 'little' 11 | 12 | _MSC_VER = _pyhash.__dict__.get('_MSC_VER') 13 | _GCC_VER = _pyhash.__dict__.get('_GCC_VER') 14 | _CLANG_VER = _pyhash.__dict__.get('_CLANG_VER') 15 | __VERSION__ = _pyhash.__dict__.get('__VERSION__') 16 | 17 | build_with_sse42 = _pyhash.build_with_sse42 18 | build_with_int128 = _pyhash.build_with_int128 19 | 20 | fnv1_32 = _pyhash.fnv1_32 21 | fnv1a_32 = _pyhash.fnv1a_32 22 | fnv1_64 = _pyhash.fnv1_64 23 | fnv1a_64 = _pyhash.fnv1a_64 24 | 25 | murmur1_32 = _pyhash.murmur1_32 26 | murmur1_aligned_32 = _pyhash.murmur1_aligned_32 27 | murmur2_32 = _pyhash.murmur2_32 28 | murmur2a_32 = _pyhash.murmur2a_32 29 | murmur2_aligned_32 = _pyhash.murmur2_aligned_32 30 | murmur2_neutral_32 = _pyhash.murmur2_neutral_32 31 | murmur2_x64_64a = _pyhash.murmur2_x64_64a 32 | murmur2_x86_64b = _pyhash.murmur2_x86_64b 33 | murmur3_32 = _pyhash.murmur3_32 34 | murmur3_x86_128 = _pyhash.__dict__.get('murmur3_x86_128') 35 | murmur3_x64_128 = _pyhash.__dict__.get('murmur3_x64_128') 36 | 37 | lookup3 = _pyhash.lookup3_little if __is_little_endian__ else _pyhash.lookup3_big 38 | lookup3_little = _pyhash.lookup3_little 39 | lookup3_big = _pyhash.lookup3_big 40 | 41 | super_fast_hash = _pyhash.super_fast_hash 42 | 43 | city_32 = _pyhash.city_32 44 | city_64 = _pyhash.city_64 45 | city_128 = _pyhash.__dict__.get('city_128') 46 | city_crc_128 = _pyhash.__dict__.get('city_crc_128') 47 | city_fingerprint_256 = _pyhash.__dict__.get('city_fingerprint_256') 48 | 49 | spooky_v1_32 = 
_pyhash.spooky_v1_32 50 | spooky_v1_64 = _pyhash.spooky_v1_64 51 | spooky_v1_128 = _pyhash.__dict__.get('spooky_v1_128') 52 | 53 | spooky_v2_32 = _pyhash.spooky_v2_32 54 | spooky_v2_64 = _pyhash.spooky_v2_64 55 | spooky_v2_128 = _pyhash.__dict__.get('spooky_v2_128') 56 | 57 | spooky_32 = spooky_v2_32 58 | spooky_64 = spooky_v2_64 59 | spooky_128 = spooky_v2_128 60 | 61 | farm_32 = _pyhash.__dict__.get('farm_32') 62 | farm_64 = _pyhash.__dict__.get('farm_64') 63 | farm_128 = _pyhash.__dict__.get('farm_128') 64 | 65 | farm_fingerprint_32 = _pyhash.__dict__.get('farm_fingerprint_32') 66 | farm_fingerprint_64 = _pyhash.__dict__.get('farm_fingerprint_64') 67 | farm_fingerprint_128 = _pyhash.__dict__.get('farm_fingerprint_128') 68 | 69 | metro_64 = metro_64_1 = _pyhash.metro_64_1 70 | metro_64_2 = _pyhash.metro_64_2 71 | metro_128 = metro_128_1 = _pyhash.__dict__.get('metro_128_1') 72 | metro_128_2 = _pyhash.__dict__.get('metro_128_2') 73 | metro_crc_64 = metro_crc_64_1 = _pyhash.metro_64_crc_1 74 | metro_crc_64_2 = _pyhash.metro_64_crc_2 75 | metro_crc_128 = metro_crc_128_1 = _pyhash.__dict__.get('metro_128_crc_1') 76 | metro_crc_128_2 = _pyhash.__dict__.get('metro_128_crc_2') 77 | 78 | mum_64 = _pyhash.mum_64 79 | 80 | t1ha2 = t1ha2_64 = t1ha2_atonce = _pyhash.t1ha2_atonce 81 | t1ha2_128 = t1ha2_atonce128 = _pyhash.__dict__.get('t1ha2_atonce128') 82 | t1ha1_le = t1ha1_64le = _pyhash.t1ha1_le 83 | t1ha1_be = t1ha1_64be = _pyhash.t1ha1_be 84 | t1ha1 = t1ha1_64 = t1ha1_le if __is_little_endian__ else t1ha1_be 85 | t1ha0 = t1ha0_64 = _pyhash.t1ha0 86 | 87 | xx_32 = _pyhash.xx_32 88 | xx_64 = _pyhash.xx_64 89 | xxh3_64 = _pyhash.xxh3_64 90 | xxh3_128 = _pyhash.__dict__.get('xxh3_128') 91 | 92 | highway_64 = _pyhash.__dict__.get('highway_64') 93 | highway_128 = _pyhash.__dict__.get('highway_128') 94 | highway_256 = _pyhash.__dict__.get('highway_256') 95 | 96 | wy_32 = _pyhash.wy_32 97 | wy_64 = _pyhash.wy_64 98 | 99 | halftime_64 = _pyhash.halftime_64 100 | halftime_128 = 
_pyhash.__dict__.get('halftime_128') 101 | halftime_256 = _pyhash.__dict__.get('halftime_256') 102 | halftime_512 = _pyhash.__dict__.get('halftime_512') 103 | 104 | __hasher__ = dict(inspect.getmembers(sys.modules[__name__], inspect.isclass)) 105 | -------------------------------------------------------------------------------- /src/Halftime.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #include "Hash.h" 8 | 9 | #include "halftime/halftime-hash.hpp" 10 | 11 | typedef std::array halftime_seed_t; 12 | 13 | template <> 14 | halftime_seed_t as_seed_value(uint64_t hash) 15 | { 16 | halftime_seed_t seed; 17 | std::generate(seed.begin(), seed.end(), std::mt19937_64(hash)); 18 | return seed; 19 | } 20 | 21 | template <> 22 | uint64_t as_hash_value(halftime_seed_t seed) 23 | { 24 | return seed[0]; 25 | } 26 | 27 | #ifdef SUPPORT_INT128 28 | template <> 29 | halftime_seed_t as_seed_value(uint128_t hash) 30 | { 31 | std::seed_seq seeds{U128_LO(hash), U128_HI(hash)}; 32 | std::mt19937_64 gen(seeds); 33 | 34 | halftime_seed_t seed; 35 | std::generate(seed.begin(), seed.end(), gen); 36 | return seed; 37 | } 38 | 39 | template <> 40 | halftime_seed_t as_seed_value(uint256_t hash) 41 | { 42 | std::seed_seq seeds(hash.begin(), hash.end()); 43 | std::mt19937_64 gen(seeds); 44 | 45 | halftime_seed_t seed; 46 | std::generate(seed.begin(), seed.end(), gen); 47 | return seed; 48 | } 49 | 50 | template <> 51 | halftime_seed_t as_seed_value(uint512_t hash) 52 | { 53 | std::seed_seq seeds(hash.begin(), hash.end()); 54 | std::mt19937_64 gen(seeds); 55 | 56 | halftime_seed_t seed; 57 | std::generate(seed.begin(), seed.end(), gen); 58 | return seed; 59 | } 60 | 61 | template <> 62 | uint128_t as_hash_value(halftime_seed_t seed) 63 | { 64 | return U128_NEW(seed[0], seed[1]); 65 | } 66 | 67 | template <> 68 | uint256_t as_hash_value(halftime_seed_t seed) 69 | { 70 | return {seed[0], 
seed[1], seed[2], seed[3]}; 71 | } 72 | 73 | template <> 74 | uint512_t as_hash_value(halftime_seed_t seed) 75 | { 76 | return {seed[0], seed[1], seed[2], seed[3], seed[4], seed[5], seed[6], seed[7]}; 77 | } 78 | 79 | #endif 80 | 81 | template 82 | class halftime_hash_t : public Hasher, halftime_seed_t, uint64_t> 83 | { 84 | public: 85 | typedef Hasher, halftime_seed_t, uint64_t> __hasher_t; 86 | typedef typename __hasher_t::hash_value_t hash_value_t; 87 | typedef typename __hasher_t::seed_value_t seed_value_t; 88 | 89 | halftime_hash_t(uint64_t seed = {}) : __hasher_t(as_seed_value(seed)) 90 | { 91 | } 92 | 93 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 94 | 95 | static py::class_> Export(const py::module &m, const char *name) 96 | { 97 | return py::class_>(m, name) 98 | .def(py::init(), py::arg("seed") = 0) 99 | .def_readwrite("seed", &halftime_hash_t::_seed) 100 | .def("__call__", &halftime_hash_t::CallWithArgs); 101 | } 102 | }; 103 | 104 | typedef halftime_hash_t halftime_hash_64_t; 105 | 106 | #ifdef SUPPORT_INT128 107 | typedef halftime_hash_t halftime_hash_128_t; 108 | typedef halftime_hash_t halftime_hash_256_t; 109 | typedef halftime_hash_t halftime_hash_512_t; 110 | #endif 111 | 112 | template <> 113 | const halftime_hash_64_t::hash_value_t halftime_hash_64_t::operator()(void *buf, size_t len, halftime_hash_64_t::seed_value_t seed) const 114 | { 115 | return halftime_hash::HalftimeHashStyle64(seed.data(), (const char *)buf, len); 116 | } 117 | 118 | #ifdef SUPPORT_INT128 119 | 120 | template <> 121 | const halftime_hash_128_t::hash_value_t halftime_hash_128_t::operator()(void *buf, size_t len, halftime_hash_128_t::seed_value_t seed) const 122 | { 123 | return halftime_hash::HalftimeHashStyle128(seed.data(), (const char *)buf, len); 124 | } 125 | 126 | template <> 127 | const halftime_hash_256_t::hash_value_t halftime_hash_256_t::operator()(void *buf, size_t len, halftime_hash_256_t::seed_value_t seed) const 128 | { 
129 | return halftime_hash::HalftimeHashStyle256(seed.data(), (const char *)buf, len); 130 | } 131 | 132 | template <> 133 | const halftime_hash_512_t::hash_value_t halftime_hash_512_t::operator()(void *buf, size_t len, halftime_hash_512_t::seed_value_t seed) const 134 | { 135 | return halftime_hash::HalftimeHashStyle512(seed.data(), (const char *)buf, len); 136 | } 137 | 138 | #endif 139 | -------------------------------------------------------------------------------- /src/MetroHash.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Hash.h" 4 | 5 | #include "smhasher/metrohash/metrohash.h" 6 | 7 | /** 8 | 9 | MetroHash: Faster, Better Hash Functions 10 | 11 | https://github.com/jandrewrogers/MetroHash 12 | 13 | **/ 14 | 15 | template 16 | class metro_hash_t : public Hasher, uint32_t, T> 17 | { 18 | public: 19 | typedef Hasher, uint32_t, T> __hasher_t; 20 | typedef typename __hasher_t::hash_value_t hash_value_t; 21 | typedef typename __hasher_t::seed_value_t seed_value_t; 22 | 23 | metro_hash_t(seed_value_t seed = 0) : __hasher_t(seed) {} 24 | 25 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 26 | }; 27 | 28 | typedef metro_hash_t metro_hash_64_1_t; 29 | typedef metro_hash_t metro_hash_64_2_t; 30 | #ifdef SUPPORT_INT128 31 | typedef metro_hash_t metro_hash_128_1_t; 32 | typedef metro_hash_t metro_hash_128_2_t; 33 | #endif 34 | 35 | template <> 36 | const metro_hash_64_1_t::hash_value_t metro_hash_64_1_t::operator()(void *buf, size_t len, metro_hash_64_1_t::seed_value_t seed) const 37 | { 38 | uint64_t hash; 39 | 40 | metrohash64_1((const uint8_t *)buf, len, seed, (uint8_t *)&hash); 41 | 42 | return hash; 43 | } 44 | 45 | template <> 46 | const metro_hash_64_2_t::hash_value_t metro_hash_64_2_t::operator()(void *buf, size_t len, metro_hash_64_2_t::seed_value_t seed) const 47 | { 48 | uint64_t hash; 49 | 50 | metrohash64_2((const uint8_t *)buf, len, seed, (uint8_t 
*)&hash); 51 | 52 | return hash; 53 | } 54 | 55 | #ifdef SUPPORT_INT128 56 | 57 | template <> 58 | const metro_hash_128_1_t::hash_value_t metro_hash_128_1_t::operator()(void *buf, size_t len, metro_hash_128_1_t::seed_value_t seed) const 59 | { 60 | uint128_t hash; 61 | 62 | metrohash128_1((const uint8_t *)buf, len, seed, (uint8_t *)&hash); 63 | 64 | return hash; 65 | } 66 | 67 | template <> 68 | const metro_hash_128_2_t::hash_value_t metro_hash_128_2_t::operator()(void *buf, size_t len, metro_hash_128_2_t::seed_value_t seed) const 69 | { 70 | uint128_t hash; 71 | 72 | metrohash128_2((const uint8_t *)buf, len, seed, (uint8_t *)&hash); 73 | 74 | return hash; 75 | } 76 | #endif 77 | 78 | template 79 | class metro_hash_crc_t : public Hasher, uint32_t, T> 80 | { 81 | public: 82 | typedef Hasher, uint32_t, T> __hasher_t; 83 | typedef typename __hasher_t::hash_value_t hash_value_t; 84 | typedef typename __hasher_t::seed_value_t seed_value_t; 85 | 86 | metro_hash_crc_t(seed_value_t seed = 0) : __hasher_t(seed) {} 87 | 88 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 89 | }; 90 | 91 | typedef metro_hash_crc_t metro_hash_64_crc_1_t; 92 | typedef metro_hash_crc_t metro_hash_64_crc_2_t; 93 | #ifdef SUPPORT_INT128 94 | typedef metro_hash_crc_t metro_hash_128_crc_1_t; 95 | typedef metro_hash_crc_t metro_hash_128_crc_2_t; 96 | #endif 97 | 98 | template <> 99 | const metro_hash_64_crc_1_t::hash_value_t metro_hash_64_crc_1_t::operator()(void *buf, size_t len, metro_hash_64_crc_1_t::seed_value_t seed) const 100 | { 101 | uint64_t hash; 102 | 103 | metrohash64crc_1((const uint8_t *)buf, len, (uint32_t)seed, (uint8_t *)&hash); 104 | 105 | return hash; 106 | } 107 | 108 | template <> 109 | const metro_hash_64_crc_2_t::hash_value_t metro_hash_64_crc_2_t::operator()(void *buf, size_t len, metro_hash_64_crc_2_t::seed_value_t seed) const 110 | { 111 | uint64_t hash; 112 | 113 | metrohash64crc_2((const uint8_t *)buf, len, (uint32_t)seed, (uint8_t *)&hash); 
114 | 115 | return hash; 116 | } 117 | 118 | #ifdef SUPPORT_INT128 119 | 120 | template <> 121 | const metro_hash_128_crc_1_t::hash_value_t metro_hash_128_crc_1_t::operator()(void *buf, size_t len, metro_hash_128_crc_1_t::seed_value_t seed) const 122 | { 123 | uint128_t hash; 124 | 125 | metrohash128crc_1((const uint8_t *)buf, len, seed, (uint8_t *)&hash); 126 | 127 | return hash; 128 | } 129 | 130 | template <> 131 | const metro_hash_128_crc_2_t::hash_value_t metro_hash_128_crc_2_t::operator()(void *buf, size_t len, metro_hash_128_crc_2_t::seed_value_t seed) const 132 | { 133 | uint128_t hash; 134 | 135 | metrohash128crc_2((const uint8_t *)buf, len, seed, (uint8_t *)&hash); 136 | 137 | return hash; 138 | } 139 | 140 | #endif 141 | -------------------------------------------------------------------------------- /src/fnv/hash_32a.c: -------------------------------------------------------------------------------- 1 | /* 2 | * hash_32 - 32 bit Fowler/Noll/Vo FNV-1a hash code 3 | * 4 | * @(#) $Revision: 5.1 $ 5 | * @(#) $Id: hash_32a.c,v 5.1 2009/06/30 09:13:32 chongo Exp $ 6 | * @(#) $Source: /usr/local/src/cmd/fnv/RCS/hash_32a.c,v $ 7 | * 8 | *** 9 | * 10 | * Fowler/Noll/Vo hash 11 | * 12 | * The basis of this hash algorithm was taken from an idea sent 13 | * as reviewer comments to the IEEE POSIX P1003.2 committee by: 14 | * 15 | * Phong Vo (http://www.research.att.com/info/kpv/) 16 | * Glenn Fowler (http://www.research.att.com/~gsf/) 17 | * 18 | * In a subsequent ballot round: 19 | * 20 | * Landon Curt Noll (http://www.isthe.com/chongo/) 21 | * 22 | * improved on their algorithm. Some people tried this hash 23 | * and found that it worked rather well. In an EMail message 24 | * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash. 25 | * 26 | * FNV hashes are designed to be fast while maintaining a low 27 | * collision rate. The FNV speed allows one to quickly hash lots 28 | * of data while maintaining a reasonable collision rate. 
See: 29 | * 30 | * http://www.isthe.com/chongo/tech/comp/fnv/index.html 31 | * 32 | * for more details as well as other forms of the FNV hash. 33 | *** 34 | * 35 | * To use the recommended 32 bit FNV-1a hash, pass FNV1_32A_INIT as the 36 | * Fnv32_t hashval argument to fnv_32a_buf() or fnv_32a_str(). 37 | * 38 | *** 39 | * 40 | * Please do not copyright this code. This code is in the public domain. 41 | * 42 | * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 43 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO 44 | * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR 45 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 46 | * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 47 | * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 48 | * PERFORMANCE OF THIS SOFTWARE. 49 | * 50 | * By: 51 | * chongo /\oo/\ 52 | * http://www.isthe.com/chongo/ 53 | * 54 | * Share and Enjoy! :-) 55 | */ 56 | 57 | #include 58 | #include "fnv.h" 59 | 60 | 61 | /* 62 | * 32 bit magic FNV-1a prime 63 | */ 64 | #define FNV_32_PRIME ((Fnv32_t)0x01000193) 65 | 66 | 67 | /* 68 | * fnv_32a_buf - perform a 32 bit Fowler/Noll/Vo FNV-1a hash on a buffer 69 | * 70 | * input: 71 | * buf - start of buffer to hash 72 | * len - length of buffer in octets 73 | * hval - previous hash value or 0 if first call 74 | * 75 | * returns: 76 | * 32 bit hash as a static hash type 77 | * 78 | * NOTE: To use the recommended 32 bit FNV-1a hash, use FNV1_32A_INIT as the 79 | * hval arg on the first call to either fnv_32a_buf() or fnv_32a_str(). 
80 | */ 81 | Fnv32_t 82 | fnv_32a_buf(void *buf, size_t len, Fnv32_t hval) 83 | { 84 | unsigned char *bp = (unsigned char *)buf; /* start of buffer */ 85 | unsigned char *be = bp + len; /* beyond end of buffer */ 86 | 87 | /* 88 | * FNV-1a hash each octet in the buffer 89 | */ 90 | while (bp < be) { 91 | 92 | /* xor the bottom with the current octet */ 93 | hval ^= (Fnv32_t)*bp++; 94 | 95 | /* multiply by the 32 bit FNV magic prime mod 2^32 */ 96 | #if defined(NO_FNV_GCC_OPTIMIZATION) 97 | hval *= FNV_32_PRIME; 98 | #else 99 | hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24); 100 | #endif 101 | } 102 | 103 | /* return our new hash value */ 104 | return hval; 105 | } 106 | 107 | 108 | /* 109 | * fnv_32a_str - perform a 32 bit Fowler/Noll/Vo FNV-1a hash on a string 110 | * 111 | * input: 112 | * str - string to hash 113 | * hval - previous hash value or 0 if first call 114 | * 115 | * returns: 116 | * 32 bit hash as a static hash type 117 | * 118 | * NOTE: To use the recommended 32 bit FNV-1a hash, use FNV1_32A_INIT as the 119 | * hval arg on the first call to either fnv_32a_buf() or fnv_32a_str(). 
120 | */ 121 | Fnv32_t 122 | fnv_32a_str(char *str, Fnv32_t hval) 123 | { 124 | unsigned char *s = (unsigned char *)str; /* unsigned string */ 125 | 126 | /* 127 | * FNV-1a hash each octet in the buffer 128 | */ 129 | while (*s) { 130 | 131 | /* xor the bottom with the current octet */ 132 | hval ^= (Fnv32_t)*s++; 133 | 134 | /* multiply by the 32 bit FNV magic prime mod 2^32 */ 135 | #if defined(NO_FNV_GCC_OPTIMIZATION) 136 | hval *= FNV_32_PRIME; 137 | #else 138 | hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24); 139 | #endif 140 | } 141 | 142 | /* return our new hash value */ 143 | return hval; 144 | } 145 | -------------------------------------------------------------------------------- /src/MurmurHash.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Hash.h" 4 | 5 | #include "smhasher/MurmurHash1.h" 6 | #include "smhasher/MurmurHash2.h" 7 | #include "smhasher/MurmurHash3.h" 8 | 9 | /** 10 | * http://code.google.com/p/smhasher/ 11 | */ 12 | 13 | enum murmur_hash_t 14 | { 15 | murmur_hash1, 16 | murmur_hash1_aligned, 17 | murmur_hash2, 18 | murmur_hash2a, 19 | murmur_hash2_aligned, 20 | murmur_hash2_neutral, 21 | murmur_hash2_x64_64a, 22 | murmur_hash2_x86_64b, 23 | murmur_hash3_32, 24 | murmur_hash3_x86_128, 25 | murmur_hash3_x64_128 26 | }; 27 | 28 | template 29 | class murmur_t : public Hasher, S, T> 30 | { 31 | public: 32 | typedef Hasher, S, T> __hasher_t; 33 | typedef typename __hasher_t::hash_value_t hash_value_t; 34 | typedef typename __hasher_t::seed_value_t seed_value_t; 35 | 36 | murmur_t(seed_value_t seed = 0) : __hasher_t(seed) {} 37 | 38 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 39 | }; 40 | 41 | typedef murmur_t murmur1_32_t; 42 | typedef murmur_t murmur1_aligned_32_t; 43 | typedef murmur_t murmur2_32_t; 44 | typedef murmur_t murmur2a_32_t; 45 | typedef murmur_t murmur2_aligned_32_t; 46 | typedef murmur_t 
murmur2_neutral_32_t; 47 | typedef murmur_t murmur2_x64_64a_t; 48 | typedef murmur_t murmur2_x86_64b_t; 49 | typedef murmur_t murmur3_32_t; 50 | 51 | #ifdef SUPPORT_INT128 52 | typedef murmur_t murmur3_x86_128_t; 53 | typedef murmur_t murmur3_x64_128_t; 54 | #endif 55 | 56 | template <> 57 | const murmur1_32_t::hash_value_t murmur1_32_t::operator()(void *buf, size_t len, murmur1_32_t::seed_value_t seed) const 58 | { 59 | return MurmurHash1(buf, (int)len, seed); 60 | } 61 | 62 | template <> 63 | const murmur1_aligned_32_t::hash_value_t murmur1_aligned_32_t::operator()(void *buf, size_t len, murmur1_aligned_32_t::seed_value_t seed) const 64 | { 65 | return MurmurHash1Aligned(buf, (int)len, seed); 66 | } 67 | 68 | template <> 69 | const murmur2_32_t::hash_value_t murmur2_32_t::operator()(void *buf, size_t len, murmur2_32_t::seed_value_t seed) const 70 | { 71 | return MurmurHash2(buf, (int)len, seed); 72 | } 73 | 74 | template <> 75 | const murmur2a_32_t::hash_value_t murmur2a_32_t::operator()(void *buf, size_t len, murmur2a_32_t::seed_value_t seed) const 76 | { 77 | return MurmurHash2A(buf, (int)len, seed); 78 | } 79 | 80 | template <> 81 | const murmur2_aligned_32_t::hash_value_t murmur2_aligned_32_t::operator()(void *buf, size_t len, murmur2_aligned_32_t::seed_value_t seed) const 82 | { 83 | return MurmurHashAligned2(buf, (int)len, seed); 84 | } 85 | 86 | template <> 87 | const murmur2_neutral_32_t::hash_value_t murmur2_neutral_32_t::operator()(void *buf, size_t len, murmur2_neutral_32_t::seed_value_t seed) const 88 | { 89 | return MurmurHashNeutral2(buf, (int)len, seed); 90 | } 91 | 92 | template <> 93 | const murmur2_x64_64a_t::hash_value_t murmur2_x64_64a_t::operator()(void *buf, size_t len, murmur2_x64_64a_t::seed_value_t seed) const 94 | { 95 | return MurmurHash64A(buf, (int)len, seed); 96 | } 97 | 98 | template <> 99 | const murmur2_x86_64b_t::hash_value_t murmur2_x86_64b_t::operator()(void *buf, size_t len, murmur2_x86_64b_t::seed_value_t seed) const 100 | { 
101 | return MurmurHash64B(buf, (int)len, seed); 102 | } 103 | 104 | template <> 105 | const murmur3_32_t::hash_value_t murmur3_32_t::operator()(void *buf, size_t len, murmur3_32_t::seed_value_t seed) const 106 | { 107 | unsigned int hash = 0; 108 | 109 | MurmurHash3_x86_32(buf, (int)len, seed, &hash); 110 | 111 | return hash; 112 | } 113 | 114 | #ifdef SUPPORT_INT128 115 | 116 | template <> 117 | const murmur3_x86_128_t::hash_value_t murmur3_x86_128_t::operator()(void *buf, size_t len, murmur3_x86_128_t::seed_value_t seed) const 118 | { 119 | uint128_t hash = 0; 120 | 121 | MurmurHash3_x86_128(buf, (int)len, seed, &hash); 122 | 123 | return hash; 124 | } 125 | 126 | template <> 127 | const murmur3_x64_128_t::hash_value_t murmur3_x64_128_t::operator()(void *buf, size_t len, murmur3_x64_128_t::seed_value_t seed) const 128 | { 129 | uint128_t hash = 0; 130 | 131 | MurmurHash3_x64_128(buf, (int)len, seed, &hash); 132 | 133 | return hash; 134 | } 135 | 136 | #endif 137 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from collections import namedtuple 2 | import os 3 | import platform 4 | import sys 5 | import logging 6 | 7 | import pytest 8 | 9 | import pyhash 10 | 11 | 12 | @pytest.fixture 13 | def is_x86(): 14 | return platform.machine().lower() in ['i386', 'i686', 'x86_64', 'amd64'] 15 | 16 | 17 | @pytest.fixture 18 | def is_64bit(): 19 | return sys.maxsize > 2**32 20 | 21 | 22 | @pytest.fixture 23 | def is_winnt(): 24 | return os.name == "nt" 25 | 26 | 27 | @pytest.fixture 28 | def is_msvc(): 29 | return getattr(pyhash, "_MSC_VER", None) is not None 30 | 31 | 32 | CpuFeatures = namedtuple("CpuFeatures", 33 | ['name', 'vendor', 'arch', 'sse41', 'sse42', 'aes', 'avx', 'avx2']) 34 | 35 | 36 | @pytest.fixture 37 | def cpu(is_x86, is_64bit): 38 | cpu = CpuFeatures(name=platform.processor(), vendor=platform.platform(), 
arch=platform.machine(), 39 | sse41=False, sse42=False, aes=False, avx=False, avx2=False) 40 | 41 | if is_x86: 42 | from cpuid import _is_set, cpu_vendor, cpu_name, cpu_microarchitecture 43 | 44 | cpu = CpuFeatures( 45 | name=cpu_name().rstrip('\x00'), 46 | vendor=cpu_vendor(), 47 | arch=cpu_microarchitecture()[0], 48 | sse41=_is_set(1, 2, 19) == 'Yes', 49 | sse42=_is_set(1, 2, 20) == 'Yes', 50 | aes=_is_set(1, 2, 25) == 'Yes', 51 | avx=_is_set(1, 2, 28) == 'Yes', 52 | avx2=_is_set(7, 1, 5) == 'Yes', 53 | ) 54 | 55 | logging.getLogger().info("CPU %s", cpu) 56 | 57 | return cpu 58 | 59 | 60 | @pytest.fixture(scope="module") 61 | def test_data(): 62 | return b'test', u'test' 63 | 64 | 65 | for name in pyhash.__hasher__: 66 | def generate_fixture(name, hasher): 67 | @pytest.fixture(scope='module', name=name) 68 | def wrap(): 69 | return hasher 70 | return wrap 71 | 72 | globals()[name] = generate_fixture(name, getattr(pyhash, name)) 73 | 74 | 75 | @pytest.fixture(scope="module") 76 | def hash_tester(test_data): 77 | def do_test(hasher_type, bytes_hash, seed_hash, unicode_hash): 78 | assert hasher_type 79 | 80 | assert hasattr(hasher_type, 'seed') 81 | 82 | data, udata = test_data 83 | hasher = hasher_type() 84 | 85 | assert hasher 86 | assert hasattr(hasher, 'seed') 87 | assert hasher(data) in as_list( 88 | bytes_hash), "bytes hash should be %d" % hasher(data) 89 | assert hasher(data, seed=bytes_hash) in as_list( 90 | seed_hash), "bytes hash with seed should be %d" % hasher(data, seed=bytes_hash) 91 | assert hasher(data, data) in as_list( 92 | seed_hash), "hash(data, data) should be %d" % hasher(data, data) 93 | assert hasher(data, seed=hasher(data)) in as_list( 94 | seed_hash), "hasher(data, seed=hasher(data)) should be %d" % hasher(data, seed=hasher(data)) 95 | assert hasher(udata) in as_list( 96 | unicode_hash), "unicode hash should be %d" % hasher(udata) 97 | 98 | return do_test 99 | 100 | 101 | def as_list(v): 102 | if isinstance(v, list): 103 | return v 104 | 
105 | return [v] 106 | 107 | 108 | @pytest.fixture(scope="module") 109 | def fingerprint_tester(test_data): 110 | def do_test(fingerprinter_type, bytes_fingerprint, unicode_fingerprint): 111 | assert fingerprinter_type 112 | 113 | assert not hasattr(fingerprinter_type, 'seed') 114 | 115 | data, udata = test_data 116 | fingerprinter = fingerprinter_type() 117 | 118 | assert fingerprinter 119 | assert not hasattr(fingerprinter, 'seed') 120 | assert bytes_fingerprint == fingerprinter( 121 | data), "bytes fingerprint should be %d" % fingerprinter(data) 122 | assert unicode_fingerprint == fingerprinter( 123 | udata), "unicode fingerprint should be %d" % fingerprinter(udata) 124 | 125 | bytes_fingerprints = fingerprinter(data, data) 126 | 127 | assert [bytes_fingerprint, 128 | bytes_fingerprint] == bytes_fingerprints, "bytes fingerprint should be %s" % bytes_fingerprints 129 | 130 | return do_test 131 | 132 | 133 | @pytest.fixture(scope="module") 134 | def hash_bencher(): 135 | def do_bench(benchmark, hasher, hash, size=256): 136 | h = hasher() 137 | data = bytes(bytearray([i % 256 for i in range(size)])) 138 | 139 | @benchmark 140 | def result(): 141 | return h(data) 142 | 143 | if isinstance(hash, list): 144 | assert result in hash 145 | else: 146 | assert result == hash 147 | 148 | return do_bench 149 | -------------------------------------------------------------------------------- /tests/test_metrohash.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pyhash 4 | 5 | 6 | def test_metro_64_1(hash_tester): 7 | hash_tester(hasher_type=pyhash.metro_64_1, 8 | bytes_hash=7555593383206836236, 9 | seed_hash=9613011798576657330, 10 | unicode_hash=5634638029758084150) 11 | 12 | 13 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 14 | def test_metro_128_1(hash_tester): 15 | hash_tester(hasher_type=pyhash.metro_128_1, 16 | bytes_hash=310240039238111093048322555259813357218, 
17 | seed_hash=330324289553816260191102680044286377986, 18 | unicode_hash=160639312567243412360084738183177128736) 19 | 20 | 21 | def test_metro_64_2(hash_tester): 22 | hash_tester(hasher_type=pyhash.metro_64_2, 23 | bytes_hash=13328239478646503906, 24 | seed_hash=16521803336796657060, 25 | unicode_hash=5992985172783395072) 26 | 27 | 28 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 29 | def test_metro_128_2(hash_tester): 30 | hash_tester(hasher_type=pyhash.metro_128_2, 31 | bytes_hash=308979041176504703647272401075625691044, 32 | seed_hash=156408679042779357342816971045969684594, 33 | unicode_hash=169904568621124891123383613748925830588) 34 | 35 | 36 | def test_metro_Crc64_1(hash_tester): 37 | hash_tester(hasher_type=pyhash.metro_crc_64_1, 38 | bytes_hash=6872506084457499713, 39 | seed_hash=14064239385324957326, 40 | unicode_hash=5634638029758084150) 41 | 42 | 43 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 44 | def test_metro_Crc128_1(hash_tester): 45 | hash_tester(hasher_type=pyhash.metro_crc_128_1, 46 | bytes_hash=44856800307026421677415827141042094245, 47 | seed_hash=199990471895323666720887863107514038076, 48 | unicode_hash=53052528140813423722778028047086277728) 49 | 50 | 51 | def test_metro_Crc64_2(hash_tester): 52 | hash_tester(hasher_type=pyhash.metro_crc_64_2, 53 | bytes_hash=9168163846307153532, 54 | seed_hash=11235719994915751828, 55 | unicode_hash=15697829093445668111) 56 | 57 | 58 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 59 | def test_metro_Crc128_2(hash_tester): 60 | hash_tester(hasher_type=pyhash.metro_crc_128_2, 61 | bytes_hash=29039398407115405218669555123781288008, 62 | seed_hash=26197404070933777589488526163359489061, 63 | unicode_hash=136212167639765185451107230087801381416) 64 | 65 | 66 | @pytest.mark.benchmark(group='hash64', disable_gc=True) 67 | def test_metro_hash64_1_perf(benchmark, hash_bencher): 68 | 
hash_bencher(benchmark, pyhash.metro_64_1, 6897098198286496634) 69 | 70 | 71 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 72 | @pytest.mark.benchmark(group='hash128', disable_gc=True) 73 | def test_metro_hash128_1_perf(benchmark, hash_bencher): 74 | hash_bencher(benchmark, pyhash.metro_128_1, 75 | 284089860902754045805586152203438670446) 76 | 77 | 78 | @pytest.mark.benchmark(group='hash64', disable_gc=True) 79 | def test_metro_hash64_2_perf(benchmark, hash_bencher): 80 | hash_bencher(benchmark, pyhash.metro_64_2, 9928248983045338067) 81 | 82 | 83 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 84 | @pytest.mark.benchmark(group='hash128', disable_gc=True) 85 | def test_metro_hash128_2_perf(benchmark, hash_bencher): 86 | hash_bencher(benchmark, pyhash.metro_128_2, 87 | 298961466275459716490100873977629041349) 88 | 89 | 90 | @pytest.mark.benchmark(group='hash64', disable_gc=True) 91 | def test_metro_hash_crc64_1_perf(benchmark, hash_bencher): 92 | hash_bencher(benchmark, pyhash.metro_crc_64_1, 15625740387403976237) 93 | 94 | 95 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 96 | @pytest.mark.benchmark(group='hash128', disable_gc=True) 97 | def test_metro_hash_crc128_1_perf(benchmark, hash_bencher): 98 | hash_bencher(benchmark, pyhash.metro_crc_128_1, 99 | 221795002586229010982769362009963170208) 100 | 101 | 102 | @pytest.mark.benchmark(group='hash64', disable_gc=True) 103 | def test_metro_hash_crc64_2_perf(benchmark, hash_bencher): 104 | hash_bencher(benchmark, pyhash.metro_crc_64_2, 9313388757605283934) 105 | 106 | 107 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 108 | @pytest.mark.benchmark(group='hash128', disable_gc=True) 109 | def test_metro_hash_crc128_2_perf(benchmark, hash_bencher): 110 | hash_bencher(benchmark, pyhash.metro_crc_128_2, 111 | 319940271611864595969873671463832146628) 112 | 
-------------------------------------------------------------------------------- /src/Hash.cpp: -------------------------------------------------------------------------------- 1 | #include "Hash.h" 2 | 3 | #include "FNV1.h" 4 | #include "MurmurHash.h" 5 | #include "Lookup3.h" 6 | #include "SuperFastHash.h" 7 | #include "CityHash.h" 8 | #include "SpookyHash.h" 9 | #include "FarmHash.h" 10 | #include "MetroHash.h" 11 | #include "Mum.h" 12 | #include "T1ha.h" 13 | #include "xxHash.h" 14 | #include "Highway.h" 15 | #include "wyHash.h" 16 | #include "Halftime.h" 17 | 18 | // Entry point of the `_pyhash` extension module. It publishes build-time 19 | // feature flags and compiler versions as module attributes, then calls 20 | // `Export(m, name)` on every hasher/fingerprinter type compiled into this build. 21 | PYBIND11_MODULE(_pyhash, m) 22 | { 23 | m.doc() = "Python Non-cryptographic Hash Library"; 24 | 25 | #if defined(__SSE4_2__) && defined(__x86_64__) 26 | m.attr("build_with_sse42") = true; 27 | #else 28 | m.attr("build_with_sse42") = false; 29 | #endif 30 | 31 | #ifdef SUPPORT_INT128 32 | m.attr("build_with_int128") = true; 33 | #else 34 | m.attr("build_with_int128") = false; 35 | #endif 36 | 37 | #ifdef _MSC_VER 38 | m.attr("_MSC_VER") = _MSC_VER; 39 | #endif 40 | 41 | // Compiler versions are encoded as major * 10000 + minor * 100 + patchlevel. 42 | #ifdef __GNUC__ 43 | m.attr("_GCC_VER") = __GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__; 44 | #endif 45 | 46 | #ifdef __clang__ 47 | // `__clang__` is only a presence flag; the version is split across the three macros below. 48 | m.attr("_CLANG_VER") = __clang_major__ * 10000 + __clang_minor__ * 100 + __clang_patchlevel__; 49 | #endif 50 | 51 | #ifdef __VERSION__ 52 | m.attr("__VERSION__") = __VERSION__; 53 | #endif 54 | 55 | fnv1_32_t::Export(m, "fnv1_32"); 56 | fnv1a_32_t::Export(m, "fnv1a_32"); 57 | fnv1_64_t::Export(m, "fnv1_64"); 58 | fnv1a_64_t::Export(m, "fnv1a_64"); 59 | 60 | murmur1_32_t::Export(m, "murmur1_32"); 61 | murmur1_aligned_32_t::Export(m, "murmur1_aligned_32"); 62 | murmur2_32_t::Export(m, "murmur2_32"); 63 | murmur2a_32_t::Export(m, "murmur2a_32"); 64 | murmur2_aligned_32_t::Export(m, "murmur2_aligned_32"); 65 | murmur2_neutral_32_t::Export(m, "murmur2_neutral_32"); 66 | murmur2_x64_64a_t::Export(m, "murmur2_x64_64a"); 67 | murmur2_x86_64b_t::Export(m, "murmur2_x86_64b"); 68 | murmur3_32_t::Export(m,
"murmur3_32"); 69 | #ifdef SUPPORT_INT128 70 | murmur3_x86_128_t::Export(m, "murmur3_x86_128"); 71 | murmur3_x64_128_t::Export(m, "murmur3_x64_128"); 72 | #endif 73 | 74 | lookup3_little_t::Export(m, "lookup3_little"); 75 | lookup3_big_t::Export(m, "lookup3_big"); 76 | 77 | super_fast_hash_t::Export(m, "super_fast_hash"); 78 | 79 | city_hash_32_t::Export(m, "city_32"); 80 | city_hash_64_t::Export(m, "city_64"); 81 | #ifdef SUPPORT_INT128 82 | city_hash_128_t::Export(m, "city_128"); 83 | // CityHash.h only defines the CRC-based types when SSE4.2 is enabled on x86_64. 84 | #if defined(__SSE4_2__) && defined(__x86_64__) 85 | city_hash_crc_128_t::Export(m, "city_crc_128"); 86 | city_fingerprint_256_t::Export(m, "city_fingerprint_256"); 87 | #endif 88 | #endif 89 | 90 | spooky_hash_v1_32_t::Export(m, "spooky_v1_32"); 91 | spooky_hash_v1_64_t::Export(m, "spooky_v1_64"); 92 | #ifdef SUPPORT_INT128 93 | spooky_hash_v1_128_t::Export(m, "spooky_v1_128"); 94 | #endif 95 | 96 | spooky_hash_v2_32_t::Export(m, "spooky_v2_32"); 97 | spooky_hash_v2_64_t::Export(m, "spooky_v2_64"); 98 | #ifdef SUPPORT_INT128 99 | spooky_hash_v2_128_t::Export(m, "spooky_v2_128"); 100 | #endif 101 | 102 | farm_hash_32_t::Export(m, "farm_32"); 103 | farm_hash_64_t::Export(m, "farm_64"); 104 | #ifdef SUPPORT_INT128 105 | farm_hash_128_t::Export(m, "farm_128"); 106 | #endif 107 | 108 | farm_fingerprint_32_t::Export(m, "farm_fingerprint_32"); 109 | farm_fingerprint_64_t::Export(m, "farm_fingerprint_64"); 110 | #ifdef SUPPORT_INT128 111 | farm_fingerprint_128_t::Export(m, "farm_fingerprint_128"); 112 | #endif 113 | 114 | metro_hash_64_1_t::Export(m, "metro_64_1"); 115 | metro_hash_64_2_t::Export(m, "metro_64_2"); 116 | #ifdef SUPPORT_INT128 117 | metro_hash_128_1_t::Export(m, "metro_128_1"); 118 | metro_hash_128_2_t::Export(m, "metro_128_2"); 119 | #endif 120 | 121 | metro_hash_64_crc_1_t::Export(m, "metro_64_crc_1"); 122 | metro_hash_64_crc_2_t::Export(m, "metro_64_crc_2"); 123 | #ifdef SUPPORT_INT128 124 | metro_hash_128_crc_1_t::Export(m, "metro_128_crc_1"); 125 | metro_hash_128_crc_2_t::Export(m, "metro_128_crc_2"); 126 | #endif 127 |
mum_hash_64_t::Export(m, "mum_64"); 129 | 130 | t1ha2_atonce_t::Export(m, "t1ha2_atonce"); 131 | #ifdef SUPPORT_INT128 132 | t1ha2_atonce128_t::Export(m, "t1ha2_atonce128"); 133 | #endif 134 | t1ha1_le_t::Export(m, "t1ha1_le"); 135 | t1ha1_be_t::Export(m, "t1ha1_be"); 136 | t1ha0_t::Export(m, "t1ha0"); 137 | 138 | xx_hash_32_t::Export(m, "xx_32"); 139 | xx_hash_64_t::Export(m, "xx_64"); 140 | 141 | xxh3_hash_64_t::Export(m, "xxh3_64"); 142 | #ifdef SUPPORT_INT128 143 | xxh3_hash_128_t::Export(m, "xxh3_128"); 144 | #endif 145 | 146 | #ifdef SUPPORT_INT128 147 | highway_hash_64_t::Export(m, "highway_64"); 148 | highway_hash_128_t::Export(m, "highway_128"); 149 | highway_hash_256_t::Export(m, "highway_256"); 150 | #endif 151 | 152 | wy_hash_32_t::Export(m, "wy_32"); 153 | wy_hash_64_t::Export(m, "wy_64"); 154 | 155 | halftime_hash_64_t::Export(m, "halftime_64"); 156 | 157 | #ifdef SUPPORT_INT128 158 | halftime_hash_128_t::Export(m, "halftime_128"); 159 | halftime_hash_256_t::Export(m, "halftime_256"); 160 | halftime_hash_512_t::Export(m, "halftime_512"); 161 | #endif 162 | } 163 | -------------------------------------------------------------------------------- /src/fnv/hash_32.c: -------------------------------------------------------------------------------- 1 | /* 2 | * hash_32 - 32 bit Fowler/Noll/Vo hash code 3 | * 4 | * @(#) $Revision: 5.1 $ 5 | * @(#) $Id: hash_32.c,v 5.1 2009/06/30 09:13:32 chongo Exp $ 6 | * @(#) $Source: /usr/local/src/cmd/fnv/RCS/hash_32.c,v $ 7 | * 8 | *** 9 | * 10 | * Fowler/Noll/Vo hash 11 | * 12 | * The basis of this hash algorithm was taken from an idea sent 13 | * as reviewer comments to the IEEE POSIX P1003.2 committee by: 14 | * 15 | * Phong Vo (http://www.research.att.com/info/kpv/) 16 | * Glenn Fowler (http://www.research.att.com/~gsf/) 17 | * 18 | * In a subsequent ballot round: 19 | * 20 | * Landon Curt Noll (http://www.isthe.com/chongo/) 21 | * 22 | * improved on their algorithm.
Some people tried this hash 23 | * and found that it worked rather well. In an EMail message 24 | * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash. 25 | * 26 | * FNV hashes are designed to be fast while maintaining a low 27 | * collision rate. The FNV speed allows one to quickly hash lots 28 | * of data while maintaining a reasonable collision rate. See: 29 | * 30 | * http://www.isthe.com/chongo/tech/comp/fnv/index.html 31 | * 32 | * for more details as well as other forms of the FNV hash. 33 | *** 34 | * 35 | * NOTE: The FNV-0 historic hash is not recommended. One should use 36 | * the FNV-1 hash instead. 37 | * 38 | * To use the 32 bit FNV-0 historic hash, pass FNV0_32_INIT as the 39 | * Fnv32_t hashval argument to fnv_32_buf() or fnv_32_str(). 40 | * 41 | * To use the recommended 32 bit FNV-1 hash, pass FNV1_32_INIT as the 42 | * Fnv32_t hashval argument to fnv_32_buf() or fnv_32_str(). 43 | * 44 | *** 45 | * 46 | * Please do not copyright this code. This code is in the public domain. 47 | * 48 | * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 49 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO 50 | * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR 51 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 52 | * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 53 | * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 54 | * PERFORMANCE OF THIS SOFTWARE. 55 | * 56 | * By: 57 | * chongo /\oo/\ 58 | * http://www.isthe.com/chongo/ 59 | * 60 | * Share and Enjoy! 
:-) 61 | */ 62 | 63 | #include 64 | #include "fnv.h" 65 | 66 | 67 | /* 68 | * 32 bit magic FNV-0 and FNV-1 prime 69 | */ 70 | #define FNV_32_PRIME ((Fnv32_t)0x01000193) 71 | 72 | 73 | /* 74 | * fnv_32_buf - perform a 32 bit Fowler/Noll/Vo hash on a buffer 75 | * 76 | * input: 77 | * buf - start of buffer to hash 78 | * len - length of buffer in octets 79 | * hval - previous hash value or 0 if first call 80 | * 81 | * returns: 82 | * 32 bit hash as a static hash type 83 | * 84 | * NOTE: To use the 32 bit FNV-0 historic hash, use FNV0_32_INIT as the hval 85 | * argument on the first call to either fnv_32_buf() or fnv_32_str(). 86 | * 87 | * NOTE: To use the recommended 32 bit FNV-1 hash, use FNV1_32_INIT as the hval 88 | * argument on the first call to either fnv_32_buf() or fnv_32_str(). 89 | */ 90 | Fnv32_t 91 | fnv_32_buf(void *buf, size_t len, Fnv32_t hval) 92 | { 93 | unsigned char *bp = (unsigned char *)buf; /* start of buffer */ 94 | unsigned char *be = bp + len; /* beyond end of buffer */ 95 | 96 | /* 97 | * FNV-1 hash each octet in the buffer 98 | */ 99 | while (bp < be) { 100 | 101 | /* multiply by the 32 bit FNV magic prime mod 2^32 */ 102 | #if defined(NO_FNV_GCC_OPTIMIZATION) 103 | hval *= FNV_32_PRIME; 104 | #else 105 | hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24); 106 | #endif 107 | 108 | /* xor the bottom with the current octet */ 109 | hval ^= (Fnv32_t)*bp++; 110 | } 111 | 112 | /* return our new hash value */ 113 | return hval; 114 | } 115 | 116 | 117 | /* 118 | * fnv_32_str - perform a 32 bit Fowler/Noll/Vo hash on a string 119 | * 120 | * input: 121 | * str - string to hash 122 | * hval - previous hash value or 0 if first call 123 | * 124 | * returns: 125 | * 32 bit hash as a static hash type 126 | * 127 | * NOTE: To use the 32 bit FNV-0 historic hash, use FNV0_32_INIT as the hval 128 | * argument on the first call to either fnv_32_buf() or fnv_32_str(). 
129 | * 130 | * NOTE: To use the recommended 32 bit FNV-1 hash, use FNV1_32_INIT as the hval 131 | * argument on the first call to either fnv_32_buf() or fnv_32_str(). 132 | */ 133 | Fnv32_t 134 | fnv_32_str(char *str, Fnv32_t hval) 135 | { 136 | unsigned char *s = (unsigned char *)str; /* unsigned string */ 137 | 138 | /* 139 | * FNV-1 hash each octet in the buffer 140 | */ 141 | while (*s) { 142 | 143 | /* multiply by the 32 bit FNV magic prime mod 2^32 */ 144 | #if defined(NO_FNV_GCC_OPTIMIZATION) 145 | hval *= FNV_32_PRIME; 146 | #else 147 | hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24); 148 | #endif 149 | 150 | /* xor the bottom with the current octet */ 151 | hval ^= (Fnv32_t)*s++; 152 | } 153 | 154 | /* return our new hash value */ 155 | return hval; 156 | } 157 | -------------------------------------------------------------------------------- /src/fnv/README: -------------------------------------------------------------------------------- 1 | #=====================# 2 | # Fowler/Noll/Vo hash # 3 | #=====================# 4 | 5 | The basis of this hash algorithm was taken from an idea sent 6 | as reviewer comments to the IEEE POSIX P1003.2 committee by: 7 | 8 | Phong Vo (http://www.research.att.com/info/kpv) 9 | Glenn Fowler (http://www.research.att.com/~gsf/) 10 | 11 | In a subsequent ballot round: 12 | 13 | Landon Curt Noll (http://www.isthe.com/chongo) 14 | 15 | improved on their algorithm. Some people tried this hash 16 | and found that it worked rather well. In an EMail message 17 | to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash. 18 | 19 | FNV hashes are designed to be fast while maintaining a low 20 | collision rate. The FNV speed allows one to quickly hash lots 21 | of data while maintaining a reasonable collision rate. See: 22 | 23 | http://www.isthe.com/chongo/tech/comp/fnv/index.html 24 | 25 | for more details as well as other forms of the FNV hash. 
26 | Comments, questions, bug fixes and suggestions are welcome at 27 | the address given in the above URL. 28 | 29 | 30 | #==================# 31 | # FNV hash utility # 32 | #==================# 33 | 34 | Two sets of hash utilities (32 bit and 64 bit) are provided: 35 | 36 | fnv032 [-b bcnt] [-m] [-s arg] [-t code] [-v] [arg ...] 37 | fnv132 [-b bcnt] [-m] [-s arg] [-t code] [-v] [arg ...] 38 | fnv1a32 [-b bcnt] [-m] [-s arg] [-t code] [-v] [arg ...] 39 | 40 | fnv064 [-b bcnt] [-m] [-s arg] [-t code] [-v] [arg ...] 41 | fnv164 [-b bcnt] [-m] [-s arg] [-t code] [-v] [arg ...] 42 | fnv1a64 [-b bcnt] [-m] [-s arg] [-t code] [-v] [arg ...] 43 | 44 | -b bcnt mask off all but the lower bcnt bits (default: 32) 45 | -m multiple hashes, one per line for each arg 46 | -s hash arg as a string (ignoring terminating NUL bytes) 47 | -t code 0 ==> generate test vectors, 1 ==> test FNV hash 48 | -v verbose mode, print arg after hash (implies -m) 49 | arg string (if -s was given) or filename (default stdin) 50 | 51 | The fnv032, fnv064 implement the historic FNV-0 hash. 52 | The fnv132, fnv164 implement the recommended FNV-1 hash. 53 | The fnv1a32, fnv1a64 implement the recommended FNV-1a hash. 54 | 55 | FNV-0 is the original historic FNV algorithm with a 0 offset basis. 56 | It is recommended that FNV-1, with a non-0 offset basis, be used instead. 57 | 58 | To test FNV hashes, try: 59 | 60 | fnv032 -t 1 -v 61 | fnv132 -t 1 -v 62 | fnv1a32 -t 1 -v 63 | 64 | fnv064 -t 1 -v 65 | fnv164 -t 1 -v 66 | fnv1a64 -t 1 -v 67 | 68 | If you are compiling, try: 69 | 70 | make check 71 | 72 | 73 | #==================# 74 | # FNV hash library # 75 | #==================# 76 | 77 | The libfnv.a library implements both a 32 bit and a 64 bit FNV hash 78 | on collections of bytes, on NUL terminated strings or on an open file 79 | descriptor.
80 | 81 | Here is the 32 bit FNV-1 hash: 82 | 83 | Fnv32_t fnv_32_buf(void *buf, size_t len, Fnv32_t hval); /* byte buf */ 84 | Fnv32_t fnv_32_str(char *string, Fnv32_t hval); /* string */ 85 | 86 | Here is the 32 bit FNV-1a hash: 87 | 88 | Fnv32_t fnv_32a_buf(void *buf, size_t len, Fnv32_t hval); /* byte buf */ 89 | Fnv32_t fnv_32a_str(char *string, Fnv32_t hval); /* string */ 90 | 91 | Here is the 64 bit FNV-1 hash: 92 | 93 | Fnv64_t fnv_64_buf(void *buf, size_t len, Fnv64_t hval); /* byte buf */ 94 | Fnv64_t fnv_64_str(char *string, Fnv64_t hval); /* string */ 95 | 96 | Here is the 64 bit FNV-1a hash: 97 | 98 | Fnv64_t fnv_64a_buf(void *buf, size_t len, Fnv64_t hval); /* byte buf */ 99 | Fnv64_t fnv_64a_str(char *string, Fnv64_t hval); /* string */ 100 | 101 | On the first call to a hash function, one must supply the initial basis 102 | that is appropriate for the hash in question: 103 | 104 | FNV-0: (not recommended) 105 | 106 | FNV0_32_INIT /* 32 bit FNV-0 initial basis */ 107 | FNV0_64_INIT /* 64 bit FNV-0 initial basis */ 108 | 109 | FNV-1: 110 | 111 | FNV1_32_INIT /* 32 bit FNV-1 initial basis */ 112 | FNV1_64_INIT /* 64 bit FNV-1 initial basis */ 113 | 114 | FNV-1a: 115 | 116 | FNV1A_32_INIT /* 32 bit FNV-1a initial basis */ 117 | FNV1A_64_INIT /* 64 bit FNV-1a initial basis */ 118 | 119 | For example, to perform a 64 bit FNV-1 hash: 120 | 121 | #include "fnv.h" 122 | 123 | Fnv64_t hash_val; 124 | 125 | hash_val = fnv_64_str("a string", FNV1_64_INIT); 126 | hash_val = fnv_64_str("more string", hash_val); 127 | 128 | produces the same final hash value as: 129 | 130 | hash_val = fnv_64_str("a stringmore string", FNV1_64_INIT); 131 | 132 | NOTE: If one used 'FNV0_64_INIT' instead of 'FNV1_64_INIT' one would get the 133 | historic FNV-0 hash instead of the recommended FNV-1 hash.
134 | 135 | To perform a 32 bit FNV-1 hash: 136 | 137 | #include "fnv.h" 138 | 139 | Fnv32_t hash_val; 140 | 141 | hash_val = fnv_32_buf(buf, length_of_buf, FNV1_32_INIT); 142 | hash_val = fnv_32_str("more data", hash_val); 143 | 144 | To perform a 64 bit FNV-1a hash: 145 | 146 | #include "fnv.h" 147 | 148 | Fnv64_t hash_val; 149 | 150 | hash_val = fnv_64a_buf(buf, length_of_buf, FNV1A_64_INIT); 151 | hash_val = fnv_64a_str("more data", hash_val); 152 | 153 | =-= 154 | 155 | chongo /\oo/\ 156 | http://www.isthe.com/chongo 157 | 158 | Share and Enjoy! 159 | -------------------------------------------------------------------------------- /src/CityHash.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "Hash.h" 4 | 5 | #include "smhasher/City.h" 6 | 7 | /** 8 | 9 | The CityHash family of hash functions 10 | 11 | https://code.google.com/p/cityhash/ 12 | 13 | **/ 14 | 15 | template 16 | struct city_hash_t : public Hasher, T> 17 | { 18 | #if defined(__SSE4_2__) && defined(__x86_64__) 19 | static bool has_sse4_2; 20 | #endif 21 | 22 | public: 23 | typedef Hasher, T> __hasher_t; 24 | typedef typename __hasher_t::hash_value_t hash_value_t; 25 | typedef typename __hasher_t::seed_value_t seed_value_t; 26 | 27 | city_hash_t(seed_value_t seed = 0) : __hasher_t(seed) {} 28 | 29 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 30 | }; 31 | 32 | #if defined(__SSE4_2__) && defined(__x86_64__) 33 | 34 | bool support_sse4_2(void) 35 | { 36 | unsigned cpuinfo[4] = {0}; 37 | unsigned infotype = 1; 38 | 39 | #ifdef _MSC_VER 40 | __cpuid(cpuinfo, infotype); 41 | #else // _MSC_VER 42 | // cpuid and PIC mode don't play nice. Push ebx before use!
43 | // see http://www.technovelty.org/code/arch/pic-cas.html 44 | #ifdef __x86_64__ 45 | __asm__ __volatile__( 46 | "cpuid;" 47 | : "=a"(cpuinfo[0]), "=b"(cpuinfo[1]), "=c"(cpuinfo[2]), "=d"(cpuinfo[3]) 48 | : "a"(infotype)); 49 | #else // __x86_64__ 50 | __asm__ __volatile__( 51 | "pushl %%ebx;" 52 | "cpuid;" 53 | "movl %%ebx,%1;" 54 | "pop %%ebx;" 55 | : "=a"(cpuinfo[0]), "=m"(cpuinfo[1]), "=c"(cpuinfo[2]), "=d"(cpuinfo[3]) 56 | : "a"(infotype)); 57 | #endif // __x86_64__ 58 | #endif // _MSC_VER 59 | 60 | return cpuinfo[2] & (1 << 20); 61 | } 62 | 63 | template 64 | bool city_hash_t::has_sse4_2 = support_sse4_2(); 65 | 66 | #endif 67 | 68 | typedef city_hash_t city_hash_32_t; 69 | typedef city_hash_t city_hash_64_t; 70 | #ifdef SUPPORT_INT128 71 | typedef city_hash_t city_hash_128_t; 72 | #endif 73 | 74 | template <> 75 | const city_hash_32_t::hash_value_t city_hash_32_t::operator()(void *buf, size_t len, city_hash_32_t::seed_value_t seed) const 76 | { 77 | return CityHash32WithSeed((const char *)buf, len, seed); 78 | } 79 | 80 | template <> 81 | const city_hash_64_t::hash_value_t city_hash_64_t::operator()(void *buf, size_t len, city_hash_64_t::seed_value_t seed) const 82 | { 83 | if (seed) 84 | { 85 | return CityHash64WithSeed((const char *)buf, len, seed); 86 | } 87 | else 88 | { 89 | return CityHash64((const char *)buf, len); 90 | } 91 | } 92 | 93 | #ifdef SUPPORT_INT128 94 | 95 | template <> 96 | const city_hash_128_t::hash_value_t city_hash_128_t::operator()(void *buf, size_t len, city_hash_128_t::seed_value_t seed) const 97 | { 98 | #if defined(__SSE4_2__) && defined(__x86_64__) 99 | if (has_sse4_2) 100 | { 101 | if (seed) 102 | { 103 | const uint128 &hash = CityHashCrc128WithSeed((const char *)buf, len, std::make_pair(U128_LO(seed), U128_HI(seed))); 104 | 105 | return *(uint128_t *)&hash; 106 | } 107 | else 108 | { 109 | const uint128 &hash = CityHashCrc128((const char *)buf, len); 110 | 111 | return *(uint128_t *)&hash; 112 | } 113 | } 114 | #endif 115 
| 116 | if (seed) 117 | { 118 | const uint128 &hash = CityHash128WithSeed((const char *)buf, len, std::make_pair(U128_LO(seed), U128_HI(seed))); 119 | 120 | return *(uint128_t *)&hash; 121 | } 122 | else 123 | { 124 | const uint128 &hash = CityHash128((const char *)buf, len); 125 | 126 | return *(uint128_t *)&hash; 127 | } 128 | } 129 | 130 | #if defined(__SSE4_2__) && defined(__x86_64__) 131 | 132 | template 133 | struct city_hash_crc_t : public Hasher, T> 134 | { 135 | public: 136 | typedef Hasher, T> __hasher_t; 137 | typedef typename __hasher_t::hash_value_t hash_value_t; 138 | typedef typename __hasher_t::seed_value_t seed_value_t; 139 | 140 | city_hash_crc_t(seed_value_t seed = {}) : __hasher_t(seed) {} 141 | 142 | const hash_value_t operator()(void *buf, size_t len, seed_value_t seed) const; 143 | }; 144 | 145 | template 146 | struct city_fingerprint_t : public Fingerprinter, T> 147 | { 148 | public: 149 | typedef Fingerprinter, T> __fingerprinter_t; 150 | typedef typename __fingerprinter_t::fingerprint_t fingerprint_value_t; 151 | 152 | city_fingerprint_t() = default; 153 | 154 | const fingerprint_value_t operator()(void *buf, size_t len) const; 155 | }; 156 | 157 | typedef city_hash_crc_t city_hash_crc_128_t; 158 | typedef city_fingerprint_t city_fingerprint_256_t; 159 | 160 | template <> 161 | const city_hash_crc_128_t::hash_value_t city_hash_crc_128_t::operator()(void *buf, size_t len, city_hash_crc_128_t::seed_value_t seed) const 162 | { 163 | if (seed) 164 | { 165 | const uint128 &hash = CityHashCrc128WithSeed((const char *)buf, len, std::make_pair(U128_LO(seed), U128_HI(seed))); 166 | 167 | return *(uint128_t *)&hash; 168 | } 169 | else 170 | { 171 | const uint128 &hash = CityHashCrc128((const char *)buf, len); 172 | 173 | return *(uint128_t *)&hash; 174 | } 175 | } 176 | 177 | template <> 178 | const city_fingerprint_256_t::fingerprint_value_t city_fingerprint_256_t::operator()(void *buf, size_t len) const 179 | { 180 | uint256_t result = {}; 181 | 
182 | CityHashCrc256((const char *)buf, len, result.data()); 183 | 184 | return result; 185 | } 186 | 187 | #endif // defined(__SSE4_2__) && defined(__x86_64__) 188 | 189 | #endif // SUPPORT_INT128 190 | -------------------------------------------------------------------------------- /tests/test_murmur.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import pyhash 4 | 5 | 6 | def test_murmur1_32(hash_tester): 7 | hash_tester(hasher_type=pyhash.murmur1_32, 8 | bytes_hash=1706635965, 9 | seed_hash=1637637239, 10 | unicode_hash=2296970802) 11 | 12 | 13 | def test_murmur1_aligned_32(hash_tester): 14 | hash_tester(hasher_type=pyhash.murmur1_aligned_32, 15 | bytes_hash=1706635965, 16 | seed_hash=1637637239, 17 | unicode_hash=2296970802) 18 | 19 | 20 | def test_murmur2_32(hash_tester): 21 | hash_tester(hasher_type=pyhash.murmur2_32, 22 | bytes_hash=403862830, 23 | seed_hash=1257009171, 24 | unicode_hash=2308212514) 25 | 26 | 27 | def test_murmur2a_32(hash_tester): 28 | hash_tester(hasher_type=pyhash.murmur2a_32, 29 | bytes_hash=1026673864, 30 | seed_hash=3640713775, 31 | unicode_hash=3710634486) 32 | 33 | 34 | def test_murmur2_aligned32(hash_tester): 35 | hash_tester(hasher_type=pyhash.murmur2_aligned_32, 36 | bytes_hash=403862830, 37 | seed_hash=1257009171, 38 | unicode_hash=2308212514) 39 | 40 | 41 | def test_murmur2_neutral32(hash_tester): 42 | hash_tester(hasher_type=pyhash.murmur2_neutral_32, 43 | bytes_hash=403862830, 44 | seed_hash=1257009171, 45 | unicode_hash=2308212514) 46 | 47 | 48 | def test_murmur2_x64_64a(hash_tester): 49 | hash_tester(hasher_type=pyhash.murmur2_x64_64a, 50 | bytes_hash=3407684658384555107, 51 | seed_hash=14278059344916754999, 52 | unicode_hash=9820020607534352415) 53 | 54 | 55 | def test_murmur2_x86_64b(hash_tester): 56 | hash_tester(hasher_type=pyhash.murmur2_x86_64b, 57 | bytes_hash=1560774255606158893, 58 | seed_hash=11567531768634065834, 59 | unicode_hash=7104676830630207180) 
60 | 61 | 62 | def test_murmur3_32(hash_tester): 63 | hash_tester(hasher_type=pyhash.murmur3_32, 64 | bytes_hash=3127628307, 65 | seed_hash=1973649836, 66 | unicode_hash=1351191292) 67 | 68 | 69 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 70 | def test_murmur3_x86_128(hash_tester): 71 | hash_tester(hasher_type=pyhash.murmur3_x86_128, 72 | bytes_hash=113049230771270950235709929058346397488, 73 | seed_hash=201730919445129814667855021331871906456, 74 | unicode_hash=34467989874860051826961972957664456325) 75 | 76 | 77 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 78 | def test_murmur3_x64_128(hash_tester): 79 | hash_tester(hasher_type=pyhash.murmur3_x64_128, 80 | bytes_hash=204797213367049729698754624420042367389, 81 | seed_hash=25000065729391260169145522623652811022, 82 | unicode_hash=301054382688326301269845371608405900524) 83 | 84 | 85 | @pytest.mark.benchmark(group='hash32', disable_gc=True) 86 | def test_murmur_hash1_32_perf(benchmark, hash_bencher): 87 | hash_bencher(benchmark, pyhash.murmur1_32, 3043957486) 88 | 89 | 90 | @pytest.mark.benchmark(group='hash32', disable_gc=True) 91 | def test_murmur_hash1_aligned_32_perf(benchmark, hash_bencher): 92 | hash_bencher(benchmark, pyhash.murmur1_aligned_32, 3043957486) 93 | 94 | 95 | @pytest.mark.benchmark(group='hash32', disable_gc=True) 96 | def test_murmur_hash2_32_perf(benchmark, hash_bencher): 97 | hash_bencher(benchmark, pyhash.murmur2_32, 2373126550) 98 | 99 | 100 | @pytest.mark.benchmark(group='hash32', disable_gc=True) 101 | def test_murmur_hash2a_32_perf(benchmark, hash_bencher): 102 | hash_bencher(benchmark, pyhash.murmur2a_32, 178525084) 103 | 104 | 105 | @pytest.mark.benchmark(group='hash32', disable_gc=True) 106 | def test_murmur_hash2_aligned_32_perf(benchmark, hash_bencher): 107 | hash_bencher(benchmark, pyhash.murmur2_aligned_32, 2373126550) 108 | 109 | 110 | @pytest.mark.benchmark(group='hash32', disable_gc=True) 111 | 
def test_murmur_hash2_neutral_32_perf(benchmark, hash_bencher): 112 | hash_bencher(benchmark, pyhash.murmur2_neutral_32, 2373126550) 113 | 114 | 115 | @pytest.mark.benchmark(group='hash64', disable_gc=True) 116 | def test_murmur_hash2_x64_64a_perf(benchmark, hash_bencher): 117 | hash_bencher(benchmark, pyhash.murmur2_x64_64a, 12604435678857905857) 118 | 119 | 120 | @pytest.mark.benchmark(group='hash64', disable_gc=True) 121 | def test_murmur_hash2_x86_64b_perf(benchmark, hash_bencher): 122 | hash_bencher(benchmark, pyhash.murmur2_x86_64b, 3759496224018757553) 123 | 124 | 125 | @pytest.mark.benchmark(group='hash32', disable_gc=True) 126 | def test_murmur_hash3_32_perf(benchmark, hash_bencher): 127 | hash_bencher(benchmark, pyhash.murmur3_32, 3825864278) 128 | 129 | 130 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 131 | @pytest.mark.benchmark(group='hash128', disable_gc=True) 132 | def test_murmur_hash3_x86_128_perf(benchmark, hash_bencher): 133 | hash_bencher(benchmark, pyhash.murmur3_x86_128, 134 | 97431559281111809997269275467939498127) 135 | 136 | 137 | @pytest.mark.skipif(not pyhash.build_with_int128, reason="requires int128 support") 138 | @pytest.mark.benchmark(group='hash128', disable_gc=True) 139 | def test_murmur_hash3_x64_128_perf(benchmark, hash_bencher): 140 | hash_bencher(benchmark, pyhash.murmur3_x64_128, 141 | 149984839147466660491291446859193586361) 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Introduction [![pypi](https://img.shields.io/pypi/v/pyhash.svg)](https://pypi.org/project/pyhash/) [![Travis CI Status](https://travis-ci.org/flier/pyfasthash.svg?branch=master)](https://travis-ci.org/flier/pyfasthash) [![codecov](https://codecov.io/gh/flier/pyfasthash/branch/master/graph/badge.svg)](https://codecov.io/gh/flier/pyfasthash) 2 | 3 | `pyhash` is a python 
non-cryptographic hash library. 4 | 5 | It provides several common hash algorithms with C/C++ implementations for performance and compatibility. 6 | 7 | ```python 8 | >>> import pyhash 9 | >>> hasher = pyhash.fnv1_32() 10 | 11 | >>> hasher('hello world') 12 | 2805756500L 13 | 14 | >>> hasher('hello', ' ', 'world') 15 | 2805756500L 16 | 17 | >>> hasher('world', seed=hasher('hello ')) 18 | 2805756500L 19 | ``` 20 | 21 | It can also be used to generate fingerprints without a seed. 22 | 23 | ```python 24 | >>> import pyhash 25 | >>> fp = pyhash.farm_fingerprint_64() 26 | 27 | >>> fp('hello') 28 | 13009744463427800296L 29 | 30 | >>> fp('hello', 'world') 31 | [13009744463427800296L, 16436542438370751598L] 32 | ``` 33 | 34 | **Notes** 35 | 36 | `hasher('hello', ' ', 'world')` is syntactic sugar for `hasher('world', seed=hasher(' ', seed=hasher('hello')))`, and may not be equal to `hasher('hello world')`, because some hash algorithms use different `hash` and `seed` sizes. 37 | 38 | For example, the `metro` hash always uses a 32-bit seed for 64/128-bit hash values.
39 | 40 | ```python 41 | >>> import pyhash 42 | >>> hasher = pyhash.metro_64() 43 | 44 | >>> hasher('hello world') 45 | 5622782129197849471L 46 | 47 | >>> hasher('hello', ' ', 'world') 48 | 16402988188088019159L 49 | 50 | >>> hasher('world', seed=hasher(' ', seed=hasher('hello'))) 51 | 16402988188088019159L 52 | ``` 53 | 54 | # Installation 55 | 56 | ```bash 57 | $ pip install pyhash 58 | ``` 59 | 60 | **Notes** 61 | 62 | If `pip install` fails with errors similar to the following, see [#27](https://github.com/flier/pyfasthash/issues/27): 63 | 64 | ``` 65 | /usr/lib/gcc/x86_64-linux-gnu/6/include/smmintrin.h:846:1: error: inlining failed in call to always_inline 'long long unsigned int _mm_crc32_u64(long long unsigned int, long long unsigned int)': target specific option mismatch 66 | _mm_crc32_u64 (unsigned long long __C, unsigned long long __V) 67 | ^~~~~~~~~~~~~ 68 | src/smhasher/metrohash64crc.cpp:52:34: note: called from here 69 | v[0] ^= _mm_crc32_u64(v[0], read_u64(ptr)); ptr += 8; 70 | ~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~ 71 | ``` 72 | 73 | Please upgrade `pip` and `setuptools` to the latest version and try again: 74 | 75 | ```bash 76 | $ pip install --upgrade pip setuptools 77 | ``` 78 | 79 | **Notes** 80 | 81 | If `pip install` fails on macOS with errors similar to the following, see [#28](https://github.com/flier/pyfasthash/issues/28): 82 | 83 | ``` 84 | creating build/temp.macosx-10.6-intel-3.6 85 | ...
86 | /usr/bin/clang -fno-strict-aliasing -Wsign-compare -fno-common -dynamic -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -arch i386 -arch x86_64 -g -c src/smhasher/metrohash64crc.cpp -o build/temp.macosx-10.6-intel-3.6/src/smhasher/metrohash64crc.o -msse4.2 -maes -mavx -mavx2 87 | src/smhasher/metrohash64crc.cpp:52:21: error: use of undeclared identifier '_mm_crc32_u64' 88 | v[0] ^= _mm_crc32_u64(v[0], read_u64(ptr)); ptr += 8; 89 | ^ 90 | ``` 91 | 92 | You may try: 93 | 94 | ```bash 95 | $ CFLAGS="-mmacosx-version-min=10.13" pip install pyhash 96 | ``` 97 | 98 | **Notes** 99 | 100 | `pyhash` only supports `pypy` v6.0 or newer; please [download and install](https://pypy.org/download.html) the latest `pypy`. 101 | 102 | # Algorithms 103 | 104 | pyhash supports the following hash algorithms: 105 | 106 | - [FNV](http://isthe.com/chongo/tech/comp/fnv/) (Fowler-Noll-Vo) hash 107 | - fnv1_32 108 | - fnv1a_32 109 | - fnv1_64 110 | - fnv1a_64 111 | - [MurmurHash](http://code.google.com/p/smhasher/) 112 | - murmur1_32 113 | - murmur1_aligned_32 114 | - murmur2_32 115 | - murmur2a_32 116 | - murmur2_aligned_32 117 | - murmur2_neutral_32 118 | - murmur2_x64_64a 119 | - murmur2_x86_64b 120 | - murmur3_32 121 | - murmur3_x86_128 122 | - murmur3_x64_128 123 | - [lookup3](http://burtleburtle.net/bob/hash/doobs.html) 124 | - lookup3 125 | - lookup3_little 126 | - lookup3_big 127 | - [SuperFastHash](http://www.azillionmonkeys.com/qed/hash.html) 128 | - super_fast_hash 129 | - [City Hash](https://code.google.com/p/cityhash/) 130 | - city_32 131 | - city_64 132 | - city_128 133 | - city_crc_128 134 | - city_fingerprint_256 135 | - [Spooky Hash](http://burtleburtle.net/bob/hash/spooky.html) 136 | - spooky_32 137 | - spooky_64 138 | - spooky_128 139 | - [FarmHash](https://github.com/google/farmhash) 140 | - farm_32 141 | - farm_64 142 | - farm_128 143 | - farm_fingerprint_32 144 | - farm_fingerprint_64 145 | - farm_fingerprint_128 146 | -
[MetroHash](https://github.com/jandrewrogers/MetroHash) 147 | - metro_64 148 | - metro_128 149 | - metro_crc_64 150 | - metro_crc_128 151 | - [MumHash](https://github.com/vnmakarov/mum-hash) 152 | - mum_64 153 | - [T1Ha](https://github.com/leo-yuriev/t1ha) 154 | - t1ha2 _(64-bit little-endian)_ 155 | - t1ha2_128 _(128-bit little-endian)_ 156 | - t1ha1 _(64-bit native-endian)_ 157 | - t1ha1_le _(64-bit little-endian)_ 158 | - t1ha1_be _(64-bit big-endian)_ 159 | - t1ha0 _(64-bit, chooses the fastest function at runtime)_ 160 | - ~~t1_32~~ 161 | - ~~t1_32_be~~ 162 | - ~~t1_64~~ 163 | - ~~t1_64_be~~ 164 | - [XXHash](https://github.com/Cyan4973/xxHash) 165 | - xx_32 166 | - xx_64 167 | - xxh3_64 **NEW** 168 | - xxh3_128 **NEW** 169 | - [Highway Hash](https://github.com/google/highwayhash) 170 | - highway_64 **NEW** 171 | - highway_128 **NEW** 172 | - highway_256 **NEW** 173 | 174 | ## String and Bytes literals 175 | 176 | Python has two types that can be used to represent string literals, and the hash values of the two types are different. 177 | 178 | - For Python 2.x [String literals](https://docs.python.org/2/reference/lexical_analysis.html#string-literals), `str` will be used by default, `unicode` can be used with the `u` prefix. 179 | - For Python 3.x [String and Bytes literals](https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals), `unicode` will be used by default, `bytes` can be used with the `b` prefix. 180 | 181 | For example, 182 | 183 | ``` 184 | $ python2 185 | Python 2.7.15 (default, Jun 17 2018, 12:46:58) 186 | [GCC 4.2.1 Compatible Apple LLVM 9.1.0 (clang-902.0.39.2)] on darwin 187 | Type "help", "copyright", "credits" or "license" for more information. 
188 | >>> import pyhash 189 | >>> hasher = pyhash.murmur3_32() 190 | >>> hasher('foo') 191 | 4138058784L 192 | >>> hasher(u'foo') 193 | 2085578581L 194 | >>> hasher(b'foo') 195 | 4138058784L 196 | ``` 197 | 198 | ``` 199 | $ python3 200 | Python 3.7.0 (default, Jun 29 2018, 20:13:13) 201 | [Clang 9.1.0 (clang-902.0.39.2)] on darwin 202 | Type "help", "copyright", "credits" or "license" for more information. 203 | >>> import pyhash 204 | >>> hasher = pyhash.murmur3_32() 205 | >>> hasher('foo') 206 | 2085578581 207 | >>> hasher(u'foo') 208 | 2085578581 209 | >>> hasher(b'foo') 210 | 4138058784 211 | ``` 212 | 213 | You can also import [unicode_literals](http://python-future.org/unicode_literals.html) to use unicode literals in Python 2.x 214 | 215 | ```python 216 | from __future__ import unicode_literals 217 | ``` 218 | 219 | > In general, it is more compelling to use unicode_literals when back-porting new or existing Python 3 code to Python 2/3 than when porting existing Python 2 code to 2/3. In the latter case, explicitly marking up all unicode string literals with u'' prefixes would help to avoid unintentionally changing the existing Python 2 API. However, if changing the existing Python 2 API is not a concern, using unicode_literals may speed up the porting process. 
220 | -------------------------------------------------------------------------------- /src/fnv/fnv.h: -------------------------------------------------------------------------------- 1 | /* 2 | * fnv - Fowler/Noll/Vo- hash code 3 | * 4 | * @(#) $Revision: 5.4 $ 5 | * @(#) $Id: fnv.h,v 5.4 2009/07/30 22:49:13 chongo Exp $ 6 | * @(#) $Source: /usr/local/src/cmd/fnv/RCS/fnv.h,v $ 7 | * 8 | *** 9 | * 10 | * Fowler/Noll/Vo- hash 11 | * 12 | * The basis of this hash algorithm was taken from an idea sent 13 | * as reviewer comments to the IEEE POSIX P1003.2 committee by: 14 | * 15 | * Phong Vo (http://www.research.att.com/info/kpv/) 16 | * Glenn Fowler (http://www.research.att.com/~gsf/) 17 | * 18 | * In a subsequent ballot round: 19 | * 20 | * Landon Curt Noll (http://www.isthe.com/chongo/) 21 | * 22 | * improved on their algorithm. Some people tried this hash 23 | * and found that it worked rather well. In an EMail message 24 | * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash. 25 | * 26 | * FNV hashes are designed to be fast while maintaining a low 27 | * collision rate. The FNV speed allows one to quickly hash lots 28 | * of data while maintaining a reasonable collision rate. See: 29 | * 30 | * http://www.isthe.com/chongo/tech/comp/fnv/index.html 31 | * 32 | * for more details as well as other forms of the FNV hash. 33 | * 34 | *** 35 | * 36 | * NOTE: The FNV-0 historic hash is not recommended. One should use 37 | * the FNV-1 hash instead. 38 | * 39 | * To use the 32 bit FNV-0 historic hash, pass FNV0_32_INIT as the 40 | * Fnv32_t hashval argument to fnv_32_buf() or fnv_32_str(). 41 | * 42 | * To use the 64 bit FNV-0 historic hash, pass FNV0_64_INIT as the 43 | * Fnv64_t hashval argument to fnv_64_buf() or fnv_64_str(). 44 | * 45 | * To use the recommended 32 bit FNV-1 hash, pass FNV1_32_INIT as the 46 | * Fnv32_t hashval argument to fnv_32_buf() or fnv_32_str(). 
47 | * 48 | * To use the recommended 64 bit FNV-1 hash, pass FNV1_64_INIT as the 49 | * Fnv64_t hashval argument to fnv_64_buf() or fnv_64_str(). 50 | * 51 | * To use the recommended 32 bit FNV-1a hash, pass FNV1_32A_INIT as the 52 | * Fnv32_t hashval argument to fnv_32a_buf() or fnv_32a_str(). 53 | * 54 | * To use the recommended 64 bit FNV-1a hash, pass FNV1A_64_INIT as the 55 | * Fnv64_t hashval argument to fnv_64a_buf() or fnv_64a_str(). 56 | * 57 | *** 58 | * 59 | * Please do not copyright this code. This code is in the public domain. 60 | * 61 | * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 62 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO 63 | * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR 64 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 65 | * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 66 | * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 67 | * PERFORMANCE OF THIS SOFTWARE. 68 | * 69 | * By: 70 | * chongo /\oo/\ 71 | * http://www.isthe.com/chongo/ 72 | * 73 | * Share and Enjoy! :-) 74 | */ 75 | 76 | #if !defined(__FNV_H__) 77 | #define __FNV_H__ 78 | 79 | #include <sys/types.h> 80 | 81 | #define FNV_VERSION "5.0.2" /* @(#) FNV Version */ 82 | 83 | #ifdef WIN32 84 | typedef unsigned int u_int32_t; 85 | #endif 86 | 87 | 88 | /* 89 | * 32 bit FNV-0 hash type 90 | */ 91 | typedef u_int32_t Fnv32_t; 92 | 93 | 94 | /* 95 | * 32 bit FNV-0 zero initial basis 96 | * 97 | * This historic hash is not recommended. One should use 98 | * the FNV-1 hash and initial basis instead. 99 | */ 100 | #define FNV0_32_INIT ((Fnv32_t)0) 101 | 102 | 103 | /* 104 | * 32 bit FNV-1 and FNV-1a non-zero initial basis 105 | * 106 | * The FNV-1 initial basis is the FNV-0 hash of the following 32 octets: 107 | * 108 | * chongo <Landon Curt Noll> /\../\ 109 | * 110 | * NOTE: The \'s above are not back-slashing escape characters. 
111 | * They are literal ASCII backslash 0x5c characters. 112 | * 113 | * NOTE: The FNV-1a initial basis is the same value as FNV-1 by definition. 114 | */ 115 | #define FNV1_32_INIT ((Fnv32_t)0x811c9dc5) 116 | #define FNV1_32A_INIT FNV1_32_INIT 117 | 118 | 119 | /* 120 | * determine how 64 bit unsigned values are represented 121 | */ 122 | #include "longlong.h" 123 | 124 | #ifdef WIN32 125 | typedef unsigned long long u_int64_t; 126 | #endif 127 | 128 | 129 | /* 130 | * 64 bit FNV-0 hash 131 | */ 132 | #if defined(HAVE_64BIT_LONG_LONG) 133 | typedef u_int64_t Fnv64_t; 134 | #else /* HAVE_64BIT_LONG_LONG */ 135 | typedef struct { 136 | u_int32_t w32[2]; /* w32[0] is low order, w32[1] is high order word */ 137 | } Fnv64_t; 138 | #endif /* HAVE_64BIT_LONG_LONG */ 139 | 140 | 141 | /* 142 | * 64 bit FNV-0 zero initial basis 143 | * 144 | * This historic hash is not recommended. One should use 145 | * the FNV-1 hash and initial basis instead. 146 | */ 147 | #if defined(HAVE_64BIT_LONG_LONG) 148 | #define FNV0_64_INIT ((Fnv64_t)0) 149 | #else /* HAVE_64BIT_LONG_LONG */ 150 | extern const Fnv64_t fnv0_64_init; 151 | #define FNV0_64_INIT (fnv0_64_init) 152 | #endif /* HAVE_64BIT_LONG_LONG */ 153 | 154 | 155 | /* 156 | * 64 bit FNV-1 non-zero initial basis 157 | * 158 | * The FNV-1 initial basis is the FNV-0 hash of the following 32 octets: 159 | * 160 | * chongo <Landon Curt Noll> /\../\ 161 | * 162 | * NOTE: The \'s above are not back-slashing escape characters. 163 | * They are literal ASCII backslash 0x5c characters. 164 | * 165 | * NOTE: The FNV-1a initial basis is the same value as FNV-1 by definition. 
166 | */ 167 | #if defined(HAVE_64BIT_LONG_LONG) 168 | #define FNV1_64_INIT ((Fnv64_t)0xcbf29ce484222325ULL) 169 | #define FNV1A_64_INIT FNV1_64_INIT 170 | #else /* HAVE_64BIT_LONG_LONG */ 171 | extern const Fnv64_t fnv1_64_init; 172 | extern const Fnv64_t fnv1a_64_init; 173 | #define FNV1_64_INIT (fnv1_64_init) 174 | #define FNV1A_64_INIT (fnv1a_64_init) 175 | #endif /* HAVE_64BIT_LONG_LONG */ 176 | 177 | 178 | /* 179 | * hash types 180 | */ 181 | enum fnv_type { 182 | FNV_NONE = 0, /* invalid FNV hash type */ 183 | FNV0_32 = 1, /* FNV-0 32 bit hash */ 184 | FNV1_32 = 2, /* FNV-1 32 bit hash */ 185 | FNV1a_32 = 3, /* FNV-1a 32 bit hash */ 186 | FNV0_64 = 4, /* FNV-0 64 bit hash */ 187 | FNV1_64 = 5, /* FNV-1 64 bit hash */ 188 | FNV1a_64 = 6, /* FNV-1a 64 bit hash */ 189 | }; 190 | 191 | 192 | /* 193 | * these test vectors are used as part of the FNV test suite 194 | */ 195 | struct test_vector { 196 | void *buf; /* start of test vector buffer */ 197 | int len; /* length of test vector */ 198 | }; 199 | struct fnv0_32_test_vector { 200 | struct test_vector *test; /* test vector buffer to hash */ 201 | Fnv32_t fnv0_32; /* expected FNV-0 32 bit hash value */ 202 | }; 203 | struct fnv1_32_test_vector { 204 | struct test_vector *test; /* test vector buffer to hash */ 205 | Fnv32_t fnv1_32; /* expected FNV-1 32 bit hash value */ 206 | }; 207 | struct fnv1a_32_test_vector { 208 | struct test_vector *test; /* test vector buffer to hash */ 209 | Fnv32_t fnv1a_32; /* expected FNV-1a 32 bit hash value */ 210 | }; 211 | struct fnv0_64_test_vector { 212 | struct test_vector *test; /* test vector buffer to hash */ 213 | Fnv64_t fnv0_64; /* expected FNV-0 64 bit hash value */ 214 | }; 215 | struct fnv1_64_test_vector { 216 | struct test_vector *test; /* test vector buffer to hash */ 217 | Fnv64_t fnv1_64; /* expected FNV-1 64 bit hash value */ 218 | }; 219 | struct fnv1a_64_test_vector { 220 | struct test_vector *test; /* test vector buffer to hash */ 221 | Fnv64_t fnv1a_64; /* expected 
FNV-1a 64 bit hash value */ 222 | }; 223 | 224 | #ifdef __cplusplus 225 | extern "C" { 226 | #endif 227 | 228 | /* 229 | * external functions 230 | */ 231 | /* hash_32.c */ 232 | Fnv32_t fnv_32_buf(void *buf, size_t len, Fnv32_t hashval); 233 | Fnv32_t fnv_32_str(char *buf, Fnv32_t hashval); 234 | 235 | /* hash_32a.c */ 236 | Fnv32_t fnv_32a_buf(void *buf, size_t len, Fnv32_t hashval); 237 | Fnv32_t fnv_32a_str(char *buf, Fnv32_t hashval); 238 | 239 | /* hash_64.c */ 240 | Fnv64_t fnv_64_buf(void *buf, size_t len, Fnv64_t hashval); 241 | Fnv64_t fnv_64_str(char *buf, Fnv64_t hashval); 242 | 243 | /* hash_64a.c */ 244 | Fnv64_t fnv_64a_buf(void *buf, size_t len, Fnv64_t hashval); 245 | Fnv64_t fnv_64a_str(char *buf, Fnv64_t hashval); 246 | 247 | #ifdef __cplusplus 248 | } 249 | #endif 250 | 251 | /* test_fnv.c */ 252 | extern struct test_vector fnv_test_str[]; 253 | extern struct fnv0_32_test_vector fnv0_32_vector[]; 254 | extern struct fnv1_32_test_vector fnv1_32_vector[]; 255 | extern struct fnv1a_32_test_vector fnv1a_32_vector[]; 256 | extern struct fnv0_64_test_vector fnv0_64_vector[]; 257 | extern struct fnv1_64_test_vector fnv1_64_vector[]; 258 | extern struct fnv1a_64_test_vector fnv1a_64_vector[]; 259 | extern void unknown_hash_type(char *prog, enum fnv_type type, int code); 260 | extern void print_fnv32(Fnv32_t hval, Fnv32_t mask, int verbose, char *arg); 261 | extern void print_fnv64(Fnv64_t hval, Fnv64_t mask, int verbose, char *arg); 262 | 263 | 264 | #endif /* __FNV_H__ */ 265 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | import os 5 | import platform 6 | from glob import glob 7 | 8 | from setuptools import setup 9 | 10 | here = os.path.abspath(os.path.dirname(__file__)) 11 | 12 | IS_64BITS = sys.maxsize > 2**32 13 | 14 | machine = 
platform.machine().lower() 15 | IS_X86 = machine in ['i386', 'i686', 'x86_64', 'amd64'] 16 | IS_X86_64 = IS_X86 and IS_64BITS 17 | IS_ARM = machine.startswith('arm') or machine.startswith('aarch') 18 | IS_ARM64 = IS_ARM and IS_64BITS 19 | IS_PPC = machine.startswith('ppc') 20 | IS_PPC64 = IS_PPC and IS_64BITS 21 | 22 | IS_WINNT = os.name == "nt" 23 | IS_POSIX = os.name == "posix" 24 | IS_MACOS = sys.platform == "darwin" 25 | 26 | SUPPORT_INT128 = not IS_WINNT 27 | 28 | ON = 1 29 | OFF = 0 30 | 31 | 32 | def cpu_features(): # Return a CpuFeatures namedtuple of five booleans: sse41, sse42, aes, avx, avx2. 33 | from collections import namedtuple 34 | 35 | CpuFeatures = namedtuple( 36 | "CpuFeatures", ['sse41', 'sse42', 'aes', 'avx', 'avx2']) 37 | 38 | sse41 = sse42 = aes = avx = avx2 = False # default, and the final answer when IS_X86 is false 39 | 40 | if IS_X86: 41 | try: 42 | from cpuid import _is_set # NOTE(review): the arguments below look like (CPUID leaf, register index, bit) with 'Yes' meaning the bit is set -- confirm against the cpuid package 43 | 44 | sse41 = _is_set(1, 2, 19) == 'Yes' 45 | sse42 = _is_set(1, 2, 20) == 'Yes' 46 | aes = _is_set(1, 2, 25) == 'Yes' 47 | avx = _is_set(1, 2, 28) == 'Yes' 48 | avx2 = _is_set(7, 1, 5) == 'Yes' 49 | except ImportError: # cpuid cannot be imported, so nothing can be probed 50 | if IS_64BITS: 51 | sse41 = sse42 = aes = avx = avx2 = True # 64-bit x86 without cpuid: assume every feature is available 52 | 53 | return CpuFeatures(sse41, sse42, aes, avx, avx2) 54 | 55 | 56 | cpu = cpu_features() 57 | 58 | 59 | macros = [] 60 | include_dirs = [ 61 | "src/pybind11/include", 62 | "src/highwayhash", 63 | ] 64 | library_dirs = [] 65 | libraries = [] 66 | extra_macros = [] 67 | extra_compile_args = [] 68 | extra_link_args = [] 69 | 70 | if IS_WINNT: 71 | macros += [ 72 | ("WIN32", None), 73 | ] 74 | 75 | python_home = os.environ.get('PYTHON_HOME') 76 | 77 | if python_home: 78 | include_dirs += [ 79 | os.path.join(python_home, 'include'), 80 | ] 81 | library_dirs += [ 82 | os.path.join(python_home, 'libs'), 83 | ] 84 | 85 | extra_macros += [("WIN32", 1)] 86 | extra_compile_args += ["/O2", "/GL", "/MT", "/EHsc", "/Gy", "/Zi"] 87 | extra_link_args += ["/DLL", "/OPT:REF", "/OPT:ICF", 88 | "/MACHINE:X64" if IS_64BITS else "/MACHINE:X86"] 89 | elif IS_POSIX: 90 | if IS_MACOS: 91 | include_dirs += [ 92 | '/opt/local/include', 93 
| '/usr/local/include' 94 | ] 95 | 96 | extra_compile_args += [ 97 | "-Wno-deprecated-register", 98 | "-Wno-unused-lambda-capture", 99 | "-stdlib=libc++", 100 | ] 101 | else: 102 | libraries += ["rt", "gcc"] 103 | 104 | extra_compile_args += ["-march=native"] 105 | 106 | if SUPPORT_INT128: 107 | macros += [ 108 | ('SUPPORT_INT128', ON), 109 | ] 110 | 111 | c_libraries = [( 112 | 'fnv', { 113 | "sources": [ 114 | 'src/fnv/hash_32.c', 115 | 'src/fnv/hash_32a.c', 116 | 'src/fnv/hash_64.c', 117 | 'src/fnv/hash_64a.c' 118 | ], 119 | "macros": extra_macros, 120 | } 121 | ), ( 122 | 'smhasher', { 123 | "sources": list(filter(None, [ 124 | 'src/smhasher/MurmurHash1.cpp', 125 | 'src/smhasher/MurmurHash2.cpp', 126 | 'src/smhasher/MurmurHash3.cpp', 127 | 'src/smhasher/City.cpp', 128 | 'src/smhasher/Spooky.cpp', 129 | 'src/smhasher/SpookyV2.cpp', 130 | 'src/smhasher/metrohash/metrohash64.cpp', 131 | 'src/smhasher/metrohash/metrohash64crc.cpp' if IS_X86 or IS_ARM64 else None, 132 | 'src/smhasher/metrohash/metrohash128.cpp', 133 | 'src/smhasher/metrohash/metrohash128crc.cpp' if IS_X86 or IS_ARM64 else None, 134 | ])), 135 | "cflags": extra_compile_args + [ 136 | "-std=c++11", 137 | ], 138 | } 139 | ), ( 140 | 't1ha', { 141 | "sources": list(filter(None, [ 142 | 'src/smhasher/t1ha/t1ha0.c', 143 | 'src/smhasher/t1ha/t1ha0_ia32aes_avx.c' if IS_X86 else None, 144 | 'src/smhasher/t1ha/t1ha0_ia32aes_avx2.c' if IS_X86 else None, 145 | 'src/smhasher/t1ha/t1ha0_ia32aes_noavx.c', 146 | 'src/smhasher/t1ha/t1ha1.c', 147 | 'src/smhasher/t1ha/t1ha2.c', 148 | ])), 149 | "macros": [ 150 | ("T1HA0_AESNI_AVAILABLE", ON if cpu.aes else OFF), 151 | ("T1HA0_RUNTIME_SELECT", ON), 152 | ], 153 | "cflags": extra_compile_args, 154 | } 155 | ), ( 156 | 'farm', { 157 | "sources": ['src/smhasher/farmhash-c.c'], 158 | "macros": extra_macros, 159 | } 160 | ), ( 161 | 'lookup3', { 162 | "sources": ['src/lookup3/lookup3.c'], 163 | "macros": extra_macros, 164 | } 165 | ), ( 166 | 'SuperFastHash', { 167 | 
"sources": ['src/SuperFastHash/SuperFastHash.c'], 168 | "macros": extra_macros, 169 | } 170 | ), ( 171 | "xxhash", { 172 | "sources": ["src/xxHash/xxhash.c"], 173 | } 174 | )] 175 | 176 | if not IS_WINNT: 177 | srcs = [ 178 | "src/highwayhash/highwayhash/arch_specific.cc", 179 | "src/highwayhash/highwayhash/instruction_sets.cc", 180 | "src/highwayhash/highwayhash/os_specific.cc", 181 | "src/highwayhash/highwayhash/hh_portable.cc", 182 | ] 183 | cflags = extra_compile_args + [ 184 | "-Isrc/highwayhash", 185 | "-std=c++11", 186 | ] 187 | 188 | if IS_X86_64: 189 | srcs += [ 190 | "src/highwayhash/highwayhash/hh_sse41.cc", 191 | "src/highwayhash/highwayhash/hh_avx2.cc", 192 | ] 193 | cflags += ["-msse4.1", "-mavx2"] 194 | 195 | elif IS_ARM64: 196 | srcs += ["src/highwayhash/highwayhash/hh_neon.cc"] 197 | cflags += [ 198 | '-mfloat-abi=hard', 199 | '-march=armv7-a', 200 | '-mfpu=neon', 201 | ] 202 | 203 | elif IS_PPC64: 204 | srcs += ["src/highwayhash/highwayhash/hh_vsx.cc"] 205 | cflags += ['-mvsx'] 206 | 207 | c_libraries += [( 208 | "highwayhash", { 209 | "sources": srcs, 210 | "cflags": cflags, 211 | } 212 | )] 213 | 214 | libraries += [libname for (libname, _) in c_libraries] 215 | cmdclass = {} 216 | 217 | try: 218 | from pybind11.setup_helpers import Pybind11Extension as Extension, build_ext 219 | 220 | cmdclass["build_ext"] = build_ext 221 | 222 | except ImportError: 223 | from setuptools import Extension 224 | 225 | pyhash = Extension(name="_pyhash", 226 | sources=['src/Hash.cpp'], 227 | depends=glob('src/*.h'), 228 | define_macros=macros, 229 | include_dirs=include_dirs, 230 | library_dirs=library_dirs, 231 | libraries=libraries, 232 | extra_compile_args=extra_compile_args + 233 | ["-std=c++14"], 234 | extra_link_args=extra_link_args, 235 | ) 236 | 237 | setup(name='pyhash', 238 | version='0.9.4', 239 | description='Python Non-cryptographic Hash Library', 240 | long_description=open(os.path.join(here, 'README.md')).read(), 241 | 
long_description_content_type='text/markdown', 242 | url='https://github.com/flier/pyfasthash', 243 | download_url='https://github.com/flier/pyfasthash/releases', 244 | platforms=["x86", "x64"], 245 | author='Flier Lu', 246 | author_email='flier.lu@gmail.com', 247 | license="Apache Software License", 248 | packages=['pyhash'], 249 | libraries=c_libraries, 250 | cmdclass=cmdclass, 251 | ext_modules=[pyhash], 252 | classifiers=[ 253 | 'Development Status :: 5 - Production/Stable', 254 | 'Intended Audience :: Developers', 255 | 'Intended Audience :: System Administrators', 256 | 'License :: OSI Approved :: Apache Software License', 257 | 'Natural Language :: English', 258 | 'Operating System :: Microsoft :: Windows', 259 | 'Operating System :: POSIX', 260 | 'Programming Language :: C++', 261 | 'Programming Language :: Python', 262 | 'Programming Language :: Python :: 2', 263 | 'Programming Language :: Python :: 3', 264 | 'Programming Language :: Python :: Implementation :: PyPy', 265 | 'Topic :: Internet', 266 | 'Topic :: Software Development', 267 | 'Topic :: Software Development :: Libraries :: Python Modules', 268 | 'Topic :: Utilities' 269 | ], 270 | keywords='hash hashing fasthash', 271 | setup_requires=list(filter(None, [ 272 | 'cpuid' if IS_X86 else None, 273 | 'pybind11', 274 | ])), 275 | tests_require=['pytest', 'pytest-runner', 'pytest-benchmark'], 276 | ) 277 | -------------------------------------------------------------------------------- /src/fnv/Makefile: -------------------------------------------------------------------------------- 1 | #!/bin/make 2 | # 3 | # hash - makefile for FNV hash tools 4 | # 5 | # @(#) $Revision: 5.1 $ 6 | # @(#) $Id: Makefile,v 5.1 2009/06/30 09:01:38 chongo Exp $ 7 | # @(#) $Source: /usr/local/src/cmd/fnv/RCS/Makefile,v $ 8 | # 9 | # See: 10 | # http://www.isthe.com/chongo/tech/comp/fnv/index.html 11 | # 12 | # for the most up to date copy of this code and the FNV hash home page. 
13 | # 14 | # Please do not copyright this code. This code is in the public domain. 15 | # 16 | # LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 17 | # INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO 18 | # EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR 19 | # CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 20 | # USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 21 | # OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 22 | # PERFORMANCE OF THIS SOFTWARE. 23 | # 24 | # By: 25 | # chongo /\oo/\ 26 | # http://www.isthe.com/chongo/ 27 | # 28 | # Share and Enjoy! :-) 29 | 30 | # make tools 31 | # 32 | SHELL= /bin/sh 33 | CFLAGS= -O3 -g3 34 | #CFLAGS= -O2 -g3 35 | #CC= cc 36 | AR= ar 37 | TAR= tar 38 | EGREP= egrep 39 | GZIP_BIN= gzip 40 | INSTALL= install 41 | 42 | # If your system needs ranlib use: 43 | # RANLIB= ranlib 44 | # otherwise use: 45 | # RANLIB= : 46 | # 47 | #RANLIB= ranlib 48 | RANLIB= : 49 | 50 | # where to install things 51 | # 52 | DESTBIN= /usr/local/bin 53 | DESTLIB= /usr/local/lib 54 | DESTINC= /usr/local/include 55 | 56 | # what to build 57 | # 58 | SRC= hash_32.c hash_32a.c hash_64.c hash_64a.c \ 59 | fnv32.c fnv64.c \ 60 | have_ulong64.c test_fnv.c 61 | NO64BIT_SRC= no64bit_fnv64.c no64bit_hash_64.c \ 62 | no64bit_hash_64a.c no64bit_test_fnv.c 63 | HSRC= fnv.h \ 64 | longlong.h 65 | ALL= ${SRC} ${HSRC} \ 66 | README Makefile 67 | PROGS= fnv032 fnv064 fnv132 fnv164 fnv1a32 fnv1a64 68 | OBSOLETE_PROGS= fnv0_32 fnv0_64 fnv1_32 fnv1_64 fnv1a_32 fnv1a_64 69 | NO64BIT_PROGS= no64bit_fnv064 no64bit_fnv164 no64bit_fnv1a64 70 | LIBS= libfnv.a 71 | LIBOBJ= hash_32.o hash_64.o hash_32a.o hash_64a.o test_fnv.o 72 | NO64BIT_OBJ= no64bit_fnv64.o no64bit_hash_64.o \ 73 | no64bit_hash_64a.o no64bit_test_fnv.o 74 | OTHEROBJ= fnv32.o fnv64.o 75 | TARGETS= ${LIBOBJ} ${LIBS} ${PROGS} 76 | 77 | # default rule 78 | # 79 | all: ${TARGETS} 
80 | 81 | # things to build 82 | # 83 | hash_32.o: hash_32.c longlong.h fnv.h 84 | ${CC} ${CFLAGS} hash_32.c -c 85 | 86 | hash_64.o: hash_64.c longlong.h fnv.h 87 | ${CC} ${CFLAGS} hash_64.c -c 88 | 89 | hash_32a.o: hash_32a.c longlong.h fnv.h 90 | ${CC} ${CFLAGS} hash_32a.c -c 91 | 92 | hash_64a.o: hash_64a.c longlong.h fnv.h 93 | ${CC} ${CFLAGS} hash_64a.c -c 94 | 95 | test_fnv.o: test_fnv.c longlong.h fnv.h 96 | ${CC} ${CFLAGS} test_fnv.c -c 97 | 98 | fnv32.o: fnv32.c longlong.h fnv.h 99 | ${CC} ${CFLAGS} fnv32.c -c 100 | 101 | fnv032: fnv32.o libfnv.a 102 | ${CC} fnv32.o libfnv.a -o fnv032 103 | 104 | fnv64.o: fnv64.c longlong.h fnv.h 105 | ${CC} ${CFLAGS} fnv64.c -c 106 | 107 | fnv064: fnv64.o libfnv.a 108 | ${CC} fnv64.o libfnv.a -o fnv064 109 | 110 | libfnv.a: ${LIBOBJ} 111 | rm -f $@ 112 | ${AR} rv $@ ${LIBOBJ} 113 | ${RANLIB} $@ 114 | 115 | fnv132: fnv032 116 | -rm -f $@ 117 | -cp -f $? $@ 118 | 119 | fnv1a32: fnv032 120 | -rm -f $@ 121 | -cp -f $? $@ 122 | 123 | fnv164: fnv064 124 | -rm -f $@ 125 | -cp -f $? $@ 126 | 127 | fnv1a64: fnv064 128 | -rm -f $@ 129 | -cp -f $? $@ 130 | 131 | longlong.h: have_ulong64.c Makefile 132 | -@rm -f have_ulong64 have_ulong64.o ll_tmp longlong.h 133 | @echo 'forming longlong.h' 134 | @echo '/*' > longlong.h 135 | @echo ' * DO NOT EDIT -- generated by the Makefile' >> longlong.h 136 | @echo ' */' >> longlong.h 137 | @echo '' >> longlong.h 138 | @echo '#if !defined(__LONGLONG_H__)' >> longlong.h 139 | @echo '#define __LONGLONG_H__' >> longlong.h 140 | @echo '' >> longlong.h 141 | @echo '/* do we have/want to use a long long type? 
*/' >> longlong.h 142 | -@rm -f have_ulong64.o have_ulong64 143 | -@${CC} ${CFLAGS} have_ulong64.c -c 2>/dev/null; true 144 | -@${CC} ${CFLAGS} have_ulong64.o -o have_ulong64 2>/dev/null; true 145 | -@${SHELL} -c "./have_ulong64 > ll_tmp 2>/dev/null" \ 146 | >/dev/null 2>&1; true 147 | -@if [ -s ll_tmp ]; then \ 148 | cat ll_tmp >> longlong.h; \ 149 | else \ 150 | echo '#undef HAVE_64BIT_LONG_LONG /* no */' >> longlong.h; \ 151 | fi 152 | @echo '' >> longlong.h 153 | @echo '/*' >> longlong.h 154 | @echo ' * NO64BIT_LONG_LONG undef HAVE_64BIT_LONG_LONG' >> longlong.h 155 | @echo ' */' >> longlong.h 156 | @echo '#if defined(NO64BIT_LONG_LONG)' >> longlong.h 157 | @echo '#undef HAVE_64BIT_LONG_LONG' >> longlong.h 158 | @echo '#endif /* NO64BIT_LONG_LONG */' >> longlong.h 159 | @echo '' >> longlong.h 160 | @echo '#endif /* !__LONGLONG_H__ */' >> longlong.h 161 | -@rm -f have_ulong64 have_ulong64.o ll_tmp 162 | @echo 'longlong.h formed' 163 | 164 | # utilities 165 | # 166 | install: all 167 | -@if [ ! -d "${DESTBIN}" ]; then \ 168 | echo " mkdir -p ${DESTBIN}"; \ 169 | mkdir -p ${DESTBIN}; \ 170 | fi 171 | -@if [ ! -d "${DESTLIB}" ]; then \ 172 | echo " mkdir -p ${DESTLIB}"; \ 173 | mkdir -p ${DESTLIB}; \ 174 | fi 175 | -@if [ ! -d "${DESTINC}" ]; then \ 176 | echo " mkdir -p ${DESTINC}"; \ 177 | mkdir -p ${DESTINC}; \ 178 | fi 179 | ${INSTALL} -m 0755 ${PROGS} ${DESTBIN} 180 | ${INSTALL} -m 0644 ${LIBS} ${DESTLIB} 181 | ${RANLIB} ${DESTLIB}/libfnv.a 182 | ${INSTALL} -m 0644 ${HSRC} ${DESTINC} 183 | @# remove obsolete programs 184 | for i in ${OBSOLETE_PROGS}; do \ 185 | if [ -f "${DESTBIN}/$$i" ]; then \ 186 | echo "rm -f ${DESTBIN}/$$i"; \ 187 | rm -f "${DESTBIN}/$$i"; \ 188 | fi; \ 189 | done 190 | 191 | clean: 192 | -rm -f have_ulong64 have_ulong64.o ll_tmp ll_tmp2 longlong.h 193 | -rm -f ${LIBOBJ} 194 | -rm -f ${OTHEROBJ} 195 | 196 | clobber: clean 197 | -rm -f ${TARGETS} 198 | -rm -f ${OBSOLETE_PROGS} lltmp lltmp2 ll_tmp 199 | -rm -f ${NO64BIT_SRC} 200 | -rm -f 
${NO64BIT_OBJ} 201 | -rm -f ${NO64BIT_PROGS} 202 | -rm -f vector.c 203 | 204 | check: ${PROGS} 205 | @echo -n "FNV-0 32 bit tests: " 206 | @./fnv032 -t 1 -v 207 | @echo -n "FNV-1 32 bit tests: " 208 | @./fnv132 -t 1 -v 209 | @echo -n "FNV-1a 32 bit tests: " 210 | @./fnv1a32 -t 1 -v 211 | @echo -n "FNV-0 64 bit tests: " 212 | @./fnv064 -t 1 -v 213 | @echo -n "FNV-1 64 bit tests: " 214 | @./fnv164 -t 1 -v 215 | @echo -n "FNV-1a 64 bit tests: " 216 | @./fnv1a64 -t 1 -v 217 | 218 | ############################### 219 | # generate test vector source # 220 | ############################### 221 | 222 | no64bit_fnv64.c: fnv64.c 223 | -rm -f $@ 224 | -cp -f $? $@ 225 | 226 | no64bit_hash_64.c: hash_64.c 227 | -rm -f $@ 228 | -cp -f $? $@ 229 | 230 | no64bit_hash_64a.c: hash_64a.c 231 | -rm -f $@ 232 | -cp -f $? $@ 233 | 234 | no64bit_test_fnv.c: test_fnv.c 235 | -rm -f $@ 236 | -cp -f $? $@ 237 | 238 | no64bit_fnv64.o: no64bit_fnv64.c longlong.h fnv.h 239 | ${CC} ${CFLAGS} -DNO64BIT_LONG_LONG no64bit_fnv64.c -c 240 | 241 | no64bit_hash_64.o: no64bit_hash_64.c longlong.h fnv.h 242 | ${CC} ${CFLAGS} -DNO64BIT_LONG_LONG no64bit_hash_64.c -c 243 | 244 | no64bit_hash_64a.o: no64bit_hash_64a.c longlong.h fnv.h 245 | ${CC} ${CFLAGS} -DNO64BIT_LONG_LONG no64bit_hash_64a.c -c 246 | 247 | no64bit_test_fnv.o: no64bit_test_fnv.c longlong.h fnv.h 248 | ${CC} ${CFLAGS} -DNO64BIT_LONG_LONG no64bit_test_fnv.c -c 249 | 250 | no64bit_fnv064: no64bit_fnv64.o no64bit_hash_64.o \ 251 | no64bit_hash_64a.o no64bit_test_fnv.o 252 | ${CC} ${CFLAGS} no64bit_fnv64.o no64bit_hash_64.o \ 253 | no64bit_hash_64a.o no64bit_test_fnv.o -o $@ 254 | 255 | no64bit_fnv164: no64bit_fnv064 256 | -rm -f $@ 257 | -cp -f $? $@ 258 | 259 | no64bit_fnv1a64: no64bit_fnv064 260 | -rm -f $@ 261 | -cp -f $? 
$@ 262 | 263 | vector.c: ${PROGS} ${NO64BIT_PROGS} 264 | -rm -f $@ 265 | echo '/* start of output generated by make $@ */' >> $@ 266 | echo '' >> $@ 267 | #@ 268 | echo '/* FNV-0 32 bit test vectors */' >> $@ 269 | ./fnv032 -t 0 >> $@ 270 | echo '' >> $@ 271 | #@ 272 | echo '/* FNV-1 32 bit test vectors */' >> $@ 273 | ./fnv132 -t 0 >> $@ 274 | echo '' >> $@ 275 | #@ 276 | echo '/* FNV-1a 32 bit test vectors */' >> $@ 277 | ./fnv1a32 -t 0 >> $@ 278 | echo '' >> $@ 279 | #@ 280 | echo '/* FNV-0 64 bit test vectors */' >> $@ 281 | echo '#if defined(HAVE_64BIT_LONG_LONG)' >> $@ 282 | ./fnv064 -t 0 >> $@ 283 | echo '#else /* HAVE_64BIT_LONG_LONG */' >> $@ 284 | ./no64bit_fnv064 -t 0 >> $@ 285 | echo '#endif /* HAVE_64BIT_LONG_LONG */' >> $@ 286 | echo '' >> $@ 287 | #@ 288 | echo '/* FNV-1 64 bit test vectors */' >> $@ 289 | echo '#if defined(HAVE_64BIT_LONG_LONG)' >> $@ 290 | ./fnv164 -t 0 >> $@ 291 | echo '#else /* HAVE_64BIT_LONG_LONG */' >> $@ 292 | ./no64bit_fnv164 -t 0 >> $@ 293 | echo '#endif /* HAVE_64BIT_LONG_LONG */' >> $@ 294 | echo '' >> $@ 295 | #@ 296 | echo '/* FNV-1a 64 bit test vectors */' >> $@ 297 | echo '#if defined(HAVE_64BIT_LONG_LONG)' >> $@ 298 | ./fnv1a64 -t 0 >> $@ 299 | echo '#else /* HAVE_64BIT_LONG_LONG */' >> $@ 300 | ./no64bit_fnv1a64 -t 0 >> $@ 301 | echo '#endif /* HAVE_64BIT_LONG_LONG */' >> $@ 302 | echo '' >> $@ 303 | #@ 304 | echo '/* end of output generated by make $@ */' >> $@ 305 | -------------------------------------------------------------------------------- /src/fnv/hash_64a.c: -------------------------------------------------------------------------------- 1 | /* 2 | * hash_64 - 64 bit Fowler/Noll/Vo-0 FNV-1a hash code 3 | * 4 | * @(#) $Revision: 5.1 $ 5 | * @(#) $Id: hash_64a.c,v 5.1 2009/06/30 09:01:38 chongo Exp $ 6 | * @(#) $Source: /usr/local/src/cmd/fnv/RCS/hash_64a.c,v $ 7 | * 8 | *** 9 | * 10 | * Fowler/Noll/Vo hash 11 | * 12 | * The basis of this hash algorithm was taken from an idea sent 13 | * as reviewer comments 
to the IEEE POSIX P1003.2 committee by: 14 | * 15 | * Phong Vo (http://www.research.att.com/info/kpv/) 16 | * Glenn Fowler (http://www.research.att.com/~gsf/) 17 | * 18 | * In a subsequent ballot round: 19 | * 20 | * Landon Curt Noll (http://www.isthe.com/chongo/) 21 | * 22 | * improved on their algorithm. Some people tried this hash 23 | * and found that it worked rather well. In an EMail message 24 | * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash. 25 | * 26 | * FNV hashes are designed to be fast while maintaining a low 27 | * collision rate. The FNV speed allows one to quickly hash lots 28 | * of data while maintaining a reasonable collision rate. See: 29 | * 30 | * http://www.isthe.com/chongo/tech/comp/fnv/index.html 31 | * 32 | * for more details as well as other forms of the FNV hash. 33 | * 34 | *** 35 | * 36 | * To use the recommended 64 bit FNV-1a hash, pass FNV1A_64_INIT as the 37 | * Fnv64_t hashval argument to fnv_64a_buf() or fnv_64a_str(). 38 | * 39 | *** 40 | * 41 | * Please do not copyright this code. This code is in the public domain. 42 | * 43 | * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 44 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO 45 | * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR 46 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 47 | * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 48 | * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 49 | * PERFORMANCE OF THIS SOFTWARE. 50 | * 51 | * By: 52 | * chongo /\oo/\ 53 | * http://www.isthe.com/chongo/ 54 | * 55 | * Share and Enjoy! :-) 56 | */ 57 | 58 | #include <stdlib.h> 59 | #include "fnv.h" 60 | 61 | 62 | /* 63 | * FNV-1a defines the initial basis to be non-zero 64 | */ 65 | #if !defined(HAVE_64BIT_LONG_LONG) 66 | const Fnv64_t fnv1a_64_init = { 0x84222325, 0xcbf29ce4 }; 67 | #endif /* ! 
HAVE_64BIT_LONG_LONG */ 68 | 69 | 70 | /* 71 | * 64 bit magic FNV-1a prime 72 | */ 73 | #if defined(HAVE_64BIT_LONG_LONG) 74 | #define FNV_64_PRIME ((Fnv64_t)0x100000001b3ULL) 75 | #else /* HAVE_64BIT_LONG_LONG */ 76 | #define FNV_64_PRIME_LOW ((unsigned long)0x1b3) /* lower bits of FNV prime */ 77 | #define FNV_64_PRIME_SHIFT (8) /* top FNV prime shift above 2^32 */ 78 | #endif /* HAVE_64BIT_LONG_LONG */ 79 | 80 | 81 | /* 82 | * fnv_64a_buf - perform a 64 bit Fowler/Noll/Vo FNV-1a hash on a buffer 83 | * 84 | * input: 85 | * buf - start of buffer to hash 86 | * len - length of buffer in octets 87 | * hval - previous hash value or 0 if first call 88 | * 89 | * returns: 90 | * 64 bit hash as a static hash type 91 | * 92 | * NOTE: To use the recommended 64 bit FNV-1a hash, use FNV1A_64_INIT as the 93 | * hval arg on the first call to either fnv_64a_buf() or fnv_64a_str(). 94 | */ 95 | Fnv64_t 96 | fnv_64a_buf(void *buf, size_t len, Fnv64_t hval) 97 | { 98 | unsigned char *bp = (unsigned char *)buf; /* start of buffer */ 99 | unsigned char *be = bp + len; /* beyond end of buffer */ 100 | 101 | #if defined(HAVE_64BIT_LONG_LONG) 102 | /* 103 | * FNV-1a hash each octet of the buffer 104 | */ 105 | while (bp < be) { 106 | 107 | /* xor the bottom with the current octet */ 108 | hval ^= (Fnv64_t)*bp++; 109 | 110 | /* multiply by the 64 bit FNV magic prime mod 2^64 */ 111 | #if defined(NO_FNV_GCC_OPTIMIZATION) 112 | hval *= FNV_64_PRIME; 113 | #else /* NO_FNV_GCC_OPTIMIZATION */ 114 | hval += (hval << 1) + (hval << 4) + (hval << 5) + 115 | (hval << 7) + (hval << 8) + (hval << 40); 116 | #endif /* NO_FNV_GCC_OPTIMIZATION */ 117 | } 118 | 119 | #else /* HAVE_64BIT_LONG_LONG */ 120 | 121 | unsigned long val[4]; /* hash value in base 2^16 */ 122 | unsigned long tmp[4]; /* tmp 64 bit value */ 123 | 124 | /* 125 | * Convert Fnv64_t hval into a base 2^16 array 126 | */ 127 | val[0] = hval.w32[0]; 128 | val[1] = (val[0] >> 16); 129 | val[0] &= 0xffff; 130 | val[2] = hval.w32[1]; 131 
| val[3] = (val[2] >> 16); 132 | val[2] &= 0xffff; 133 | 134 | /* 135 | * FNV-1a hash each octet of the buffer 136 | */ 137 | while (bp < be) { 138 | 139 | /* xor the bottom with the current octet */ 140 | val[0] ^= (unsigned long)*bp++; 141 | 142 | /* 143 | * multiply by the 64 bit FNV magic prime mod 2^64 144 | * 145 | * Using 0x100000001b3 we have the following digits base 2^16: 146 | * 147 | * 0x0 0x100 0x0 0x1b3 148 | * 149 | * which is the same as: 150 | * 151 | * 0x0 1<> 16); 163 | val[0] = tmp[0] & 0xffff; 164 | tmp[2] += (tmp[1] >> 16); 165 | val[1] = tmp[1] & 0xffff; 166 | val[3] = tmp[3] + (tmp[2] >> 16); 167 | val[2] = tmp[2] & 0xffff; 168 | /* 169 | * Doing a val[3] &= 0xffff; is not really needed since it simply 170 | * removes multiples of 2^64. We can discard these excess bits 171 | * outside of the loop when we convert to Fnv64_t. 172 | */ 173 | } 174 | 175 | /* 176 | * Convert base 2^16 array back into an Fnv64_t 177 | */ 178 | hval.w32[1] = ((val[3]<<16) | val[2]); 179 | hval.w32[0] = ((val[1]<<16) | val[0]); 180 | 181 | #endif /* HAVE_64BIT_LONG_LONG */ 182 | 183 | /* return our new hash value */ 184 | return hval; 185 | } 186 | 187 | 188 | /* 189 | * fnv_64a_str - perform a 64 bit Fowler/Noll/Vo FNV-1a hash on a buffer 190 | * 191 | * input: 192 | * buf - start of buffer to hash 193 | * hval - previous hash value or 0 if first call 194 | * 195 | * returns: 196 | * 64 bit hash as a static hash type 197 | * 198 | * NOTE: To use the recommended 64 bit FNV-1a hash, use FNV1A_64_INIT as the 199 | * hval arg on the first call to either fnv_64a_buf() or fnv_64a_str(). 
200 | */ 201 | Fnv64_t 202 | fnv_64a_str(char *str, Fnv64_t hval) 203 | { 204 | unsigned char *s = (unsigned char *)str; /* unsigned string */ 205 | 206 | #if defined(HAVE_64BIT_LONG_LONG) 207 | 208 | /* 209 | * FNV-1a hash each octet of the string 210 | */ 211 | while (*s) { 212 | 213 | /* xor the bottom with the current octet */ 214 | hval ^= (Fnv64_t)*s++; 215 | 216 | /* multiply by the 64 bit FNV magic prime mod 2^64 */ 217 | #if defined(NO_FNV_GCC_OPTIMIZATION) 218 | hval *= FNV_64_PRIME; 219 | #else /* NO_FNV_GCC_OPTIMIZATION */ 220 | hval += (hval << 1) + (hval << 4) + (hval << 5) + 221 | (hval << 7) + (hval << 8) + (hval << 40); 222 | #endif /* NO_FNV_GCC_OPTIMIZATION */ 223 | } 224 | 225 | #else /* !HAVE_64BIT_LONG_LONG */ 226 | 227 | unsigned long val[4]; /* hash value in base 2^16 */ 228 | unsigned long tmp[4]; /* tmp 64 bit value */ 229 | 230 | /* 231 | * Convert Fnv64_t hval into a base 2^16 array 232 | */ 233 | val[0] = hval.w32[0]; 234 | val[1] = (val[0] >> 16); 235 | val[0] &= 0xffff; 236 | val[2] = hval.w32[1]; 237 | val[3] = (val[2] >> 16); 238 | val[2] &= 0xffff; 239 | 240 | /* 241 | * FNV-1a hash each octet of the string 242 | */ 243 | while (*s) { 244 | 245 | /* xor the bottom with the current octet */ 246 | 247 | /* 248 | * multiply by the 64 bit FNV magic prime mod 2^64 249 | * 250 | * Using 1099511628211, we have the following digits base 2^16: 251 | * 252 | * 0x0 0x100 0x0 0x1b3 253 | * 254 | * which is the same as: 255 | * 256 | * 0x0 1<> 16); 268 | val[0] = tmp[0] & 0xffff; 269 | tmp[2] += (tmp[1] >> 16); 270 | val[1] = tmp[1] & 0xffff; 271 | val[3] = tmp[3] + (tmp[2] >> 16); 272 | val[2] = tmp[2] & 0xffff; 273 | /* 274 | * Doing a val[3] &= 0xffff; is not really needed since it simply 275 | * removes multiples of 2^64. We can discard these excess bits 276 | * outside of the loop when we convert to Fnv64_t. 
277 | */ 278 | val[0] ^= (unsigned long)(*s++); 279 | } 280 | 281 | /* 282 | * Convert base 2^16 array back into an Fnv64_t 283 | */ 284 | hval.w32[1] = ((val[3]<<16) | val[2]); 285 | hval.w32[0] = ((val[1]<<16) | val[0]); 286 | 287 | #endif /* !HAVE_64BIT_LONG_LONG */ 288 | 289 | /* return our new hash value */ 290 | return hval; 291 | } 292 | -------------------------------------------------------------------------------- /src/fnv/hash_64.c: -------------------------------------------------------------------------------- 1 | /* 2 | * hash_64 - 64 bit Fowler/Noll/Vo-0 hash code 3 | * 4 | * @(#) $Revision: 5.1 $ 5 | * @(#) $Id: hash_64.c,v 5.1 2009/06/30 09:01:38 chongo Exp $ 6 | * @(#) $Source: /usr/local/src/cmd/fnv/RCS/hash_64.c,v $ 7 | * 8 | *** 9 | * 10 | * Fowler/Noll/Vo hash 11 | * 12 | * The basis of this hash algorithm was taken from an idea sent 13 | * as reviewer comments to the IEEE POSIX P1003.2 committee by: 14 | * 15 | * Phong Vo (http://www.research.att.com/info/kpv/) 16 | * Glenn Fowler (http://www.research.att.com/~gsf/) 17 | * 18 | * In a subsequent ballot round: 19 | * 20 | * Landon Curt Noll (http://www.isthe.com/chongo/) 21 | * 22 | * improved on their algorithm. Some people tried this hash 23 | * and found that it worked rather well. In an EMail message 24 | * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash. 25 | * 26 | * FNV hashes are designed to be fast while maintaining a low 27 | * collision rate. The FNV speed allows one to quickly hash lots 28 | * of data while maintaining a reasonable collision rate. See: 29 | * 30 | * http://www.isthe.com/chongo/tech/comp/fnv/index.html 31 | * 32 | * for more details as well as other forms of the FNV hash. 33 | * 34 | *** 35 | * 36 | * NOTE: The FNV-0 historic hash is not recommended. One should use 37 | * the FNV-1 hash instead. 38 | * 39 | * To use the 64 bit FNV-0 historic hash, pass FNV0_64_INIT as the 40 | * Fnv64_t hashval argument to fnv_64_buf() or fnv_64_str(). 
41 | * 42 | * To use the recommended 64 bit FNV-1 hash, pass FNV1_64_INIT as the 43 | * Fnv64_t hashval argument to fnv_64_buf() or fnv_64_str(). 44 | * 45 | *** 46 | * 47 | * Please do not copyright this code. This code is in the public domain. 48 | * 49 | * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 50 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO 51 | * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR 52 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 53 | * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 54 | * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 55 | * PERFORMANCE OF THIS SOFTWARE. 56 | * 57 | * By: 58 | * chongo /\oo/\ 59 | * http://www.isthe.com/chongo/ 60 | * 61 | * Share and Enjoy! :-) 62 | */ 63 | 64 | #include 65 | #include "fnv.h" 66 | 67 | 68 | /* 69 | * FNV-0 defines the initial basis to be zero 70 | */ 71 | #if !defined(HAVE_64BIT_LONG_LONG) 72 | const Fnv64_t fnv0_64_init = { 0UL, 0UL }; 73 | #endif /* ! HAVE_64BIT_LONG_LONG */ 74 | 75 | 76 | /* 77 | * FNV-1 defines the initial basis to be non-zero 78 | */ 79 | #if !defined(HAVE_64BIT_LONG_LONG) 80 | const Fnv64_t fnv1_64_init = { 0x84222325UL, 0xcbf29ce4UL }; 81 | #endif /* ! 
HAVE_64BIT_LONG_LONG */ 82 | 83 | 84 | /* 85 | * 64 bit magic FNV-0 and FNV-1 prime 86 | */ 87 | #if defined(HAVE_64BIT_LONG_LONG) 88 | #define FNV_64_PRIME ((Fnv64_t)0x100000001b3ULL) 89 | #else /* HAVE_64BIT_LONG_LONG */ 90 | #define FNV_64_PRIME_LOW ((unsigned long)0x1b3) /* lower bits of FNV prime */ 91 | #define FNV_64_PRIME_SHIFT (8) /* top FNV prime shift above 2^32 */ 92 | #endif /* HAVE_64BIT_LONG_LONG */ 93 | 94 | 95 | /* 96 | * fnv_64_buf - perform a 64 bit Fowler/Noll/Vo hash on a buffer 97 | * 98 | * input: 99 | * buf - start of buffer to hash 100 | * len - length of buffer in octets 101 | * hval - previous hash value or 0 if first call 102 | * 103 | * returns: 104 | * 64 bit hash as a static hash type 105 | * 106 | * NOTE: To use the 64 bit FNV-0 historic hash, use FNV0_64_INIT as the hval 107 | * argument on the first call to either fnv_64_buf() or fnv_64_str(). 108 | * 109 | * NOTE: To use the recommended 64 bit FNV-1 hash, use FNV1_64_INIT as the hval 110 | * argument on the first call to either fnv_64_buf() or fnv_64_str(). 
111 | */ 112 | Fnv64_t 113 | fnv_64_buf(void *buf, size_t len, Fnv64_t hval) 114 | { 115 | unsigned char *bp = (unsigned char *)buf; /* start of buffer */ 116 | unsigned char *be = bp + len; /* beyond end of buffer */ 117 | 118 | #if defined(HAVE_64BIT_LONG_LONG) 119 | 120 | /* 121 | * FNV-1 hash each octet of the buffer 122 | */ 123 | while (bp < be) { 124 | 125 | /* multiply by the 64 bit FNV magic prime mod 2^64 */ 126 | #if defined(NO_FNV_GCC_OPTIMIZATION) 127 | hval *= FNV_64_PRIME; 128 | #else /* NO_FNV_GCC_OPTIMIZATION */ 129 | hval += (hval << 1) + (hval << 4) + (hval << 5) + 130 | (hval << 7) + (hval << 8) + (hval << 40); 131 | #endif /* NO_FNV_GCC_OPTIMIZATION */ 132 | 133 | /* xor the bottom with the current octet */ 134 | hval ^= (Fnv64_t)*bp++; 135 | } 136 | 137 | #else /* HAVE_64BIT_LONG_LONG */ 138 | 139 | unsigned long val[4]; /* hash value in base 2^16 */ 140 | unsigned long tmp[4]; /* tmp 64 bit value */ 141 | 142 | /* 143 | * Convert Fnv64_t hval into a base 2^16 array 144 | */ 145 | val[0] = hval.w32[0]; 146 | val[1] = (val[0] >> 16); 147 | val[0] &= 0xffff; 148 | val[2] = hval.w32[1]; 149 | val[3] = (val[2] >> 16); 150 | val[2] &= 0xffff; 151 | 152 | /* 153 | * FNV-1 hash each octet of the buffer 154 | */ 155 | while (bp < be) { 156 | 157 | /* 158 | * multiply by the 64 bit FNV magic prime mod 2^64 159 | * 160 | * Using 0x100000001b3 we have the following digits base 2^16: 161 | * 162 | * 0x0 0x100 0x0 0x1b3 163 | * 164 | * which is the same as: 165 | * 166 | * 0x0 1<> 16); 178 | val[0] = tmp[0] & 0xffff; 179 | tmp[2] += (tmp[1] >> 16); 180 | val[1] = tmp[1] & 0xffff; 181 | val[3] = tmp[3] + (tmp[2] >> 16); 182 | val[2] = tmp[2] & 0xffff; 183 | /* 184 | * Doing a val[3] &= 0xffff; is not really needed since it simply 185 | * removes multiples of 2^64. We can discard these excess bits 186 | * outside of the loop when we convert to Fnv64_t. 
187 | */ 188 | 189 | /* xor the bottom with the current octet */ 190 | val[0] ^= (unsigned long)*bp++; 191 | } 192 | 193 | /* 194 | * Convert base 2^16 array back into an Fnv64_t 195 | */ 196 | hval.w32[1] = ((val[3]<<16) | val[2]); 197 | hval.w32[0] = ((val[1]<<16) | val[0]); 198 | 199 | #endif /* HAVE_64BIT_LONG_LONG */ 200 | 201 | /* return our new hash value */ 202 | return hval; 203 | } 204 | 205 | 206 | /* 207 | * fnv_64_str - perform a 64 bit Fowler/Noll/Vo hash on a buffer 208 | * 209 | * input: 210 | * buf - start of buffer to hash 211 | * hval - previous hash value or 0 if first call 212 | * 213 | * returns: 214 | * 64 bit hash as a static hash type 215 | * 216 | * NOTE: To use the 64 bit FNV-0 historic hash, use FNV0_64_INIT as the hval 217 | * argument on the first call to either fnv_64_buf() or fnv_64_str(). 218 | * 219 | * NOTE: To use the recommended 64 bit FNV-1 hash, use FNV1_64_INIT as the hval 220 | * argument on the first call to either fnv_64_buf() or fnv_64_str(). 221 | */ 222 | Fnv64_t 223 | fnv_64_str(char *str, Fnv64_t hval) 224 | { 225 | unsigned char *s = (unsigned char *)str; /* unsigned string */ 226 | 227 | #if defined(HAVE_64BIT_LONG_LONG) 228 | 229 | /* 230 | * FNV-1 hash each octet of the string 231 | */ 232 | while (*s) { 233 | 234 | /* multiply by the 64 bit FNV magic prime mod 2^64 */ 235 | #if defined(NO_FNV_GCC_OPTIMIZATION) 236 | hval *= FNV_64_PRIME; 237 | #else /* NO_FNV_GCC_OPTIMIZATION */ 238 | hval += (hval << 1) + (hval << 4) + (hval << 5) + 239 | (hval << 7) + (hval << 8) + (hval << 40); 240 | #endif /* NO_FNV_GCC_OPTIMIZATION */ 241 | 242 | /* xor the bottom with the current octet */ 243 | hval ^= (Fnv64_t)*s++; 244 | } 245 | 246 | #else /* !HAVE_64BIT_LONG_LONG */ 247 | 248 | unsigned long val[4]; /* hash value in base 2^16 */ 249 | unsigned long tmp[4]; /* tmp 64 bit value */ 250 | 251 | /* 252 | * Convert Fnv64_t hval into a base 2^16 array 253 | */ 254 | val[0] = hval.w32[0]; 255 | val[1] = (val[0] >> 16); 256 | 
val[0] &= 0xffff; 257 | val[2] = hval.w32[1]; 258 | val[3] = (val[2] >> 16); 259 | val[2] &= 0xffff; 260 | 261 | /* 262 | * FNV-1 hash each octet of the string 263 | */ 264 | while (*s) { 265 | 266 | /* 267 | * multiply by the 64 bit FNV magic prime mod 2^64 268 | * 269 | * Using 1099511628211, we have the following digits base 2^16: 270 | * 271 | * 0x0 0x100 0x0 0x1b3 272 | * 273 | * which is the same as: 274 | * 275 | * 0x0 1<> 16); 287 | val[0] = tmp[0] & 0xffff; 288 | tmp[2] += (tmp[1] >> 16); 289 | val[1] = tmp[1] & 0xffff; 290 | val[3] = tmp[3] + (tmp[2] >> 16); 291 | val[2] = tmp[2] & 0xffff; 292 | /* 293 | * Doing a val[3] &= 0xffff; is not really needed since it simply 294 | * removes multiples of 2^64. We can discard these excess bits 295 | * outside of the loop when we convert to Fnv64_t. 296 | */ 297 | 298 | /* xor the bottom with the current octet */ 299 | val[0] ^= (unsigned long)(*s++); 300 | } 301 | 302 | /* 303 | * Convert base 2^16 array back into an Fnv64_t 304 | */ 305 | hval.w32[1] = ((val[3]<<16) | val[2]); 306 | hval.w32[0] = ((val[1]<<16) | val[0]); 307 | 308 | #endif /* !HAVE_64BIT_LONG_LONG */ 309 | 310 | /* return our new hash value */ 311 | return hval; 312 | } 313 | -------------------------------------------------------------------------------- /src/Hash.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | namespace py = pybind11; 12 | 13 | const size_t BOM_MARK_SIZE = 2; 14 | 15 | #ifdef _MSC_VER 16 | 17 | typedef int int32_t; 18 | typedef unsigned int uint32_t; 19 | typedef __int64 int64_t; 20 | typedef unsigned __int64 uint64_t; 21 | 22 | #else // _MSC_VER 23 | 24 | #include 25 | 26 | #ifdef SUPPORT_INT128 27 | 28 | typedef unsigned __int128 uint128_t; 29 | typedef std::array uint256_t; 30 | typedef std::array uint512_t; 31 | 32 | #define U128_LO(v) static_cast(v >> 64) 33 | #define 
U128_HI(v) static_cast(v) 34 | 35 | #define U128_NEW(LO, HI) ((static_cast(HI) << 64) + static_cast(LO)) 36 | 37 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 38 | const int IS_LITTLE_ENDIAN = 1; 39 | #else 40 | const int IS_LITTLE_ENDIAN = 0; 41 | #endif 42 | const int PyLong_Unsigned = 0; 43 | 44 | namespace pybind11 45 | { 46 | namespace detail 47 | { 48 | template <> 49 | struct type_caster 50 | { 51 | public: 52 | PYBIND11_TYPE_CASTER(uint128_t, _("uint128_t")); 53 | 54 | bool load(handle src, bool) 55 | { 56 | py::object n = py::reinterpret_steal(PyNumber_Long(src.ptr())); 57 | 58 | if (!n) 59 | { 60 | return false; 61 | } 62 | 63 | _PyLong_AsByteArray((PyLongObject *)n.ptr(), (unsigned char *)&value, sizeof(uint128_t), IS_LITTLE_ENDIAN, PyLong_Unsigned); 64 | 65 | return !PyErr_Occurred(); 66 | } 67 | 68 | static handle cast(uint128_t src, return_value_policy /* policy */, handle /* parent */) 69 | { 70 | return _PyLong_FromByteArray((const unsigned char *)&src, sizeof(uint128_t), IS_LITTLE_ENDIAN, PyLong_Unsigned); 71 | } 72 | }; 73 | 74 | template <> 75 | struct type_caster 76 | { 77 | public: 78 | PYBIND11_TYPE_CASTER(uint256_t, _("uint256_t")); 79 | 80 | bool load(handle src, bool) 81 | { 82 | py::object n = py::reinterpret_steal(PyNumber_Long(src.ptr())); 83 | 84 | if (!n) 85 | { 86 | return false; 87 | } 88 | 89 | _PyLong_AsByteArray((PyLongObject *)n.ptr(), (unsigned char *)&value, sizeof(uint256_t), IS_LITTLE_ENDIAN, PyLong_Unsigned); 90 | 91 | return !PyErr_Occurred(); 92 | } 93 | 94 | static handle cast(uint256_t src, return_value_policy /* policy */, handle /* parent */) 95 | { 96 | return _PyLong_FromByteArray((const unsigned char *)src.data(), sizeof(uint256_t), IS_LITTLE_ENDIAN, PyLong_Unsigned); 97 | } 98 | }; 99 | 100 | template <> 101 | struct type_caster 102 | { 103 | public: 104 | PYBIND11_TYPE_CASTER(uint512_t, _("uint512_t")); 105 | 106 | bool load(handle src, bool) 107 | { 108 | py::object n = 
py::reinterpret_steal(PyNumber_Long(src.ptr())); 109 | 110 | if (!n) 111 | { 112 | return false; 113 | } 114 | 115 | _PyLong_AsByteArray((PyLongObject *)n.ptr(), (unsigned char *)&value, sizeof(uint512_t), IS_LITTLE_ENDIAN, PyLong_Unsigned); 116 | 117 | return !PyErr_Occurred(); 118 | } 119 | 120 | static handle cast(uint512_t src, return_value_policy /* policy */, handle /* parent */) 121 | { 122 | return _PyLong_FromByteArray((const unsigned char *)src.data(), sizeof(uint512_t), IS_LITTLE_ENDIAN, PyLong_Unsigned); 123 | } 124 | }; 125 | } // namespace detail 126 | } // namespace pybind11 127 | 128 | #endif // SUPPORT_INT128 129 | 130 | #endif // _MSC_VER 131 | 132 | template 133 | class Hasher 134 | { 135 | public: 136 | typedef S seed_value_t; 137 | typedef H hash_value_t; 138 | 139 | protected: 140 | seed_value_t _seed; 141 | 142 | Hasher(seed_value_t seed = {}) : _seed(seed) {} 143 | 144 | public: 145 | virtual ~Hasher(void) {} 146 | 147 | static py::object CallWithArgs(py::args args, py::kwargs kwargs); 148 | 149 | static py::class_ Export(const py::module &m, const char *name) 150 | { 151 | return py::class_(m, name) 152 | .def(py::init(), py::arg("seed") = 0) 153 | .def_readwrite("seed", &T::_seed) 154 | .def("__call__", &T::CallWithArgs); 155 | } 156 | }; 157 | 158 | template 159 | class Fingerprinter 160 | { 161 | public: 162 | typedef H fingerprint_t; 163 | 164 | virtual ~Fingerprinter(void) {} 165 | 166 | static py::object CallWithArgs(py::args args, py::kwargs kwargs); 167 | 168 | static void Export(const py::module &m, const char *name) 169 | { 170 | py::class_(m, name) 171 | .def(py::init<>()) 172 | .def("__call__", &T::CallWithArgs); 173 | } 174 | }; 175 | 176 | void handle_data(PyObject *obj, std::function callback); 177 | 178 | template 179 | T as_hash_value(S from) 180 | { 181 | return static_cast(from); 182 | } 183 | 184 | template 185 | S as_seed_value(T hash) 186 | { 187 | return hash; 188 | } 189 | 190 | #ifdef SUPPORT_INT128 191 | 192 | 
template <> 193 | uint128_t as_hash_value(uint64_t seed) 194 | { 195 | return seed; 196 | } 197 | 198 | template <> 199 | uint64_t as_seed_value(uint128_t hash) 200 | { 201 | return static_cast(hash); 202 | } 203 | 204 | template <> 205 | uint256_t as_hash_value(uint64_t seed) 206 | { 207 | return {seed}; 208 | } 209 | 210 | template <> 211 | uint64_t as_seed_value(uint256_t hash) 212 | { 213 | return hash[0]; 214 | } 215 | 216 | template <> 217 | uint64_t as_hash_value(uint256_t seed) 218 | { 219 | return seed[0]; 220 | } 221 | 222 | template <> 223 | uint256_t as_seed_value(uint64_t hash) 224 | { 225 | return {hash, hash, hash, hash}; 226 | } 227 | 228 | template <> 229 | uint256_t as_hash_value(uint128_t seed) 230 | { 231 | return {U128_LO(seed), U128_HI(seed), U128_LO(seed), U128_HI(seed)}; 232 | } 233 | 234 | template <> 235 | uint128_t as_seed_value(uint256_t hash) 236 | { 237 | return U128_NEW(hash[0], hash[1]); 238 | } 239 | 240 | template <> 241 | uint128_t as_hash_value(uint256_t seed) 242 | { 243 | return U128_NEW(seed[0], seed[1]); 244 | } 245 | 246 | template <> 247 | uint256_t as_seed_value(uint128_t hash) 248 | { 249 | return {U128_LO(hash), U128_HI(hash), U128_LO(hash), U128_HI(hash)}; 250 | } 251 | 252 | #endif // SUPPORT_INT128 253 | 254 | template 255 | py::object Hasher::CallWithArgs(py::args args, py::kwargs kwargs) 256 | { 257 | if (args.size() == 0) 258 | { 259 | throw std::invalid_argument("missed self argument"); 260 | } 261 | 262 | py::object self = args[0]; 263 | 264 | if (!self) 265 | { 266 | PyErr_SetString(PyExc_TypeError, "wrong type of self argument"); 267 | 268 | throw py::error_already_set(); 269 | } 270 | 271 | const T &hasher = self.cast(); 272 | typename T::hash_value_t value = 273 | // for back compatibility, it should be: kwargs["seed"].cast() 274 | kwargs.contains("seed") ? 
kwargs["seed"].cast() 275 | : as_hash_value(hasher._seed); 276 | 277 | std::for_each(std::next(args.begin()), args.end(), [&](const py::handle &arg) 278 | { handle_data(arg.ptr(), [&](const char *buf, Py_ssize_t len) 279 | { value = hasher((void *)buf, len, as_seed_value(value)); }); }); 280 | return py::cast(value); 281 | } 282 | 283 | template 284 | py::object Fingerprinter::CallWithArgs(py::args args, py::kwargs kwargs) 285 | { 286 | if (args.size() == 0) 287 | { 288 | throw std::invalid_argument("missed self argument"); 289 | } 290 | 291 | py::object self = args[0]; 292 | 293 | if (!self) 294 | { 295 | PyErr_SetString(PyExc_TypeError, "wrong type of self argument"); 296 | 297 | throw py::error_already_set(); 298 | } 299 | 300 | const T &fingerprinter = self.cast(); 301 | std::vector results; 302 | 303 | std::for_each(std::next(args.begin()), args.end(), [&](const py::handle &arg) 304 | { handle_data(arg.ptr(), [&](const char *buf, Py_ssize_t len) 305 | { results.push_back(fingerprinter((void *)buf, len)); }); }); 306 | 307 | if (results.size() == 1) 308 | { 309 | return py::cast(results.front()); 310 | } 311 | 312 | py::list fingerprintes; 313 | 314 | for (auto result : results) 315 | { 316 | fingerprintes.append(py::cast(result)); 317 | } 318 | 319 | return std::move(fingerprintes); 320 | } 321 | 322 | void handle_data(PyObject *obj, std::function callback) 323 | { 324 | const char *buf = nullptr; 325 | Py_ssize_t len = 0; 326 | 327 | #if PY_MAJOR_VERSION < 3 328 | if (PyString_CheckExact(obj)) 329 | { 330 | if (-1 == PyString_AsStringAndSize(obj, (char **)&buf, &len)) 331 | { 332 | throw py::error_already_set(); 333 | } 334 | } 335 | #else 336 | if (PyBytes_CheckExact(obj)) 337 | { 338 | if (-1 == PyBytes_AsStringAndSize(obj, (char **)&buf, &len)) 339 | { 340 | throw py::error_already_set(); 341 | } 342 | } 343 | #endif 344 | else if (PyUnicode_CheckExact(obj)) 345 | { 346 | #if PY_MAJOR_VERSION > 2 347 | #ifndef Py_UNICODE_WIDE 348 | if (PyUnicode_2BYTE_KIND 
== PyUnicode_KIND(obj) && PyUnicode_IS_READY(obj)) 349 | { 350 | buf = reinterpret_cast(PyUnicode_2BYTE_DATA(obj)); 351 | len = PyUnicode_GET_LENGTH(obj) * Py_UNICODE_SIZE; 352 | } 353 | else 354 | { 355 | #endif 356 | py::object utf16 = py::reinterpret_steal(PyUnicode_AsUTF16String(obj)); 357 | 358 | if (!utf16) 359 | { 360 | throw py::error_already_set(); 361 | } 362 | 363 | if (-1 == PyBytes_AsStringAndSize(utf16.ptr(), (char **)&buf, &len)) 364 | { 365 | throw py::error_already_set(); 366 | } 367 | 368 | buf += BOM_MARK_SIZE; 369 | len -= BOM_MARK_SIZE; 370 | 371 | callback(buf, len); 372 | return; 373 | #ifndef Py_UNICODE_WIDE 374 | } 375 | #endif 376 | #else 377 | #ifdef Py_UNICODE_WIDE 378 | py::object utf16 = py::reinterpret_steal(PyUnicode_AsUTF16String(obj)); 379 | 380 | if (!utf16) 381 | { 382 | throw py::error_already_set(); 383 | } 384 | 385 | buf = PyString_AS_STRING(utf16.ptr()) + BOM_MARK_SIZE; 386 | len = PyString_GET_SIZE(utf16.ptr()) - BOM_MARK_SIZE; 387 | #else 388 | buf = PyUnicode_AS_DATA(obj); 389 | len = PyUnicode_GET_DATA_SIZE(obj); 390 | #endif 391 | 392 | callback(buf, len); 393 | return; 394 | #endif 395 | } 396 | #if PY_MAJOR_VERSION < 3 397 | else if (PyObject_CheckReadBuffer(obj)) 398 | { 399 | if (-1 == PyObject_AsReadBuffer(obj, (const void **)&buf, &len)) 400 | { 401 | throw py::error_already_set(); 402 | } 403 | } 404 | #endif 405 | else if (PyObject_CheckBuffer(obj) || PyMemoryView_Check(obj)) 406 | { 407 | py::buffer_info view = py::reinterpret_borrow(obj).request(false); 408 | 409 | if (!PyBuffer_IsContiguous(view.view(), 'C')) 410 | { 411 | throw std::invalid_argument("only support contiguous buffer"); 412 | } 413 | 414 | buf = (const char *)view.ptr; 415 | len = view.size; 416 | } 417 | else 418 | { 419 | PyErr_SetString(PyExc_TypeError, "unsupported argument type"); 420 | 421 | throw py::error_already_set(); 422 | } 423 | 424 | callback(buf, len); 425 | } 426 | 
-------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /src/fnv/fnv32.c: -------------------------------------------------------------------------------- 1 | /* 2 | * fnv32 - 32 bit Fowler/Noll/Vo hash of a buffer or string 3 | * 4 | * @(#) $Revision: 5.4 $ 5 | * @(#) $Id: fnv32.c,v 5.4 2009/07/30 22:49:13 chongo Exp $ 6 | * @(#) $Source: /usr/local/src/cmd/fnv/RCS/fnv32.c,v $ 7 | * 8 | *** 9 | * 10 | * Fowler/Noll/Vo hash 11 | * 12 | * The basis of this hash algorithm was taken from an idea sent 13 | * as reviewer comments to the IEEE POSIX P1003.2 committee by: 14 | * 15 | * Phong Vo (http://www.research.att.com/info/kpv/) 16 | * Glenn Fowler (http://www.research.att.com/~gsf/) 17 | * 18 | * In a subsequent ballot round: 19 | * 20 | * Landon Curt Noll (http://www.isthe.com/chongo/) 21 | * 22 | * improved on their algorithm. Some people tried this hash 23 | * and found that it worked rather well. In an EMail message 24 | * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash. 25 | * 26 | * FNV hashes are designed to be fast while maintaining a low 27 | * collision rate. 
The FNV speed allows one to quickly hash lots 28 | * of data while maintaining a reasonable collision rate. See: 29 | * 30 | * http://www.isthe.com/chongo/tech/comp/fnv/index.html 31 | * 32 | * for more details as well as other forms of the FNV hash. 33 | * 34 | *** 35 | * 36 | * Please do not copyright this code. This code is in the public domain. 37 | * 38 | * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, 39 | * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO 40 | * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR 41 | * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF 42 | * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR 43 | * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 44 | * PERFORMANCE OF THIS SOFTWARE. 45 | * 46 | * By: 47 | * chongo /\oo/\ 48 | * http://www.isthe.com/chongo/ 49 | * 50 | * Share and Enjoy! :-) 51 | */ 52 | 53 | #include 54 | #include 55 | #include 56 | #include 57 | #include 58 | #include 59 | #include 60 | #include "longlong.h" 61 | #include "fnv.h" 62 | 63 | #define WIDTH 32 /* bit width of hash */ 64 | 65 | #define BUF_SIZE (32*1024) /* number of bytes to hash at a time */ 66 | 67 | static char *usage = 68 | "usage: %s [-b bcnt] [-m] [-s arg] [-t code] [-v] [arg ...]\n" 69 | "\n" 70 | "\t-b bcnt\tmask off all but the lower bcnt bits (default 32)\n" 71 | "\t-m\tmultiple hashes, one per line for each arg\n" 72 | "\t-s\thash arg as a string (ignoring terminating NUL bytes)\n" 73 | "\t-t code\t test hash code: (0 ==> generate test vectors\n" 74 | "\t\t\t\t 1 ==> validate against FNV test vectors)\n" 75 | "\t-v\tverbose mode, print arg after hash (implies -m)\n" 76 | "\targ\tstring (if -s was given) or filename (default stdin)\n" 77 | "\n" 78 | "\tNOTE: Programs that begin with fnv0 implement the FNV-0 hash.\n" 79 | "\t The FNV-0 hash is historic FNV algorithm that is now deprecated.\n" 80 | "\n" 81 | 
"\tSee http://www.isthe.com/chongo/tech/comp/fnv/index.html for more info.\n" 82 | "\n" 83 | "\t@(#) FNV Version: %s\n"; 84 | static char *program; /* our name */ 85 | 86 | 87 | /* 88 | * test_fnv32 - test the FNV32 hash 89 | * 90 | * given: 91 | * hash_type type of FNV hash to test 92 | * init_hval initial hash value 93 | * mask lower bit mask 94 | * v_flag 1 => print test failure info on stderr 95 | * code 0 ==> generate FNV test vectors 96 | * 1 ==> validate against FNV test vectors 97 | * 98 | * returns: 0 ==> OK, else test vector failure number 99 | */ 100 | static int 101 | test_fnv32(enum fnv_type hash_type, Fnv32_t init_hval, 102 | Fnv32_t mask, int v_flag, int code) 103 | { 104 | struct test_vector *t; /* FNV test vestor */ 105 | Fnv32_t hval; /* current hash value */ 106 | int tstnum; /* test vector that failed, starting at 1 */ 107 | 108 | /* 109 | * print preamble if generating test vectors 110 | */ 111 | if (code == 0) { 112 | switch (hash_type) { 113 | case FNV0_32: 114 | printf("struct fnv0_32_test_vector fnv0_32_vector[] = {\n"); 115 | break; 116 | case FNV1_32: 117 | printf("struct fnv1_32_test_vector fnv1_32_vector[] = {\n"); 118 | break; 119 | case FNV1a_32: 120 | printf("struct fnv1a_32_test_vector fnv1a_32_vector[] = {\n"); 121 | break; 122 | default: 123 | unknown_hash_type(program, hash_type, 12); /* exit(12) */ 124 | /*NOTREACHED*/ 125 | } 126 | } 127 | 128 | /* 129 | * loop thru all test vectors 130 | */ 131 | for (t = fnv_test_str, tstnum = 1; t->buf != NULL; ++t, ++tstnum) { 132 | 133 | /* 134 | * compute the FNV hash 135 | */ 136 | hval = init_hval; 137 | switch (hash_type) { 138 | case FNV0_32: 139 | case FNV1_32: 140 | hval = fnv_32_buf(t->buf, t->len, hval); 141 | break; 142 | case FNV1a_32: 143 | hval = fnv_32a_buf(t->buf, t->len, hval); 144 | break; 145 | default: 146 | unknown_hash_type(program, hash_type, 13); /* exit(13) */ 147 | /*NOTREACHED*/ 148 | } 149 | 150 | /* 151 | * print the vector 152 | */ 153 | switch (code) { 154 | 
case 0: /* generate the test vector */ 155 | printf(" { &fnv_test_str[%d], (Fnv32_t) 0x%08lxUL },\n", 156 | tstnum-1, hval & mask); 157 | break; 158 | case 1: /* validate against test vector */ 159 | switch (hash_type) { 160 | case FNV0_32: 161 | if ((hval&mask) != (fnv0_32_vector[tstnum-1].fnv0_32 & mask)) { 162 | if (v_flag) { 163 | fprintf(stderr, "%s: failed fnv0_32 test # %d\n", 164 | program, tstnum); 165 | fprintf(stderr, "%s: test # 1 is 1st test\n", program); 166 | fprintf(stderr, 167 | "%s: expected 0x%08lx != generated: 0x%08lx\n", 168 | program, (hval&mask), 169 | (fnv0_32_vector[tstnum-1].fnv0_32 & mask)); 170 | } 171 | return tstnum; 172 | } 173 | break; 174 | case FNV1_32: 175 | if ((hval&mask) != (fnv1_32_vector[tstnum-1].fnv1_32 & mask)) { 176 | if (v_flag) { 177 | fprintf(stderr, "%s: failed fnv1_32 test # %d\n", 178 | program, tstnum); 179 | fprintf(stderr, "%s: test # 1 is 1st test\n", program); 180 | fprintf(stderr, 181 | "%s: expected 0x%08lx != generated: 0x%08lx\n", 182 | program, (hval&mask), 183 | (fnv1_32_vector[tstnum-1].fnv1_32 & mask)); 184 | } 185 | return tstnum; 186 | } 187 | break; 188 | case FNV1a_32: 189 | if ((hval&mask) != (fnv1a_32_vector[tstnum-1].fnv1a_32 &mask)) { 190 | if (v_flag) { 191 | fprintf(stderr, "%s: failed fnv1a_32 test # %d\n", 192 | program, tstnum); 193 | fprintf(stderr, "%s: test # 1 is 1st test\n", program); 194 | fprintf(stderr, 195 | "%s: expected 0x%08lx != generated: 0x%08lx\n", 196 | program, (hval&mask), 197 | (fnv1a_32_vector[tstnum-1].fnv1a_32 & mask)); 198 | } 199 | return tstnum; 200 | } 201 | break; 202 | } 203 | break; 204 | default: 205 | fprintf(stderr, "%s: -m %d not implemented yet\n", program, code); 206 | exit(14); 207 | } 208 | } 209 | 210 | /* 211 | * print completion if generating test vectors 212 | */ 213 | if (code == 0) { 214 | printf(" { NULL, 0 }\n"); 215 | printf("};\n"); 216 | } 217 | 218 | /* 219 | * no failures, return code 0 ==> all OK 220 | */ 221 | return 0; 222 | } 223 | 224 
| 225 | /* 226 | * main - the main function 227 | * 228 | * See the above usage for details. 229 | */ 230 | int 231 | main(int argc, char *argv[]) 232 | { 233 | char buf[BUF_SIZE+1]; /* read buffer */ 234 | int readcnt; /* number of characters written */ 235 | Fnv32_t hval; /* current hash value */ 236 | int s_flag = 0; /* 1 => -s was given, hash args as strings */ 237 | int m_flag = 0; /* 1 => print multiple hashes, one per arg */ 238 | int v_flag = 0; /* 1 => verbose hash print */ 239 | int b_flag = WIDTH; /* -b flag value */ 240 | int t_flag = -1; /* FNV test vector code (0=>print, 1=>test) */ 241 | enum fnv_type hash_type = FNV_NONE; /* type of FNV hash to perform */ 242 | Fnv32_t bmask; /* mask to apply to output */ 243 | extern char *optarg; /* option argument */ 244 | extern int optind; /* argv index of the next arg */ 245 | int fd; /* open file to process */ 246 | char *p; 247 | int i; 248 | 249 | /* 250 | * parse args 251 | */ 252 | program = argv[0]; 253 | while ((i = getopt(argc, argv, "b:mst:v")) != -1) { 254 | switch (i) { 255 | case 'b': /* bcnt bit mask count */ 256 | b_flag = atoi(optarg); 257 | break; 258 | case 'm': /* print multiple hashes, one per arg */ 259 | m_flag = 1; 260 | break; 261 | case 's': /* hash args as strings */ 262 | s_flag = 1; 263 | break; 264 | case 't': /* FNV test vector code */ 265 | t_flag = atoi(optarg); 266 | if (t_flag < 0 || t_flag > 1) { 267 | fprintf(stderr, "%s: -t code must be 0 or 1\n", program); 268 | fprintf(stderr, usage, program, FNV_VERSION); 269 | exit(1); 270 | } 271 | m_flag = 1; 272 | break; 273 | case 'v': /* verbose hash print */ 274 | m_flag = 1; 275 | v_flag = 1; 276 | break; 277 | default: 278 | fprintf(stderr, usage, program, FNV_VERSION); 279 | exit(1); 280 | } 281 | } 282 | /* -t code incompatible with -b, -m and args */ 283 | if (t_flag >= 0) { 284 | if (b_flag != WIDTH) { 285 | fprintf(stderr, "%s: -t code incompatible with -b\n", program); 286 | exit(2); 287 | } 288 | if (s_flag != 0) { 289 | 
fprintf(stderr, "%s: -t code incompatible with -s\n", program); 290 | exit(3); 291 | } 292 | if (optind < argc) { 293 | fprintf(stderr, "%s: -t code incompatible args\n", program); 294 | exit(4); 295 | } 296 | } 297 | /* -s requires at least 1 arg */ 298 | if (s_flag && optind >= argc) { 299 | fprintf(stderr, usage, program, FNV_VERSION); 300 | exit(5); 301 | } 302 | /* limit -b values */ 303 | if (b_flag < 0 || b_flag > WIDTH) { 304 | fprintf(stderr, "%s: -b bcnt: %d must be >= 0 and < %d\n", 305 | program, b_flag, WIDTH); 306 | exit(6); 307 | } 308 | if (b_flag == WIDTH) { 309 | bmask = (Fnv32_t)0xffffffff; 310 | } else { 311 | bmask = (Fnv32_t)((1 << b_flag) - 1); 312 | } 313 | 314 | /* 315 | * start with the initial basis depending on the hash type 316 | */ 317 | p = strrchr(program, '/'); 318 | if (p == NULL) { 319 | p = program; 320 | } else { 321 | ++p; 322 | } 323 | if (strcmp(p, "fnv032") == 0) { 324 | /* using non-recommended FNV-0 and zero initial basis */ 325 | hval = FNV0_32_INIT; 326 | hash_type = FNV0_32; 327 | } else if (strcmp(p, "fnv132") == 0) { 328 | /* using FNV-1 and non-zero initial basis */ 329 | hval = FNV1_32_INIT; 330 | hash_type = FNV1_32; 331 | } else if (strcmp(p, "fnv1a32") == 0) { 332 | /* start with the FNV-1a initial basis */ 333 | hval = FNV1_32A_INIT; 334 | hash_type = FNV1a_32; 335 | } else { 336 | fprintf(stderr, "%s: unknown program name, unknown hash type\n", 337 | program); 338 | exit(7); 339 | } 340 | 341 | /* 342 | * FNV test vector processing, if needed 343 | */ 344 | if (t_flag >= 0) { 345 | int code; /* test vector that failed, starting at 1 */ 346 | 347 | /* 348 | * perform all tests 349 | */ 350 | code = test_fnv32(hash_type, hval, bmask, v_flag, t_flag); 351 | 352 | /* 353 | * evaluate the tests 354 | */ 355 | if (code == 0) { 356 | if (v_flag) { 357 | printf("passed\n"); 358 | } 359 | exit(0); 360 | } else { 361 | printf("failed vector (1 is 1st test): %d\n", code); 362 | exit(8); 363 | } 364 | } 365 | 366 | /* 367 
| * string hashing 368 | */ 369 | if (s_flag) { 370 | 371 | /* hash any other strings */ 372 | for (i=optind; i < argc; ++i) { 373 | switch (hash_type) { 374 | case FNV0_32: 375 | case FNV1_32: 376 | hval = fnv_32_str(argv[i], hval); 377 | break; 378 | case FNV1a_32: 379 | hval = fnv_32a_str(argv[i], hval); 380 | break; 381 | default: 382 | unknown_hash_type(program, hash_type, 9); /* exit(9) */ 383 | /*NOTREACHED*/ 384 | } 385 | if (m_flag) { 386 | print_fnv32(hval, bmask, v_flag, argv[i]); 387 | } 388 | } 389 | 390 | 391 | /* 392 | * file hashing 393 | */ 394 | } else { 395 | 396 | /* 397 | * case: process only stdin 398 | */ 399 | if (optind >= argc) { 400 | 401 | /* case: process only stdin */ 402 | while ((readcnt = read(0, buf, BUF_SIZE)) > 0) { 403 | switch (hash_type) { 404 | case FNV0_32: 405 | case FNV1_32: 406 | hval = fnv_32_buf(buf, readcnt, hval); 407 | break; 408 | case FNV1a_32: 409 | hval = fnv_32a_buf(buf, readcnt, hval); 410 | default: 411 | unknown_hash_type(program, hash_type, 10); /* exit(10) */ 412 | /*NOTREACHED*/ 413 | } 414 | } 415 | if (m_flag) { 416 | print_fnv32(hval, bmask, v_flag, "(stdin)"); 417 | } 418 | 419 | } else { 420 | 421 | /* 422 | * process any other files 423 | */ 424 | for (i=optind; i < argc; ++i) { 425 | 426 | /* open the file */ 427 | fd = open(argv[i], O_RDONLY); 428 | if (fd < 0) { 429 | fprintf(stderr, "%s: unable to open file: %s\n", 430 | program, argv[i]); 431 | exit(4); 432 | } 433 | 434 | /* hash the file */ 435 | while ((readcnt = read(fd, buf, BUF_SIZE)) > 0) { 436 | switch (hash_type) { 437 | case FNV0_32: 438 | case FNV1_32: 439 | hval = fnv_32_buf(buf, readcnt, hval); 440 | break; 441 | case FNV1a_32: 442 | hval = fnv_32a_buf(buf, readcnt, hval); 443 | default: 444 | unknown_hash_type(program, hash_type, 11);/* exit(11) */ 445 | /*NOTREACHED*/ 446 | } 447 | } 448 | 449 | /* finish processing the file */ 450 | if (m_flag) { 451 | print_fnv32(hval, bmask, v_flag, argv[i]); 452 | } 453 | close(fd); 454 | } 
455 | } 456 | } 457 | 458 | /* 459 | * report hash and exit 460 | */ 461 | if (!m_flag) { 462 | print_fnv32(hval, bmask, v_flag, ""); 463 | } 464 | return 0; /* exit(0); */ 465 | } 466 | --------------------------------------------------------------------------------