├── MANIFEST.in ├── .gitignore ├── LICENSE ├── setup.py ├── README.md └── src ├── python-farmhash.h ├── python-farmhash.cc └── farmhash.h /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include src/python-farmhash.h 3 | include src/farmhash.h 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 veelion 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | 
The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | 4 | from setuptools import setup, find_packages, Extension 5 | 6 | VERSION = (0, 4, 0) 7 | 8 | setup( 9 | name='pyfarmhash', 10 | version=".".join([str(x) for x in VERSION]), 11 | keywords=('farmhash', 'google'), 12 | description="Google FarmHash Bindings for Python", 13 | author='Veelion Chong', 14 | author_email='veelion@gmail.com', 15 | url='https://github.com/veelion/python-farmhash', 16 | packages=find_packages('src'), 17 | package_dir={'': 'src'}, 18 | ext_modules=[ 19 | Extension('farmhash', [ 20 | 'src/farmhash.cc', 21 | 'src/python-farmhash.cc' 22 | ], extra_compile_args=["-O4"]) 23 | ], 24 | classifiers=[ 25 | 'Development Status :: 5 - Production/Stable', 26 | 'License :: OSI Approved :: MIT License', 27 | 'Intended Audience :: Developers', 28 | 'Programming Language :: C', 29 | 'Programming Language :: Python', 30 | 'Programming Language :: Python :: 2.6', 31 | 'Programming Language :: Python :: 2.7', 32 | 'Programming Language :: Python :: 3.7', 33 | 'Programming Language :: Python :: 3.8', 34 | 'Programming Language :: Python :: 3.9', 35 | 'Programming Language :: Python :: 3.10', 36 | 'Programming Language :: Python :: 3.11', 
37 | ], 38 | ) 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | python-farmhash 2 | ========== 3 | 4 | 5 | Overview 6 | ======== 7 | This package provides Python bindings for [Google's FarmHash](http://code.google.com/p/farmhash/). 8 | 9 | Code specific to this project is covered by [The MIT License](http://opensource.org/licenses/MIT). 10 | 11 | Forked on 2015.04.06 to add bindings for fingerprint functions. Pull request submitted. 12 | 13 | Install 14 | ======= 15 | *Update*: Windows binary wheel uploaded to PyPI. 16 | 17 | Install from PyPI, or clone the repo and build from source: 18 | 19 | pip (Linux & Windows): 20 | > $ sudo pip install pyfarmhash 21 | 22 | From Source: 23 | > $ cd python-farmhash 24 | > $ sudo python setup.py install 25 | 26 | You need `g++` installed. 27 | 28 | Windows: 29 | (Sheer hackery. I'm sure there's a way to do this with the new VS but this is working and 30 | I don't have hours to work on it. Feel free to submit a pull request) 31 | 32 | > Install Microsoft Visual C++ Compiler for Python 2.7 33 | http://www.microsoft.com/en-us/download/details.aspx?id=44266 34 | > Install msinttypes https://code.google.com/p/msinttypes/ 35 | > Copy stdint.h to the src directory, and modify the farmhash.cc line 47 reference to use double quotes instead of angle brackets 36 | > Add "if defined(_MSC_VER) ||" to line 76 of farmhash.cc 37 | > $ cd python-farmhash 38 | > $ sudo python setup.py install 39 | 40 | Usage 41 | ===== 42 | The library is pretty simple to use: 43 | 44 | > import farmhash 45 | > print farmhash.hash64('abc') 46 | > 2640714258260161385 47 | 48 | For more details, use ipython: 49 | > In [1]: import farmhash 50 | > 51 | > In [2]: farmhash.hash64withseed? 52 | > Type: builtin_function_or_method 53 | > String Form: 54 | > Docstring: 55 | > Hash function for a bytes-like object. 
For convenience, a 64-bit seed is also hashed into the result. 56 | > example: print farmhash.hash64withseed('abc', 12345) 57 | > 13914286602242141520L 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /src/python-farmhash.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Veelion Chong 3 | 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 
21 | */ 22 | 23 | #define PY_SSIZE_T_CLEAN 24 | #include "Python.h" 25 | 26 | static PyObject *py_farmhash_Hash32(PyObject *self, PyObject *args); 27 | static PyObject *py_farmhash_Hash32WithSeed(PyObject *self, PyObject *args); 28 | static PyObject *py_farmhash_Hash64(PyObject *self, PyObject *args); 29 | static PyObject *py_farmhash_Hash64WithSeed(PyObject *self, PyObject *args); 30 | static PyObject *py_farmhash_Hash128(PyObject *self, PyObject *args); 31 | static PyObject *py_farmhash_Hash128WithSeed(PyObject *self, PyObject *args); 32 | static PyObject *py_farmhash_Fingerprint32(PyObject *self, PyObject *args); 33 | static PyObject *py_farmhash_Fingerprint64(PyObject *self, PyObject *args); 34 | static PyObject *py_farmhash_Fingerprint128(PyObject *self, PyObject *args); 35 | 36 | PyMODINIT_FUNC initfarmhash(void); 37 | 38 | #define HASH32_DOCSTRING "Hash function for a bytes-like object. Most useful in 32-bit binaries. \nexample: print farmhash.hash32('abc')\n2521517342" 39 | #define HASH32WITHSEED_DOCSTRING "Hash function for a bytes-like object. For convenience, a 32-bit seed is also hashed into the result. \nexample: print farmhash.hash32withseed('abc', 1234)\n2521517342" 40 | #define HASH64_DOCSTRING "Hash function for a bytes-like object. Returns an unsigned 64-bit integer. \nexample: print farmhash.hash64('abc')\n2640714258260161385L" 41 | #define HASH64WITHSEED_DOCSTRING "Hash function for a bytes-like object. For convenience, a 64-bit seed is also hashed into the result. \nexample: print farmhash.hash64withseed('abc', 12345)\n13914286602242141520L" 42 | #define HASH128_DOCSTRING "Hash function for a bytes-like object. Returns a tuple of two unsigned 64-bit integers: (low64, high64). \nexample: print farmhash.hash128('abc')\n(4143508125394299908L, 11566915719555882565L)" 43 | #define HASH128WITHSEED_DOCSTRING "Hash function for a bytes-like object. 
For convenience, two 32-bit seeds are also hashed into the result.\nexample: print farmhash.hash128withseed('abc', 1234, 0)\n(13364216625615136468L, 11320522948082609695L)" 44 | #define FINGERPRINT32_DOCSTRING "Fingerprint (i.e., good, portable, forever-fixed hash) function for a bytes-like object. Most useful in 32-bit binaries. \nexample: print farmhash.fingerprint32('abc')\n2521517342" 45 | #define FINGERPRINT64_DOCSTRING "Fingerprint (i.e., good, portable, forever-fixed hash) function for a bytes-like object. Returns an unsigned 64-bit integer. \nexample: print farmhash.fingerprint64('abc')\n2640714258260161385L" 46 | #define FINGERPRINT128_DOCSTRING "Fingerprint (i.e., good, portable, forever-fixed hash) function for a bytes-like object. Returns a tuple of two unsigned 64-bit integers: (low64, high64). \nexample: print farmhash.fingerprint128('abc')\n(13364216625615136468L, 11320522948082609695L)" 47 | 48 | #if defined(__SUNPRO_C) || defined(__hpux) || defined(_AIX) 49 | #define inline 50 | #endif 51 | 52 | #ifdef __linux 53 | #define inline __inline 54 | #endif 55 | -------------------------------------------------------------------------------- /src/python-farmhash.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2014, Veelion Chong 3 | 4 | * Permission is hereby granted, free of charge, to any person obtaining a copy 5 | * of this software and associated documentation files (the "Software"), to deal 6 | * in the Software without restriction, including without limitation the rights 7 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 8 | * copies of the Software, and to permit persons to whom the Software is 9 | * furnished to do so, subject to the following conditions: 10 | * 11 | * The above copyright notice and this permission notice shall be included in 12 | * all copies or substantial portions of the Software. 
13 | * 14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | * THE SOFTWARE. 21 | */ 22 | 23 | // PY_SSIZE_T_CLEAN macro must be defined for '#' formats. 24 | #define PY_SSIZE_T_CLEAN 25 | 26 | #include 27 | #include 28 | #include "farmhash.h" 29 | #include "python-farmhash.h" 30 | 31 | using namespace NAMESPACE_FOR_HASH_FUNCTIONS; 32 | 33 | static PyObject * 34 | py_farmhash_Hash32(PyObject *self, PyObject *args) 35 | { 36 | PyObject *result; 37 | const char *s; 38 | Py_ssize_t len; 39 | 40 | if (!PyArg_ParseTuple(args, "s#", &s, &len)) 41 | return NULL; 42 | 43 | uint32_t h = Hash32(s, len); 44 | result = Py_BuildValue("I", h); 45 | 46 | return result; 47 | } 48 | 49 | static PyObject * 50 | py_farmhash_Hash32WithSeed(PyObject *self, PyObject *args) 51 | { 52 | PyObject *result; 53 | const char *s; 54 | Py_ssize_t len; 55 | uint32_t seed; 56 | 57 | if (!PyArg_ParseTuple(args, "s#I", &s, &len, &seed)) 58 | return NULL; 59 | 60 | uint32_t h = Hash32WithSeed(s, len, seed); 61 | result = Py_BuildValue("I", h); 62 | 63 | return result; 64 | } 65 | 66 | static PyObject * 67 | py_farmhash_Hash64(PyObject *self, PyObject *args) 68 | { 69 | PyObject *result; 70 | const char *s; 71 | Py_ssize_t len; 72 | 73 | if (!PyArg_ParseTuple(args, "s#", &s, &len)) 74 | return NULL; 75 | 76 | uint64_t h = Hash64(s, len); 77 | # if __WORDSIZE == 64 78 | const char* int_param = "k"; 79 | # else 80 | const char* int_param = "K"; 81 | #endif 82 | result = Py_BuildValue(int_param, h); 83 | 84 | return result; 85 | } 86 | 87 | static PyObject * 88 | 
py_farmhash_Hash64WithSeed(PyObject *self, PyObject *args) 89 | { 90 | PyObject *result; 91 | const char *s; 92 | Py_ssize_t len; 93 | uint64_t seed; 94 | 95 | if (!PyArg_ParseTuple(args, "s#K", &s, &len, &seed)) 96 | return NULL; 97 | 98 | uint64_t h = Hash64WithSeed(s, len, seed); 99 | # if __WORDSIZE == 64 100 | const char* int_param = "k"; 101 | # else 102 | const char* int_param = "K"; 103 | #endif 104 | result = Py_BuildValue(int_param, h); 105 | 106 | return result; 107 | } 108 | 109 | static PyObject * 110 | py_farmhash_Hash128(PyObject *self, PyObject *args) 111 | { 112 | PyObject *result; 113 | const char *s; 114 | Py_ssize_t len; 115 | 116 | if (!PyArg_ParseTuple(args, "s#", &s, &len)) 117 | return NULL; 118 | 119 | uint128_t h = Hash128(s, len); 120 | uint64_t low64 = Uint128Low64(h); 121 | uint64_t high64 = Uint128High64(h); 122 | result = Py_BuildValue("(KK)", low64, high64); 123 | 124 | return result; 125 | } 126 | 127 | static PyObject * 128 | py_farmhash_Hash128WithSeed(PyObject *self, PyObject *args) 129 | { 130 | PyObject *result; 131 | const char *s; 132 | Py_ssize_t len; 133 | uint64_t seedlow64; 134 | uint64_t seedhigh64; 135 | 136 | if (!PyArg_ParseTuple(args, "s#KK", &s, &len, &seedlow64, &seedhigh64)) 137 | return NULL; 138 | 139 | //std::cout << "seed low64:" << seedlow64 << std::endl; 140 | //std::cout << "seed high64:" << seedhigh64 << std::endl; 141 | uint128_t seed = Uint128(seedlow64, seedhigh64); 142 | uint128_t h = Hash128WithSeed(s, len, seed); 143 | uint64_t low64 = Uint128Low64(h); 144 | uint64_t high64 = Uint128High64(h); 145 | result = Py_BuildValue("(KK)", low64, high64); 146 | 147 | return result; 148 | } 149 | 150 | static PyObject * 151 | py_farmhash_Fingerprint32(PyObject *self, PyObject *args) 152 | { 153 | PyObject *result; 154 | const char *s; 155 | Py_ssize_t len; 156 | 157 | if (!PyArg_ParseTuple(args, "s#", &s, &len)) 158 | return NULL; 159 | 160 | uint32_t h = Fingerprint32(s, len); 161 | result = Py_BuildValue("I", 
h); 162 | 163 | return result; 164 | } 165 | 166 | static PyObject * 167 | py_farmhash_Fingerprint64(PyObject *self, PyObject *args) 168 | { 169 | PyObject *result; 170 | const char *s; 171 | Py_ssize_t len; 172 | 173 | if (!PyArg_ParseTuple(args, "s#", &s, &len)) 174 | return NULL; 175 | 176 | uint64_t h = Fingerprint64(s, len); 177 | # if __WORDSIZE == 64 178 | const char* int_param = "k"; 179 | # else 180 | const char* int_param = "K"; 181 | #endif 182 | result = Py_BuildValue(int_param, h); 183 | 184 | return result; 185 | } 186 | 187 | static PyObject * 188 | py_farmhash_Fingerprint128(PyObject *self, PyObject *args) 189 | { 190 | PyObject *result; 191 | const char *s; 192 | Py_ssize_t len; 193 | 194 | if (!PyArg_ParseTuple(args, "s#", &s, &len)) 195 | return NULL; 196 | 197 | uint128_t h = Fingerprint128(s, len); 198 | uint64_t low64 = Uint128Low64(h); 199 | uint64_t high64 = Uint128High64(h); 200 | result = Py_BuildValue("(KK)", low64, high64); 201 | 202 | return result; 203 | } 204 | 205 | static PyMethodDef FarmHashMethods[] = { 206 | {"hash32", py_farmhash_Hash32, METH_VARARGS, HASH32_DOCSTRING}, 207 | {"hash32withseed", py_farmhash_Hash32WithSeed, METH_VARARGS, HASH32WITHSEED_DOCSTRING}, 208 | {"hash64", py_farmhash_Hash64, METH_VARARGS, HASH64_DOCSTRING}, 209 | {"hash64withseed", py_farmhash_Hash64WithSeed, METH_VARARGS, HASH64WITHSEED_DOCSTRING}, 210 | {"hash128", py_farmhash_Hash128, METH_VARARGS, HASH128_DOCSTRING}, 211 | {"hash128withseed", py_farmhash_Hash128WithSeed, METH_VARARGS, HASH128WITHSEED_DOCSTRING}, 212 | {"fingerprint32", py_farmhash_Fingerprint32, METH_VARARGS, FINGERPRINT32_DOCSTRING}, 213 | {"fingerprint64", py_farmhash_Fingerprint64, METH_VARARGS, FINGERPRINT64_DOCSTRING}, 214 | {"fingerprint128", py_farmhash_Fingerprint128, METH_VARARGS, FINGERPRINT128_DOCSTRING}, 215 | {NULL, NULL, 0, NULL} 216 | }; 217 | 218 | struct module_state { 219 | PyObject *error; 220 | }; 221 | 222 | #if PY_MAJOR_VERSION >= 3 223 | #define GETSTATE(m) 
((struct module_state*)PyModule_GetState(m)) 224 | #else 225 | #define GETSTATE(m) (&_state) 226 | static struct module_state _state; 227 | #endif 228 | 229 | #if PY_MAJOR_VERSION >= 3 230 | 231 | static int myextension_traverse(PyObject *m, visitproc visit, void *arg) { 232 | Py_VISIT(GETSTATE(m)->error); 233 | return 0; 234 | } 235 | 236 | static int myextension_clear(PyObject *m) { 237 | Py_CLEAR(GETSTATE(m)->error); 238 | return 0; 239 | } 240 | 241 | 242 | static struct PyModuleDef moduledef = { 243 | PyModuleDef_HEAD_INIT, 244 | "farmhash", 245 | NULL, 246 | sizeof(struct module_state), 247 | FarmHashMethods, 248 | NULL, 249 | myextension_traverse, 250 | myextension_clear, 251 | NULL 252 | }; 253 | 254 | #define INITERROR return NULL 255 | 256 | extern "C" PyObject * 257 | PyInit_farmhash(void) 258 | 259 | #else 260 | #define INITERROR return 261 | 262 | void 263 | initfarmhash(void) 264 | #endif 265 | { 266 | #if PY_MAJOR_VERSION >= 3 267 | PyObject *module = PyModule_Create(&moduledef); 268 | #else 269 | PyObject *module = Py_InitModule("farmhash", FarmHashMethods); 270 | #endif 271 | struct module_state *st = NULL; 272 | 273 | if (module == NULL) 274 | INITERROR; 275 | st = GETSTATE(module); 276 | 277 | st->error = PyErr_NewException("farmhash.Error", NULL, NULL); 278 | if (st->error == NULL) { 279 | Py_DECREF(module); 280 | INITERROR; 281 | } 282 | 283 | #if PY_MAJOR_VERSION >= 3 284 | return module; 285 | #endif 286 | } 287 | -------------------------------------------------------------------------------- /src/farmhash.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2014 Google, Inc. 
2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), to deal 5 | // in the Software without restriction, including without limitation the rights 6 | // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | // copies of the Software, and to permit persons to whom the Software is 8 | // furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included in 11 | // all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | // THE SOFTWARE. 20 | // 21 | // FarmHash, by Geoff Pike 22 | 23 | // 24 | // http://code.google.com/p/farmhash/ 25 | // 26 | // This file provides a few functions for hashing strings and other 27 | // data. All of them are high-quality functions in the sense that 28 | // they do well on standard tests such as Austin Appleby's SMHasher. 29 | // They're also fast. FarmHash is the successor to CityHash. 30 | // 31 | // Functions in the FarmHash family are not suitable for cryptography. 32 | // 33 | // WARNING: This code has been only lightly tested on big-endian platforms! 34 | // It is known to work well on little-endian platforms that have a small penalty 35 | // for unaligned reads, such as current Intel and AMD moderate-to-high-end CPUs. 
36 | // It should work on all 32-bit and 64-bit platforms that allow unaligned reads; 37 | // bug reports are welcome. 38 | // 39 | // By the way, for some hash functions, given strings a and b, the hash 40 | // of a+b is easily derived from the hashes of a and b. This property 41 | // doesn't hold for any hash functions in this file. 42 | 43 | #ifndef FARM_HASH_H_ 44 | #define FARM_HASH_H_ 45 | 46 | #include 47 | #include "stdint.h" 48 | #include 49 | #include // for memcpy and memset 50 | #include 51 | 52 | #ifndef NAMESPACE_FOR_HASH_FUNCTIONS 53 | #define NAMESPACE_FOR_HASH_FUNCTIONS util 54 | #endif 55 | 56 | namespace NAMESPACE_FOR_HASH_FUNCTIONS { 57 | 58 | #if defined(FARMHASH_UINT128_T_DEFINED) 59 | inline uint64_t Uint128Low64(const uint128_t x) { 60 | return static_cast(x); 61 | } 62 | inline uint64_t Uint128High64(const uint128_t x) { 63 | return static_cast(x >> 64); 64 | } 65 | inline uint128_t Uint128(uint64_t lo, uint64_t hi) { 66 | return lo + (((uint128_t)hi) << 64); 67 | } 68 | #else 69 | typedef std::pair uint128_t; 70 | inline uint64_t Uint128Low64(const uint128_t x) { return x.first; } 71 | inline uint64_t Uint128High64(const uint128_t x) { return x.second; } 72 | inline uint128_t Uint128(uint64_t lo, uint64_t hi) { return uint128_t(lo, hi); } 73 | #endif 74 | 75 | 76 | // BASIC STRING HASHING 77 | 78 | // Hash function for a byte array. 79 | // May change from time to time, may differ on different platforms, may differ 80 | // depending on NDEBUG. 81 | size_t Hash(const char* s, size_t len); 82 | 83 | // Hash function for a byte array. Most useful in 32-bit binaries. 84 | // May change from time to time, may differ on different platforms, may differ 85 | // depending on NDEBUG. 86 | uint32_t Hash32(const char* s, size_t len); 87 | 88 | // Hash function for a byte array. For convenience, a 32-bit seed is also 89 | // hashed into the result. 
90 | // May change from time to time, may differ on different platforms, may differ 91 | // depending on NDEBUG. 92 | uint32_t Hash32WithSeed(const char* s, size_t len, uint32_t seed); 93 | 94 | // Hash 128 input bits down to 64 bits of output. 95 | // Hash function for a byte array. 96 | // May change from time to time, may differ on different platforms, may differ 97 | // depending on NDEBUG. 98 | uint64_t Hash64(const char* s, size_t len); 99 | 100 | // Hash function for a byte array. For convenience, a 64-bit seed is also 101 | // hashed into the result. 102 | // May change from time to time, may differ on different platforms, may differ 103 | // depending on NDEBUG. 104 | uint64_t Hash64WithSeed(const char* s, size_t len, uint64_t seed); 105 | 106 | // Hash function for a byte array. For convenience, two seeds are also 107 | // hashed into the result. 108 | // May change from time to time, may differ on different platforms, may differ 109 | // depending on NDEBUG. 110 | uint64_t Hash64WithSeeds(const char* s, size_t len, 111 | uint64_t seed0, uint64_t seed1); 112 | 113 | // Hash function for a byte array. 114 | // May change from time to time, may differ on different platforms, may differ 115 | // depending on NDEBUG. 116 | uint128_t Hash128(const char* s, size_t len); 117 | 118 | // Hash function for a byte array. For convenience, a 128-bit seed is also 119 | // hashed into the result. 120 | // May change from time to time, may differ on different platforms, may differ 121 | // depending on NDEBUG. 122 | uint128_t Hash128WithSeed(const char* s, size_t len, uint128_t seed); 123 | 124 | // BASIC NON-STRING HASHING 125 | 126 | // This is intended to be a reasonably good hash function. 127 | // May change from time to time, may differ on different platforms, may differ 128 | // depending on NDEBUG. 129 | inline uint64_t Hash128to64(uint128_t x) { 130 | // Murmur-inspired hashing. 
131 | const uint64_t kMul = 0x9ddfea08eb382d69ULL; 132 | uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; 133 | a ^= (a >> 47); 134 | uint64_t b = (Uint128High64(x) ^ a) * kMul; 135 | b ^= (b >> 47); 136 | b *= kMul; 137 | return b; 138 | } 139 | 140 | // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions) 141 | 142 | // Fingerprint function for a byte array. Most useful in 32-bit binaries. 143 | uint32_t Fingerprint32(const char* s, size_t len); 144 | 145 | // Fingerprint function for a byte array. 146 | uint64_t Fingerprint64(const char* s, size_t len); 147 | 148 | // Fingerprint function for a byte array. 149 | uint128_t Fingerprint128(const char* s, size_t len); 150 | 151 | // This is intended to be a good fingerprinting primitive. 152 | // See below for more overloads. 153 | inline uint64_t Fingerprint(uint128_t x) { 154 | // Murmur-inspired hashing. 155 | const uint64_t kMul = 0x9ddfea08eb382d69ULL; 156 | uint64_t a = (Uint128Low64(x) ^ Uint128High64(x)) * kMul; 157 | a ^= (a >> 47); 158 | uint64_t b = (Uint128High64(x) ^ a) * kMul; 159 | b ^= (b >> 44); 160 | b *= kMul; 161 | b ^= (b >> 41); 162 | b *= kMul; 163 | return b; 164 | } 165 | 166 | // This is intended to be a good fingerprinting primitive. 167 | inline uint64_t Fingerprint(uint64_t x) { 168 | // Murmur-inspired hashing. 169 | const uint64_t kMul = 0x9ddfea08eb382d69ULL; 170 | uint64_t b = x * kMul; 171 | b ^= (b >> 44); 172 | b *= kMul; 173 | b ^= (b >> 41); 174 | b *= kMul; 175 | return b; 176 | } 177 | 178 | #ifndef FARMHASH_NO_CXX_STRING 179 | 180 | // Convenience functions to hash or fingerprint C++ strings. 181 | // These require that Str::data() return a pointer to the first char 182 | // (as a const char*) and that Str::length() return the string's length; 183 | // they work with std::string, for example. 184 | 185 | // Hash function for a byte array. 186 | // May change from time to time, may differ on different platforms, may differ 187 | // depending on NDEBUG. 
template <typename Str>
inline size_t Hash(const Str& s) {
  assert(sizeof(s[0]) == 1);  // only byte-sized character types are supported
  return Hash(s.data(), s.length());
}

// Hash function for a byte array.  Most useful in 32-bit binaries.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
template <typename Str>
inline uint32_t Hash32(const Str& s) {
  assert(sizeof(s[0]) == 1);
  return Hash32(s.data(), s.length());
}

// Hash function for a byte array.  For convenience, a 32-bit seed is also
// hashed into the result.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
template <typename Str>
inline uint32_t Hash32WithSeed(const Str& s, uint32_t seed) {
  assert(sizeof(s[0]) == 1);
  return Hash32WithSeed(s.data(), s.length(), seed);
}

// Hash function for a byte array.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
template <typename Str>
inline uint64_t Hash64(const Str& s) {
  assert(sizeof(s[0]) == 1);
  return Hash64(s.data(), s.length());
}

// Hash function for a byte array.  For convenience, a 64-bit seed is also
// hashed into the result.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
template <typename Str>
inline uint64_t Hash64WithSeed(const Str& s, uint64_t seed) {
  assert(sizeof(s[0]) == 1);
  return Hash64WithSeed(s.data(), s.length(), seed);
}

// Hash function for a byte array.  For convenience, two seeds are also
// hashed into the result.
// May change from time to time, may differ on different platforms, may differ
// depending on NDEBUG.
237 | template 238 | inline uint64_t Hash64WithSeeds(const Str& s, uint64_t seed0, uint64_t seed1) { 239 | assert(sizeof(s[0]) == 1); 240 | return Hash64WithSeeds(s.data(), s.length(), seed0, seed1); 241 | } 242 | 243 | // Hash function for a byte array. 244 | // May change from time to time, may differ on different platforms, may differ 245 | // depending on NDEBUG. 246 | template 247 | inline uint128_t Hash128(const Str& s) { 248 | assert(sizeof(s[0]) == 1); 249 | return Hash128(s.data(), s.length()); 250 | } 251 | 252 | // Hash function for a byte array. For convenience, a 128-bit seed is also 253 | // hashed into the result. 254 | // May change from time to time, may differ on different platforms, may differ 255 | // depending on NDEBUG. 256 | template 257 | inline uint128_t Hash128WithSeed(const Str& s, uint128_t seed) { 258 | assert(sizeof(s[0]) == 1); 259 | return Hash128(s.data(), s.length(), seed); 260 | } 261 | 262 | // FINGERPRINTING (i.e., good, portable, forever-fixed hash functions) 263 | 264 | // Fingerprint function for a byte array. Most useful in 32-bit binaries. 265 | template 266 | inline uint32_t Fingerprint32(const Str& s) { 267 | assert(sizeof(s[0]) == 1); 268 | return Fingerprint32(s.data(), s.length()); 269 | } 270 | 271 | // Fingerprint 128 input bits down to 64 bits of output. 272 | // Fingerprint function for a byte array. 273 | template 274 | inline uint64_t Fingerprint64(const Str& s) { 275 | assert(sizeof(s[0]) == 1); 276 | return Fingerprint64(s.data(), s.length()); 277 | } 278 | 279 | // Fingerprint function for a byte array. 280 | template 281 | inline uint128_t Fingerprint128(const Str& s) { 282 | assert(sizeof(s[0]) == 1); 283 | return Fingerprint128(s.data(), s.length()); 284 | } 285 | 286 | #endif 287 | 288 | } // namespace NAMESPACE_FOR_HASH_FUNCTIONS 289 | 290 | #endif // FARM_HASH_H_ 291 | --------------------------------------------------------------------------------