├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── examples ├── perf.py └── simple.py ├── pyhashxx ├── pycompat.h ├── pyhashxx.c ├── xxhash.c └── xxhash.h ├── setup.py └── tests ├── __init__.py ├── test_hash_bytes.py └── test_oneshot.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | 3 | build/ 4 | pyhashxx.egg-info 5 | pyhashxx.so 6 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | python: 4 | - "2.6" 5 | - "2.7" 6 | - "3.2" 7 | - "3.3" 8 | 9 | script: python setup.py test 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | pyhashxx - Fast Hash algorithm 2 | Copyright (C) 2013, Ewen Cheslack-Postava. 3 | Original C Implementation Copyright (C) 2012-2013, Yann Collet. 4 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are 8 | met: 9 | 10 | * Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the following 14 | disclaimer in the documentation and/or other materials provided 15 | with the distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pyhashxx 2 | ======== 3 | 4 | Python wrapper of xxhash that supports concurrency and is packaged 5 | nicely for installation with pip/distribute. 6 | 7 | The existing wrapper I could find used a static global hash context 8 | and wasn't packaged nicely for installation with Python package 9 | managers. This version fixes both those issues. The naming 10 | rearrangement is to avoid conflicts with the existing wrapper in 11 | python. 12 | 13 | See http://code.google.com/p/xxhash/ for the original xxHash code. 14 | 15 | Examples 16 | -------- 17 | 18 | The easiest way to use the hash function is the convenience function 19 | `hashxx` that returns the hash of the bytes of its parameters, and 20 | which will traverse tuples (even recursively). 
21 | 22 | from pyhashxx import hashxx 23 | hashxx(b'Hello World!') 24 | hashxx(b'Hello', b' ', b'World!') 25 | hashxx((b'Hello', b' ', b'World!')) 26 | hashxx((b'Hello', b' '), (b'World!',)) 27 | # All return 198612872 28 | hashxx(b'Hello World!', seed=1) 29 | # Changing the seed changes the result to 2212595744 30 | 31 | You can also use the `Hashxx` class to compute the hash incrementally, 32 | and extract intermediate digest values: 33 | 34 | from pyhashxx import Hashxx 35 | hasher = Hashxx(seed=0) # seed is optional 36 | hasher.update(b'Hello') 37 | hasher.update(b' ') 38 | print(hasher.digest()) # Prints 1401757748 39 | hasher.update(b'World!') 40 | print(hasher.digest()) # Prints 198612872 41 | 42 | See the `examples/` directory for more, including a script testing 43 | performance. 44 | 45 | Buildbot 46 | -------- 47 | [![Build Status](https://secure.travis-ci.org/ewencp/pyhashxx.png)](http://travis-ci.org/ewencp/pyhashxx) 48 | -------------------------------------------------------------------------------- /examples/perf.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import random, time 3 | 4 | from pyhashxx import hashxx 5 | hashfns = [hashxx] 6 | try: 7 | import smhasher 8 | hashfns += [getattr(smhasher, k) for k in dir(smhasher) if k[0] == 'm'] 9 | except: 10 | pass 11 | 12 | print("Generating random data") 13 | chars = b'abcdefghijklmnopqrstuvwxyz' 14 | short_size = 20 15 | short = b''.join([bytes(random.choice(chars)) for i in range(short_size)]) 16 | long_size = 1024 17 | long = b''.join([bytes(random.choice(chars)) for i in range(long_size)]) 18 | extra_long_size = 64*1024*1024 19 | extra_long = long * 64 20 | 21 | def time_trial(hashfn, size, data, number=1000000): 22 | started = time.time() 23 | # Avoid loop overhead 24 | [hashfn(data) for i in range(number)] 25 | finished = time.time() 26 | duration = finished - started 27 | print(" %d bytes %d times, %f s, %f 
ms/hash, %f hashes/s, %f MB/s" % (size, number, duration, duration/number*1000, number/duration, number*size/(duration*1024*1024))) 28 | 29 | if __name__ == "__main__": 30 | for hashfn in hashfns: 31 | print("%s:" % hashfn.__name__) 32 | time_trial(hashfn, short_size, short, number=10000000) 33 | time_trial(hashfn, long_size, long, number=250000) 34 | time_trial(hashfn, extra_long_size, extra_long, number=10000) 35 | print() 36 | -------------------------------------------------------------------------------- /examples/simple.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function, unicode_literals 2 | from pyhashxx import Hashxx, hashxx 3 | 4 | def hash_one_value(val): 5 | ''' 6 | Use the hashxx function to initialize, add data, and compute the 7 | digest in one go. Add the seed parameter if you need to control 8 | the initial state of the digest. 9 | ''' 10 | print("Hash of %s = %d" % (repr(val), hashxx(val, seed=0))) 11 | 12 | def hash_multiple_values(vals): 13 | ''' 14 | Same as hash_one_value but iterates over a list and uses the 15 | Hashxx class so it can call update() multiple times, passing in 16 | additional data on each call. This could also be useful for 17 | streaming data, and also allows you to get the current (partial) 18 | digest and continue adding data. 
19 | ''' 20 | hasher = Hashxx(seed=0) 21 | for idx, val in enumerate(vals): 22 | hasher.update(val) 23 | print(" Intermediate hash up to %s = %d" % (repr(vals[:idx+1]), hasher.digest())) 24 | print("Hash of %s = %d" % (repr(vals), hasher.digest())) 25 | 26 | if __name__ == "__main__": 27 | # Normally you'll use byte arrays (strings in Python2) 28 | hash_one_value(b'Hello') 29 | hash_one_value(b'Hello World!') 30 | # Tuples can be passed in as a single value and will be treated as 31 | # if you just concatenated them 32 | hash_one_value((b'Hello', b' ', b'World!')) 33 | # Lists need to be iterated over manually (they are not normally 34 | # hashable in Python). Ultimately, the hash is the same as the 35 | # previous tuple. 36 | hash_multiple_values([b'Hello', b' ', b'World!']) 37 | # Note that you *cannot* use Unicode values because their hash 38 | # value would depend on the encoding. You need to convert them to 39 | # bytes (or strings in Python2) before passing them for hashing to 40 | # ensure you get the proper (and consistent) hash value. 41 | try: 42 | hash_one_value('Hello World!') 43 | except TypeError as exc: 44 | print('Trying to hash a unicode literal raised %s: "%s"' % (type(exc), exc,)) 45 | -------------------------------------------------------------------------------- /pyhashxx/pycompat.h: -------------------------------------------------------------------------------- 1 | // Python 2/3 Compatibility Helpers 2 | // Assumes you've already included Python.h 3 | // 4 | // See http://python3porting.com/cextensions.html for the source of most of 5 | // these. 
6 | 7 | #ifndef PyVarObject_HEAD_INIT 8 | #define PyVarObject_HEAD_INIT(type, size) \ 9 | PyObject_HEAD_INIT(type) size, 10 | #endif 11 | 12 | #ifndef Py_TYPE 13 | #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) 14 | #endif 15 | 16 | 17 | #if PY_MAJOR_VERSION >= 3 18 | #define MOD_DECL(ob, name, doc, methods) \ 19 | static struct PyModuleDef ob##_moduledef = { \ 20 | PyModuleDef_HEAD_INIT, name, doc, -1, methods, }; 21 | #define MOD_DEF(ob) \ 22 | ob = PyModule_Create(&ob##_moduledef); 23 | #else 24 | #define MOD_DECL(ob, name, doc, methods) \ 25 | char* ob##_name = name; \ 26 | char* ob##_doc = doc; \ 27 | PyMethodDef* ob##_methods = methods; 28 | #define MOD_DEF(ob) \ 29 | ob = Py_InitModule3(ob##_name, ob##_methods, ob##_doc); 30 | #endif 31 | 32 | #if PY_MAJOR_VERSION >= 3 33 | #define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void) 34 | #define RETURN_MOD_INIT_ERROR return ((PyObject*)NULL) 35 | #define RETURN_MOD_INIT_SUCCESS(modvar) return modvar 36 | #else 37 | #define MOD_INIT(name) PyMODINIT_FUNC init##name(void) 38 | #define RETURN_MOD_INIT_ERROR return 39 | #define RETURN_MOD_INIT_SUCCESS(modvar) return 40 | #endif 41 | -------------------------------------------------------------------------------- /pyhashxx/pyhashxx.c: -------------------------------------------------------------------------------- 1 | /** 2 | * pyhashxx - Fast Hash Algorithm 3 | * Copyright 2013, Ewen Cheslack-Postava 4 | * BSD 2-Clause License -- See LICENSE file for details. 
5 | */ 6 | 7 | #include <Python.h> 8 | #include "pycompat.h" 9 | #include "xxhash.h" 10 | 11 | typedef struct { 12 | PyObject_HEAD 13 | void* xxhash_state; 14 | } HashxxObject; 15 | // tp_dealloc: free the xxHash state, then the Python object itself. 16 | static void 17 | Hashxx_dealloc(HashxxObject* self) 18 | { 19 | XXH32_destroy(self->xxhash_state); 20 | Py_TYPE(self)->tp_free((PyObject*)self); 21 | } 22 | // tp_new: allocate the object together with a seed-0 hash state, so that update()/digest() never dereference a NULL state when __init__ is skipped. 23 | static PyObject * 24 | Hashxx_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 25 | { 26 | HashxxObject *self; 27 | 28 | self = (HashxxObject *)type->tp_alloc(type, 0); 29 | if (self != NULL) { 30 | self->xxhash_state = XXH32_init(0); 31 | if (self->xxhash_state == NULL) { Py_DECREF(self); return PyErr_NoMemory(); } 32 | } 33 | return (PyObject *)self; 34 | } 35 | // tp_init: parse the optional `seed` argument and restart the hash from it; returns 0 on success, -1 with an exception set on bad arguments. 36 | static int 37 | Hashxx_init(HashxxObject *self, PyObject *args, PyObject *kwds) 38 | { 39 | unsigned int seed = 0; 40 | static char *kwlist[] = {"seed", NULL}; 41 | 42 | if (! PyArg_ParseTupleAndKeywords(args, kwds, "|I", kwlist, 43 | &seed)) 44 | return -1; 45 | // Reset the state that tp_new allocated instead of allocating a new one: calling __init__ again must not leak. 46 | XXH32_resetState(self->xxhash_state, seed); 47 | 48 | return 0; 49 | } 50 | 51 | // Feed arg_obj into hash_state: bytes/str and bytearray are hashed directly, tuples are traversed recursively, None adds nothing. Returns a new reference to None, or NULL with TypeError set for unicode and unsupported types. 52 | static PyObject* _update_hash(void* hash_state, PyObject* arg_obj) { 53 | Py_ssize_t tuple_length; 54 | Py_ssize_t tuple_i; 55 | PyObject* tuple_obj, *partial_result; 56 | 57 | #if PY_MAJOR_VERSION >= 3 58 | if (PyBytes_Check(arg_obj)) { 59 | XXH32_update(hash_state, PyBytes_AsString(arg_obj), PyBytes_Size(arg_obj)); 60 | } 61 | #else 62 | if (PyString_Check(arg_obj)) { 63 | XXH32_update(hash_state, PyString_AsString(arg_obj), PyString_Size(arg_obj)); 64 | } 65 | #endif 66 | else if (PyByteArray_Check(arg_obj)) { 67 | XXH32_update(hash_state, PyByteArray_AsString(arg_obj), PyByteArray_Size(arg_obj)); 68 | } 69 | else if (PyTuple_Check(arg_obj)) { 70 | tuple_length = PyTuple_GET_SIZE(arg_obj); 71 | for(tuple_i = 0; tuple_i < tuple_length; tuple_i++) { 72 | tuple_obj = PyTuple_GetItem(arg_obj, tuple_i); 73 | partial_result = _update_hash(hash_state, tuple_obj); 74 | if (partial_result == NULL) return NULL; // propagate the exception 75 | Py_DECREF(partial_result); // drop the None reference returned for this item 76 | } 77 | } 78 | else if (arg_obj == Py_None) { 79 | Py_RETURN_NONE; 80 | 
} 81 | else if (PyUnicode_Check(arg_obj)) { 82 | PyErr_SetString(PyExc_TypeError, "Found unicode string, you must convert to bytes/str before hashing."); 83 | return NULL; 84 | } 85 | else { 86 | PyErr_Format(PyExc_TypeError, "Tried to hash unsupported type: %S.", Py_TYPE(arg_obj)); 87 | return NULL; 88 | } 89 | 90 | Py_RETURN_NONE; 91 | } 92 | // Hashxx.update(*args): feed every positional argument into the running hash; raises TypeError when called without arguments. 93 | static PyObject * 94 | Hashxx_update(HashxxObject* self, PyObject *args) 95 | { 96 | Py_ssize_t arg_length = PyTuple_GET_SIZE(args); 97 | Py_ssize_t arg_i; 98 | PyObject* arg_obj, *partial_result; 99 | 100 | if (arg_length == 0) { 101 | PyErr_SetString(PyExc_TypeError, "Must provide arguments to hash to Hashxx.update."); 102 | return NULL; 103 | } 104 | 105 | for(arg_i = 0; arg_i < arg_length; arg_i++) { 106 | arg_obj = PyTuple_GetItem(args, arg_i); 107 | partial_result = _update_hash(self->xxhash_state, arg_obj); 108 | if (partial_result == NULL) return NULL; // propagate the exception 109 | Py_DECREF(partial_result); // drop the None reference returned for this argument 110 | } 111 | 112 | Py_RETURN_NONE; 113 | } 114 | 115 | // Hashxx.digest(): return the hash of the data seen so far as a Python int; more data can still be added afterwards. 116 | static PyObject * 117 | Hashxx_digest(HashxxObject* self) 118 | { 119 | unsigned int digest = XXH32_digest(self->xxhash_state); 120 | return Py_BuildValue("I", digest); 121 | } 122 | 123 | static PyMethodDef Hashxx_methods[] = { 124 | {"update", (PyCFunction)Hashxx_update, METH_VARARGS, 125 | "Update the digest with new data." 126 | }, 127 | {"digest", (PyCFunction)Hashxx_digest, METH_NOARGS, 128 | "Return the current digest value of the data processed so far." 
129 | }, 130 | {NULL} /* Sentinel */ 131 | }; 132 | 133 | 134 | // Type object for pyhashxx.Hashxx; slots left at 0 are inherited or filled in by PyType_Ready(). 135 | static PyTypeObject pyhashxx_HashxxType = { 136 | PyVarObject_HEAD_INIT(NULL, 0) 137 | "pyhashxx.Hashxx", /*tp_name*/ 138 | sizeof(HashxxObject), /*tp_basicsize*/ 139 | 0, /*tp_itemsize*/ 140 | (destructor)Hashxx_dealloc, /*tp_dealloc*/ 141 | 0, /*tp_print*/ 142 | 0, /*tp_getattr*/ 143 | 0, /*tp_setattr*/ 144 | 0, /*tp_compare*/ 145 | 0, /*tp_repr*/ 146 | 0, /*tp_as_number*/ 147 | 0, /*tp_as_sequence*/ 148 | 0, /*tp_as_mapping*/ 149 | 0, /*tp_hash */ 150 | 0, /*tp_call*/ 151 | 0, /*tp_str*/ 152 | 0, /*tp_getattro*/ 153 | 0, /*tp_setattro*/ 154 | 0, /*tp_as_buffer*/ 155 | Py_TPFLAGS_DEFAULT, /*tp_flags*/ 156 | "Hashxx objects", /* tp_doc */ 157 | 0, /* tp_traverse */ 158 | 0, /* tp_clear */ 159 | 0, /* tp_richcompare */ 160 | 0, /* tp_weaklistoffset */ 161 | 0, /* tp_iter */ 162 | 0, /* tp_iternext */ 163 | Hashxx_methods, /* tp_methods */ 164 | 0, /* tp_members */ 165 | 0, /* tp_getset */ 166 | 0, /* tp_base */ 167 | 0, /* tp_dict */ 168 | 0, /* tp_descr_get */ 169 | 0, /* tp_descr_set */ 170 | 0, /* tp_dictoffset */ 171 | (initproc)Hashxx_init, /* tp_init */ 172 | 0, /* tp_alloc */ 173 | Hashxx_new, /* tp_new */ 174 | }; 175 | 176 | 177 | // hashxx(*args, seed=0): one-shot hash of all positional arguments. Returns the digest as a Python int, or NULL with TypeError set for missing arguments, unknown keywords, a non-integer seed or unhashable argument types. 178 | static PyObject * 179 | pyhashxx_hashxx(PyObject* self, PyObject *args, PyObject *kwds) 180 | { 181 | unsigned int seed = 0; 182 | const char* err_msg = NULL; 183 | PyObject* err_obj = NULL; 184 | Py_ssize_t args_len = 0; 185 | unsigned int digest = 0; 186 | void* state = NULL; 187 | PyObject* update_result = NULL; 188 | if (kwds != NULL) { 189 | Py_ssize_t kwds_size = PyDict_Size(kwds); 190 | PyObject* seed_obj = PyDict_GetItemString(kwds, "seed"); 191 | 192 | if (kwds_size > 1) { 193 | err_msg = "Unexpected keyword arguments, only 'seed' is supported."; 194 | goto badarg; 195 | } 196 | 197 | if (kwds_size == 1) { 198 | if (seed_obj == NULL) { 199 | err_msg = "Unexpected keyword argument, only 'seed' is supported."; 200 | goto badarg; 201 | } 202 | #if PY_MAJOR_VERSION < 3 203 | if 
(PyInt_Check(seed_obj)) 204 | seed = PyInt_AsLong(seed_obj); 205 | else 206 | #endif 207 | if (PyLong_Check(seed_obj)) 208 | seed = (unsigned int)PyLong_AsUnsignedLongMask(seed_obj); // wraps like the "I" format used by Hashxx(seed=...) instead of leaving an unchecked OverflowError pending 209 | else { 210 | err_msg = "Unexpected seed value type: %S"; 211 | err_obj = seed_obj; 212 | goto badseed; 213 | } 214 | } 215 | } 216 | args_len = PyTuple_GET_SIZE(args); 217 | if (args_len == 0) { 218 | err_msg = "Received no arguments to be hashed."; 219 | goto badarg; 220 | } 221 | 222 | // If possible, use the shorter, faster version that elides 223 | // allocating the state variable because it knows there is only 224 | // one input. 225 | if (args_len == 1) { 226 | PyObject* hash_obj = PyTuple_GetItem(args, 0); 227 | int did_hash = 1; 228 | #if PY_MAJOR_VERSION >= 3 229 | if (PyBytes_Check(hash_obj)) { 230 | digest = XXH32(PyBytes_AsString(hash_obj), PyBytes_Size(hash_obj), seed); 231 | } 232 | #else 233 | if (PyString_Check(hash_obj)) { 234 | digest = XXH32(PyString_AsString(hash_obj), PyString_Size(hash_obj), seed); 235 | } 236 | #endif 237 | else if (PyByteArray_Check(hash_obj)) { 238 | digest = XXH32(PyByteArray_AsString(hash_obj), PyByteArray_Size(hash_obj), seed); 239 | } 240 | else if (hash_obj == Py_None) { 241 | // Nothing to hash 242 | digest = XXH32("", 0, seed); 243 | } 244 | else { 245 | did_hash = 0; 246 | } 247 | 248 | if (did_hash) 249 | return Py_BuildValue("I", digest); 250 | } 251 | 252 | // Otherwise, do it the long, slower way 253 | state = XXH32_init(seed); 254 | update_result = _update_hash(state, args); 255 | if (update_result != NULL) 256 | digest = XXH32_digest(state); 257 | XXH32_destroy(state); // the state is released on both the success and the error path 258 | if (update_result == NULL) 259 | return NULL; 260 | Py_DECREF(update_result); // _update_hash hands back a new reference to None 261 | return Py_BuildValue("I", digest); 262 | 263 | badarg: 264 | PyErr_SetString(PyExc_TypeError, err_msg); 265 | return NULL; 266 | badseed: 267 | PyErr_Format(PyExc_TypeError, err_msg, Py_TYPE(err_obj)); 268 | return NULL; 269 | } 270 | 271 | static PyMethodDef pyhashxx_methods[] = { 272 | {"hashxx", (PyCFunction)pyhashxx_hashxx, METH_VARARGS | 
METH_KEYWORDS, 273 | "Compute the xxHash value for the given value, optionally providing a seed." 274 | }, 275 | {NULL} /* Sentinel */ 276 | }; 277 | 278 | 279 | 280 | MOD_INIT(pyhashxx) { 281 | PyObject* m; 282 | MOD_DECL(m, "pyhashxx", 283 | "Python wrapper of the xxHash fast hash algorithm.", 284 | pyhashxx_methods); 285 | 286 | if (PyType_Ready(&pyhashxx_HashxxType) < 0) 287 | RETURN_MOD_INIT_ERROR; 288 | 289 | MOD_DEF(m); 290 | if (m == NULL) 291 | RETURN_MOD_INIT_ERROR; 292 | 293 | Py_INCREF(&pyhashxx_HashxxType); 294 | PyModule_AddObject(m, "Hashxx", (PyObject *)&pyhashxx_HashxxType); 295 | 296 | RETURN_MOD_INIT_SUCCESS(m); 297 | } 298 | -------------------------------------------------------------------------------- /pyhashxx/xxhash.c: -------------------------------------------------------------------------------- 1 | /* 2 | xxHash - Fast Hash algorithm 3 | Copyright (C) 2012-2013, Yann Collet. 4 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are 8 | met: 9 | 10 | * Redistributions of source code must retain the above copyright 11 | notice, this list of conditions and the following disclaimer. 12 | * Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the following disclaimer 14 | in the documentation and/or other materials provided with the 15 | distribution. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | You can contact the author at : 30 | - xxHash source repository : http://code.google.com/p/xxhash/ 31 | */ 32 | 33 | 34 | 35 | //************************************** 36 | // Tuning parameters 37 | //************************************** 38 | // XXH_ACCEPT_NULL_INPUT_POINTER : 39 | // If the input pointer is a null pointer, xxHash default behavior is to crash, since it is a bad input. 40 | // If this option is enabled, xxHash output for null input pointers will be the same as a null-length input. 41 | // This option has a very small performance cost (only measurable on small inputs). 42 | // By default, this option is disabled. To enable it, uncomment the define below : 43 | //#define XXH_ACCEPT_NULL_INPUT_POINTER 1 44 | 45 | // XXH_FORCE_NATIVE_FORMAT : 46 | // By default, xxHash library provides endian-independent Hash values, based on little-endian convention. 47 | // Results are therefore identical for little-endian and big-endian CPU. 48 | // This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format. 49 | // Should endian-independence be of no importance to your application, you may uncomment the #define below. 50 | // It will improve speed for Big-endian CPU. 51 | // This option has no impact on Little_Endian CPU. 
52 | //#define XXH_FORCE_NATIVE_FORMAT 1 53 | 54 | 55 | 56 | //************************************** 57 | // Includes 58 | //************************************** 59 | #include <stdlib.h> // for malloc(), free() 60 | #include <string.h> // for memcpy() 61 | #include "xxhash.h" 62 | 63 | 64 | 65 | //************************************** 66 | // CPU Feature Detection 67 | //************************************** 68 | // Little Endian or Big Endian ? 69 | // You can overwrite the #define below if you know your architecture endianness 70 | #if defined(XXH_FORCE_NATIVE_FORMAT) && (XXH_FORCE_NATIVE_FORMAT==1) 71 | // Force native format. The result will be endian dependent. 72 | # define XXH_BIG_ENDIAN 0 73 | #elif defined (__GLIBC__) 74 | # include <endian.h> 75 | # if (__BYTE_ORDER == __BIG_ENDIAN) 76 | # define XXH_BIG_ENDIAN 1 77 | # endif 78 | #elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN)) 79 | # define XXH_BIG_ENDIAN 1 80 | #elif defined(__sparc) || defined(__sparc__) \ 81 | || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \ 82 | || defined(__hpux) || defined(__hppa) \ 83 | || defined(_MIPSEB) || defined(__s390__) 84 | # define XXH_BIG_ENDIAN 1 85 | #endif 86 | 87 | #if !defined(XXH_BIG_ENDIAN) 88 | // Little Endian assumed. PDP Endian and other very rare endian format are unsupported. 
89 | # define XXH_BIG_ENDIAN 0 90 | #endif 91 | 92 | 93 | //************************************** 94 | // Basic Types 95 | //************************************** 96 | #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L // C99 97 | # include <stdint.h> 98 | typedef uint8_t BYTE; 99 | typedef uint16_t U16; 100 | typedef uint32_t U32; 101 | typedef int32_t S32; 102 | typedef uint64_t U64; 103 | #else 104 | typedef unsigned char BYTE; 105 | typedef unsigned short U16; 106 | typedef unsigned int U32; 107 | typedef signed int S32; 108 | typedef unsigned long long U64; 109 | #endif 110 | 111 | 112 | //************************************** 113 | // Compiler-specific Options & Functions 114 | //************************************** 115 | #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 116 | 117 | // Note : under GCC, it may sometimes be faster to enable the (2nd) macro definition, instead of using win32 intrinsic 118 | #if defined(_WIN32) 119 | # define XXH_rotl32(x,r) _rotl(x,r) 120 | #else 121 | # define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r)))) // arguments parenthesised so that expression arguments expand safely 122 | #endif 123 | 124 | #if defined(_MSC_VER) // Visual Studio 125 | # define XXH_swap32 _byteswap_ulong 126 | #elif GCC_VERSION >= 403 127 | # define XXH_swap32 __builtin_bswap32 128 | #else 129 | static inline U32 XXH_swap32 (U32 x) { 130 | return ((x << 24) & 0xff000000 ) | 131 | ((x << 8) & 0x00ff0000 ) | 132 | ((x >> 8) & 0x0000ff00 ) | 133 | ((x >> 24) & 0x000000ff );} 134 | #endif 135 | 136 | 137 | //************************************** 138 | // Constants 139 | //************************************** 140 | #define PRIME32_1 2654435761U 141 | #define PRIME32_2 2246822519U 142 | #define PRIME32_3 3266489917U 143 | #define PRIME32_4 668265263U 144 | #define PRIME32_5 374761393U 145 | 146 | 147 | //************************************** 148 | // Macros 149 | //************************************** 150 | #define XXH_LE32(p) (XXH_BIG_ENDIAN ? 
XXH_swap32(*(U32*)(p)) : *(U32*)(p)) 151 | 152 | 153 | 154 | //**************************** 155 | // Simple Hash Functions 156 | //**************************** 157 | // One-shot hash: returns the 32-bit xxHash of the `len` bytes at `input`, starting from `seed`. 158 | U32 XXH32(const void* input, int len, U32 seed) 159 | { 160 | #if 0 161 | // Simple version, good for code maintenance, but unfortunately slow for small inputs 162 | void* state = XXH32_init(seed); 163 | XXH32_update(state, input, len); 164 | U32 result = XXH32_digest(state); 165 | XXH32_destroy(state); 166 | return result; 167 | #else 168 | 169 | const BYTE* p = (const BYTE*)input; 170 | const BYTE* const bEnd = p + len; 171 | U32 h32; 172 | 173 | #ifdef XXH_ACCEPT_NULL_INPUT_POINTER 174 | if (p==NULL) { len=0; p=(const BYTE*)16; } 175 | #endif 176 | 177 | if (len>=16) 178 | { 179 | const BYTE* const limit = bEnd - 16; 180 | U32 v1 = seed + PRIME32_1 + PRIME32_2; 181 | U32 v2 = seed + PRIME32_2; 182 | U32 v3 = seed + 0; 183 | U32 v4 = seed - PRIME32_1; 184 | 185 | do 186 | { 187 | v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; 188 | v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; 189 | v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; 190 | v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; 191 | } while (p<=limit); 192 | 193 | h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18); 194 | } 195 | else 196 | { 197 | h32 = seed + PRIME32_5; 198 | } 199 | 200 | h32 += (U32) len; 201 | 202 | while (p<=bEnd-4) 203 | { 204 | h32 += XXH_LE32(p) * PRIME32_3; 205 | h32 = XXH_rotl32(h32, 17) * PRIME32_4 ; 206 | p+=4; 207 | } 208 | 209 | while (p<bEnd) 210 | { 211 | h32 += (*p) * PRIME32_5; 212 | h32 = XXH_rotl32(h32, 11) * PRIME32_1 ; 213 | p++; 214 | } 215 | 216 | h32 ^= h32 >> 15; 217 | h32 *= PRIME32_2; 218 | h32 ^= h32 >> 13; 219 | h32 *= PRIME32_3; 220 | h32 ^= h32 >> 16; 221 | 222 | return h32; 223 | 224 | #endif 225 | } 226 | 227 | 228 | //**************************** 229 | // Advanced Hash Functions 230 | //**************************** 231 | 232 | struct XXH_state32_t 233 | { 234 | 
U32 seed; 235 | U32 v1; 236 | U32 v2; 237 | U32 v3; 238 | U32 v4; 239 | U64 total_len; 240 | char memory[16]; 241 | int memsize; 242 | }; 243 | 244 | // Number of bytes a caller must provide when it allocates the state itself. 245 | int XXH32_sizeofState(void) { return sizeof(struct XXH_state32_t); } 246 | 247 | // Restart the hash held in state_in from `seed`, discarding any data consumed so far. 248 | XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed) 249 | { 250 | struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; 251 | state->seed = seed; 252 | state->v1 = seed + PRIME32_1 + PRIME32_2; 253 | state->v2 = seed + PRIME32_2; 254 | state->v3 = seed + 0; 255 | state->v4 = seed - PRIME32_1; 256 | state->total_len = 0; 257 | state->memsize = 0; 258 | return OK; 259 | } 260 | 261 | // Allocate and seed a new hash state; returns NULL when the allocation fails. Release it with XXH32_destroy(). 262 | void* XXH32_init (U32 seed) 263 | { 264 | struct XXH_state32_t * state = (struct XXH_state32_t *) malloc (sizeof(struct XXH_state32_t)); 265 | if (state != NULL) XXH32_resetState(state, seed); // malloc can fail: hand NULL back instead of writing through it 266 | return (void*)state; 267 | } 268 | 269 | // Consume `len` more bytes from `input`; bytes that do not fill a 16-byte block are kept in state->memory for the next call. 270 | XXH_errorcode XXH32_update (void* state_in, const void* input, int len) 271 | { 272 | struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; 273 | const BYTE* p = (const BYTE*)input; 274 | const BYTE* const bEnd = p + len; 275 | 276 | #ifdef XXH_ACCEPT_NULL_INPUT_POINTER 277 | if (input==NULL) return XXH_ERROR; 278 | #endif 279 | 280 | state->total_len += len; 281 | 282 | if (state->memsize + len < 16) // fill in tmp buffer 283 | { 284 | memcpy(state->memory + state->memsize, input, len); 285 | state->memsize += len; 286 | return OK; 287 | } 288 | 289 | if (state->memsize) // some data left from previous update 290 | { 291 | memcpy(state->memory + state->memsize, input, 16-state->memsize); 292 | { 293 | const U32* p32 = (const U32*)state->memory; 294 | state->v1 += XXH_LE32(p32) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++; 295 | state->v2 += XXH_LE32(p32) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++; 296 | state->v3 += XXH_LE32(p32) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; 
p32++; 297 | state->v4 += XXH_LE32(p32) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++; 298 | } 299 | p += 16-state->memsize; 300 | state->memsize = 0; 301 | } 302 | 303 | if (p <= bEnd-16) 304 | { 305 | const BYTE* const limit = bEnd - 16; 306 | U32 v1 = state->v1; 307 | U32 v2 = state->v2; 308 | U32 v3 = state->v3; 309 | U32 v4 = state->v4; 310 | 311 | do 312 | { 313 | v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4; 314 | v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4; 315 | v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4; 316 | v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4; 317 | } while (p<=limit); 318 | 319 | state->v1 = v1; 320 | state->v2 = v2; 321 | state->v3 = v3; 322 | state->v4 = v4; 323 | } 324 | 325 | if (p < bEnd) 326 | { 327 | memcpy(state->memory, p, bEnd-p); 328 | state->memsize = (int)(bEnd-p); 329 | } 330 | 331 | return OK; 332 | } 333 | 334 | // Return the hash of everything consumed so far; only reads the state, so more data can be added afterwards. 335 | U32 XXH32_digest (void* state_in) 336 | { 337 | struct XXH_state32_t * state = (struct XXH_state32_t *) state_in; 338 | BYTE * p = (BYTE*)state->memory; 339 | BYTE* bEnd = (BYTE*)state->memory + state->memsize; 340 | U32 h32; 341 | 342 | 343 | if (state->total_len >= 16) 344 | { 345 | h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18); 346 | } 347 | else 348 | { 349 | h32 = state->seed + PRIME32_5; 350 | } 351 | 352 | h32 += (U32) state->total_len; 353 | 354 | while (p<=bEnd-4) 355 | { 356 | h32 += XXH_LE32(p) * PRIME32_3; 357 | h32 = XXH_rotl32(h32, 17) * PRIME32_4; 358 | p+=4; 359 | } 360 | 361 | while (p<bEnd) 362 | { 363 | h32 += (*p) * PRIME32_5; 364 | h32 = XXH_rotl32(h32, 11) * PRIME32_1; 365 | p++; 366 | } 367 | 368 | h32 ^= h32 >> 15; 369 | h32 *= PRIME32_2; 370 | h32 ^= h32 >> 13; 371 | h32 *= PRIME32_3; 372 | h32 ^= h32 >> 16; 373 | 374 | return h32; 375 | } 376 | 377 | // Free a state obtained from XXH32_init(). 378 | void XXH32_destroy (void* state_in) 379 | { 380 | free(state_in); 381 | } 382 | 
-------------------------------------------------------------------------------- /pyhashxx/xxhash.h: -------------------------------------------------------------------------------- 1 | /* 2 | xxHash - Fast Hash algorithm 3 | Header File 4 | Copyright (C) 2012-2013, Yann Collet. 5 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 | 7 | Redistribution and use in source and binary forms, with or without 8 | modification, are permitted provided that the following conditions are 9 | met: 10 | 11 | * Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | * Redistributions in binary form must reproduce the above 14 | copyright notice, this list of conditions and the following disclaimer 15 | in the documentation and/or other materials provided with the 16 | distribution. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | You can contact the author at : 31 | - xxHash source repository : http://code.google.com/p/xxhash/ 32 | */ 33 | 34 | /* Notice extracted from xxHash homepage : 35 | 36 | xxHash is an extremely fast Hash algorithm, running at RAM speed limits. 
37 | It also successfully passes all tests from the SMHasher suite. 38 | 39 | Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz) 40 | 41 | Name Speed Q.Score Author 42 | xxHash 5.4 GB/s 10 43 | CrapWow 3.2 GB/s 2 Andrew 44 | MurmurHash 3a 2.7 GB/s 10 Austin Appleby 45 | SpookyHash 2.0 GB/s 10 Bob Jenkins 46 | SBox 1.4 GB/s 9 Bret Mulvey 47 | Lookup3 1.2 GB/s 9 Bob Jenkins 48 | SuperFastHash 1.2 GB/s 1 Paul Hsieh 49 | CityHash64 1.05 GB/s 10 Pike & Alakuijala 50 | FNV 0.55 GB/s 5 Fowler, Noll, Vo 51 | CRC32 0.43 GB/s 9 52 | MD5-32 0.33 GB/s 10 Ronald L. Rivest 53 | SHA1-32 0.28 GB/s 10 54 | 55 | Q.Score is a measure of quality of the hash function. 56 | It depends on successfully passing the SMHasher test set. 57 | 10 is a perfect score. 58 | */ 59 | 60 | #pragma once 61 | 62 | #if defined (__cplusplus) 63 | extern "C" { 64 | #endif 65 | 66 | 67 | //**************************** 68 | // Type 69 | //**************************** 70 | typedef enum { OK=0, XXH_ERROR } XXH_errorcode; 71 | 72 | 73 | 74 | //**************************** 75 | // Simple Hash Functions 76 | //**************************** 77 | 78 | unsigned int XXH32 (const void* input, int len, unsigned int seed); 79 | 80 | /* 81 | XXH32() : 82 | Calculate the 32-bit hash of a sequence of length "len" stored at memory address "input". 83 | The memory between input & input+len must be valid (allocated and read-accessible). 84 | "seed" can be used to alter the result predictably. 85 | This function successfully passes all SMHasher tests. 86 | Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s 87 | Note that "len" is type "int", which means it is limited to 2^31-1. 88 | If your data is larger, use the advanced functions below.
89 | */ 90 | 91 | 92 | 93 | //**************************** 94 | // Advanced Hash Functions 95 | //**************************** 96 | 97 | void* XXH32_init (unsigned int seed); 98 | XXH_errorcode XXH32_update (void* state, const void* input, int len); 99 | unsigned int XXH32_digest (void* state); 100 | void XXH32_destroy (void* state); 101 | 102 | /* 103 | These functions calculate the xxhash of an input provided in several small packets, 104 | as opposed to an input provided as a single block. 105 | 106 | It must be started with : 107 | void* XXH32_init() 108 | The function returns a pointer which holds the state of calculation. 109 | 110 | This pointer must be provided as "void* state" parameter for XXH32_update(). 111 | XXH32_update() can be called as many times as necessary. 112 | The user must provide a valid (allocated) input. 113 | The function returns an error code, with 0 meaning OK, and any other value meaning there is an error. 114 | Note that "len" is type "int", which means it is limited to 2^31-1. 115 | If your data is larger, it is recommended to chunk your data into blocks 116 | of size for example 2^30 (1GB) to avoid any "int" overflow issue. 117 | 118 | Finally, you can end the calculation at any time by using XXH32_digest(). 119 | This function returns the final 32-bit hash. 120 | You must provide the same "void* state" parameter created by XXH32_init(). 121 | 122 | When you are done computing digests, use XXH32_destroy() to clean up 123 | the state, freeing memory associated with the hash calculation state. 124 | */ 125 | 126 | 127 | int XXH32_sizeofState(void); 128 | XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed); 129 | /* 130 | These functions are the basic elements of XXH32_init(); 131 | The objective is to allow user application to make its own allocation. 132 | 133 | XXH32_sizeofState() is used to know how much space must be allocated by the application. 134 | This space must be referenced by a void* pointer.
135 | This pointer must be provided as 'state_in' into XXH32_resetState(), which initializes the state. 136 | */ 137 | 138 | 139 | #if defined (__cplusplus) 140 | } 141 | #endif 142 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup, Extension 2 | 3 | headers = [ 'pyhashxx/xxhash.h', 4 | 'pyhashxx/pycompat.h', 5 | ] 6 | sources = [ 'pyhashxx/xxhash.c', 7 | 'pyhashxx/pyhashxx.c', 8 | ] 9 | pyhashxx = Extension('pyhashxx', sources=sources, depends=headers) 10 | 11 | setup( 12 | name = "pyhashxx", 13 | version = "0.1.3", 14 | description = "Python wrapper for xxHash algorithm", 15 | author = "Ewen Cheslack-Postava", 16 | author_email = 'me@ewencp.org', 17 | platforms=["any"], 18 | license="BSD", 19 | url = "http://github.com/ewencp/pyhashxx", 20 | packages = find_packages(), 21 | ext_modules = [ pyhashxx ], 22 | test_suite = "tests", 23 | headers = headers, 24 | ) 25 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ewencp/pyhashxx/80b827612d8d4e3b3cb9a581aa22331b89ca8883/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_hash_bytes.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from pyhashxx import Hashxx 3 | import unittest 4 | 5 | class TestHashBytes(unittest.TestCase): 6 | 7 | def test_empty_string(self): 8 | h = Hashxx() 9 | h.update(b'') 10 | self.assertEqual(h.digest(), 46947589) 11 | 12 | def test_one_string(self): 13 | h = Hashxx() 14 | h.update(b'hello') 15 | self.assertEqual(h.digest(), 4211111929) 16 | 17 | h = Hashxx() 18 | h.update(b'goodbye') 19 | self.assertEqual(h.digest(), 
2269043192) 20 | 21 | 22 | def test_multiple_strings(self): 23 | h = Hashxx() 24 | h.update(b'hello') 25 | h.update(b'goodbye') 26 | self.assertEqual(h.digest(), 4110974955) 27 | 28 | def test_tuple(self): 29 | # Tuples shouldn't affect the hash, they should be equivalent to hashing 30 | # each part in a separate update 31 | h = Hashxx() 32 | h.update((b'hello',b'goodbye')) 33 | self.assertEqual(h.digest(), 4110974955) 34 | 35 | def test_seeds(self): 36 | h = Hashxx(seed=0) 37 | h.update(b'hello') 38 | self.assertEqual(h.digest(), 4211111929) 39 | 40 | h = Hashxx(seed=1) 41 | h.update(b'hello') 42 | self.assertEqual(h.digest(), 4244634537) 43 | 44 | h = Hashxx(seed=2) 45 | h.update(b'hello') 46 | self.assertEqual(h.digest(), 4191738725) 47 | 48 | def hash_value(self, val, seed=0): 49 | h = Hashxx(seed=seed) 50 | h.update(val) 51 | return h.digest() 52 | 53 | def test_incremental(self): 54 | # Make sure incrementally computed results match those 55 | # computed all at once 56 | hello_hash = self.hash_value(b'hello') 57 | hello_world_hash = self.hash_value(b'helloworld') 58 | 59 | h = Hashxx() 60 | h.update(b'hello') 61 | self.assertEqual(h.digest(), hello_hash) 62 | h.update(b'world') 63 | self.assertEqual(h.digest(), hello_world_hash) 64 | 65 | 66 | def test_simultaneous(self): 67 | # Ensure that interleaved updates still give same results as 68 | # independent 69 | h1 = Hashxx() 70 | h2 = Hashxx() 71 | 72 | h1.update(b'he') 73 | h2.update(b'goo') 74 | h1.update(b'll') 75 | h2.update(b'db') 76 | h1.update(b'o') 77 | h2.update(b'ye') 78 | 79 | self.assertEqual(h1.digest(), self.hash_value(b'hello')) 80 | self.assertEqual(h2.digest(), self.hash_value(b'goodbye')) 81 | 82 | 83 | def test_bad_seed(self): 84 | self.assertRaises(TypeError, Hashxx, seed="badseed") 85 | 86 | def test_bad_arg(self): 87 | h = Hashxx() 88 | self.assertRaises(TypeError, h.update, [1,2,3]) 89 | 90 | def test_no_args(self): 91 | h = Hashxx() 92 | self.assertRaises(TypeError, h.update) 93 | 94 | 
def test_no_unicode(self): 95 | h = Hashxx() 96 | self.assertRaises(TypeError, h.update, 'hello') 97 | -------------------------------------------------------------------------------- /tests/test_oneshot.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from pyhashxx import hashxx, Hashxx 3 | import unittest 4 | 5 | class TestOneShot(unittest.TestCase): 6 | # The shorthand should be equivalent to this simple function: 7 | def hash_value(self, val, seed=0): 8 | h = Hashxx(seed=seed) 9 | h.update(val) 10 | return h.digest() 11 | 12 | def test_empty_string(self): 13 | self.assertEqual(hashxx(b''), self.hash_value(b'')) 14 | 15 | def test_string(self): 16 | self.assertEqual(hashxx(b'hello'), self.hash_value(b'hello')) 17 | 18 | def test_seeds(self): 19 | self.assertNotEqual(hashxx(b'hello', seed=0), hashxx(b'hello', seed=1)) 20 | 21 | self.assertEqual(hashxx(b'hello', seed=0), self.hash_value(b'hello', seed=0)) 22 | self.assertEqual(hashxx(b'hello', seed=1), self.hash_value(b'hello', seed=1)) 23 | self.assertEqual(hashxx(b'hello', seed=2), self.hash_value(b'hello', seed=2)) 24 | 25 | def test_bad_arg(self): 26 | self.assertRaises(TypeError, hashxx, [1, 2, 3]) 27 | 28 | def test_bad_seed(self): 29 | self.assertRaises(TypeError, hashxx, seed="badseed") 30 | 31 | def test_no_args(self): 32 | self.assertRaises(TypeError, hashxx) 33 | 34 | def test_no_unicode(self): 35 | self.assertRaises(TypeError, hashxx, 'hello') 36 | --------------------------------------------------------------------------------