├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── examples
    ├── perf.py
    └── simple.py
├── pyhashxx
    ├── pycompat.h
    ├── pyhashxx.c
    ├── xxhash.c
    └── xxhash.h
├── setup.py
└── tests
    ├── __init__.py
    ├── test_hash_bytes.py
    └── test_oneshot.py


/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | 
3 | build/
4 | pyhashxx.egg-info
5 | pyhashxx.so
6 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | 
 3 | python:
 4 |   - "2.6"
 5 |   - "2.7"
 6 |   - "3.2"
 7 |   - "3.3"
 8 | 
 9 | script: python setup.py test
10 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | pyhashxx - Fast Hash algorithm
 2 | Copyright (C) 2013, Ewen Cheslack-Postava.
 3 | Original C Implementation Copyright (C) 2012-2013, Yann Collet.
 4 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 5 | 
 6 | Redistribution and use in source and binary forms, with or without
 7 | modification, are permitted provided that the following conditions are
 8 | met:
 9 | 
10 |     * Redistributions of source code must retain the above copyright
11 |       notice, this list of conditions and the following disclaimer.
12 |     * Redistributions in binary form must reproduce the above
13 |       copyright notice, this list of conditions and the following
14 |       disclaimer in the documentation and/or other materials provided
15 |       with the distribution.
16 | 
17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | pyhashxx
 2 | ========
 3 | 
 4 | Python wrapper of xxhash that supports concurrency and is packaged
 5 | nicely for installation with pip/distribute.
 6 | 
 7 | The existing wrapper I could find used a static global hash context
 8 | and wasn't packaged nicely for installation with Python package
 9 | managers. This version fixes both those issues. The naming
10 | rearrangement is to avoid conflicts with the existing wrapper in
11 | python.
12 | 
13 | See http://code.google.com/p/xxhash/ for the original xxHash code.
14 | 
15 | Examples
16 | --------
17 | 
18 | The easiest way to use the hash function is the convenience function
19 | `hashxx` that returns the hash of the bytes of its parameters, and
20 | which will traverse tuples (even recursively).
21 | 
22 |     from pyhashxx import hashxx
23 |     hashxx(b'Hello World!')
24 |     hashxx(b'Hello', b' ', b'World!')
25 |     hashxx((b'Hello', b' ', b'World!'))
26 |     hashxx((b'Hello', b' '), (b'World!',))
27 |     # All return 198612872
28 |     hashxx(b'Hello World!', seed=1)
29 |     # Changing the seed changes the result to 2212595744
30 | 
31 | You can also use the `Hashxx` class to compute the hash incrementally,
32 | and extract intermediate digest values:
33 | 
34 |     from pyhashxx import Hashxx
35 |     hasher = Hashxx(seed=0) # seed is optional
36 |     hasher.update(b'Hello')
37 |     hasher.update(b' ')
38 |     print(hasher.digest()) # Prints 1401757748
39 |     hasher.update(b'World!')
40 |     print(hasher.digest()) # Prints 198612872
41 | 
42 | See the `examples/` directory for more, including a script testing
43 | performance.
44 | 
45 | Buildbot
46 | --------
47 | [![Build Status](https://secure.travis-ci.org/ewencp/pyhashxx.png)](http://travis-ci.org/ewencp/pyhashxx)
48 | 


--------------------------------------------------------------------------------
/examples/perf.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function
 2 | import random, time
 3 | 
 4 | from pyhashxx import hashxx
 5 | hashfns = [hashxx]
 6 | try:
 7 |     import smhasher
 8 |     hashfns += [getattr(smhasher, k) for k in dir(smhasher) if k[0] == 'm']
 9 | except:
10 |     pass
11 | 
12 | print("Generating random data")
13 | chars = b'abcdefghijklmnopqrstuvwxyz'
14 | short_size = 20
15 | short = b''.join([bytes(random.choice(chars)) for i in range(short_size)])
16 | long_size = 1024
17 | long = b''.join([bytes(random.choice(chars)) for i in range(long_size)])
18 | extra_long_size = 64*1024*1024
19 | extra_long = long * 64
20 | 
21 | def time_trial(hashfn, size, data, number=1000000):
22 |     started = time.time()
23 |     # Avoid loop overhead
24 |     [hashfn(data) for i in range(number)]
25 |     finished = time.time()
26 |     duration = finished - started
27 |     print(" %d bytes %d times, %f s, %f ms/hash, %f hashes/s, %f MB/s" % (size, number, duration, duration/number*1000, number/duration, number*size/(duration*1024*1024)))
28 | 
29 | if __name__ == "__main__":
30 |     for hashfn in hashfns:
31 |         print("%s:" % hashfn.__name__)
32 |         time_trial(hashfn, short_size, short, number=10000000)
33 |         time_trial(hashfn, long_size, long, number=250000)
34 |         time_trial(hashfn, extra_long_size, extra_long, number=10000)
35 |         print()
36 | 


--------------------------------------------------------------------------------
/examples/simple.py:
--------------------------------------------------------------------------------
 1 | from __future__ import print_function, unicode_literals
 2 | from pyhashxx import Hashxx, hashxx
 3 | 
 4 | def hash_one_value(val):
 5 |     '''
 6 |     Use the hashxx function to initialize, add data, and compute the
 7 |     digest in one go. Add the seed parameter if you need to control
 8 |     the initial state of the digest.
 9 |     '''
10 |     print("Hash of %s = %d" % (repr(val), hashxx(val, seed=0)))
11 | 
12 | def hash_multiple_values(vals):
13 |     '''
14 |     Same as hash_one_value but iterates over a list and uses the
15 |     Hashxx class so it can call update() multiple times, passing in
16 |     additional data on each call. This could also be useful for
17 |     streaming data, and also allows you to get the current (partial)
18 |     digest and continue adding data.
19 |     '''
20 |     hasher = Hashxx(seed=0)
21 |     for idx, val in enumerate(vals):
22 |         hasher.update(val)
23 |         print(" Intermediate hash up to %s = %d" % (repr(vals[:idx+1]), hasher.digest()))
24 |     print("Hash of %s = %d" % (repr(vals), hasher.digest()))
25 | 
26 | if __name__ == "__main__":
27 |     # Normally you'll use byte arrays (strings in Python2)
28 |     hash_one_value(b'Hello')
29 |     hash_one_value(b'Hello World!')
30 |     # Tuples can be passed in as a single value and will be treated as
31 |     # if you just concatenated them
32 |     hash_one_value((b'Hello', b' ', b'World!'))
33 |     # Lists need to be iterated over manually (they are not normally
34 |     # hashable in Python). Ultimately, the hash is the same as the
35 |     # previous tuple.
36 |     hash_multiple_values([b'Hello', b' ', b'World!'])
37 |     # Note that you *cannot* use Unicode values because their hash
38 |     # value would depend on the encoding. You need to convert them to
39 |     # bytes (or strings in Python2) before passing them for hashing to
40 |     # ensure you get the proper (and consistent) hash value.
41 |     try:
42 |         hash_one_value('Hello World!')
43 |     except TypeError as exc:
44 |         print('Trying to hash a unicode literal raised %s: "%s"' % (type(exc), exc,))
45 | 


--------------------------------------------------------------------------------
/pyhashxx/pycompat.h:
--------------------------------------------------------------------------------
 1 | // Python 2/3 Compatibility Helpers
 2 | // Assumes you've already included Python.h
 3 | //
 4 | // See http://python3porting.com/cextensions.html for the source of most of
 5 | // these.
 6 | 
 7 | #ifndef PyVarObject_HEAD_INIT  // fallback for Python headers that lack this macro
 8 | #define PyVarObject_HEAD_INIT(type, size) \
 9 |     PyObject_HEAD_INIT(type) size,
10 | #endif
11 | // Py_TYPE fallback for Python headers that lack it: yields the object's type pointer
12 | #ifndef Py_TYPE
13 | #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
14 | #endif
15 | 
16 | // MOD_DECL declares what MOD_DEF later needs to create module `ob`; use both in one scope
17 | #if PY_MAJOR_VERSION >= 3
18 | #define MOD_DECL(ob, name, doc, methods) \
19 |     static struct PyModuleDef ob##_moduledef = { \
20 |         PyModuleDef_HEAD_INIT, name, doc, -1, methods, };
21 | #define MOD_DEF(ob) \
22 |     ob = PyModule_Create(&ob##_moduledef);
23 | #else
24 | #define MOD_DECL(ob, name, doc, methods) \
25 |     char* ob##_name = name; \
26 |     char* ob##_doc = doc; \
27 |     PyMethodDef* ob##_methods = methods;
28 | #define MOD_DEF(ob) \
29 |     ob = Py_InitModule3(ob##_name, ob##_methods, ob##_doc);
30 | #endif
31 | // MOD_INIT opens the module init function; RETURN_MOD_INIT_* leave it (no value on Python 2)
32 | #if PY_MAJOR_VERSION >= 3
33 | #define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void)
34 | #define RETURN_MOD_INIT_ERROR return ((PyObject*)NULL)
35 | #define RETURN_MOD_INIT_SUCCESS(modvar) return modvar
36 | #else
37 | #define MOD_INIT(name) PyMODINIT_FUNC init##name(void)
38 | #define RETURN_MOD_INIT_ERROR return
39 | #define RETURN_MOD_INIT_SUCCESS(modvar) return
40 | #endif
41 | 


--------------------------------------------------------------------------------
/pyhashxx/pyhashxx.c:
--------------------------------------------------------------------------------
  1 | /**
  2 |  *  pyhashxx - Fast Hash Algorithm
  3 |  *  Copyright 2013, Ewen Cheslack-Postava
  4 |  *  BSD 2-Clause License -- See LICENSE file for details.
  5 |  */
  6 | 
  7 | #include <Python.h>
  8 | #include "pycompat.h"
  9 | #include "xxhash.h"
 10 | 
 11 | typedef struct {  // instance layout of pyhashxx.Hashxx
 12 |     PyObject_HEAD
 13 |     void* xxhash_state;  // handle from XXH32_init(); NULL until Hashxx_init has run
 14 | } HashxxObject;
 15 | 
 16 | // tp_dealloc: free the xxHash state (NULL if __init__ never ran or failed), then self.
 17 | static void Hashxx_dealloc(HashxxObject* self)
 18 | {
 19 |     if (self->xxhash_state != NULL) XXH32_destroy(self->xxhash_state);
 20 |     Py_TYPE(self)->tp_free((PyObject*)self);
 21 | }
 22 | 
 23 | // tp_new: allocate a Hashxx object whose hash state starts out NULL;
 24 | // Hashxx_init creates the real state. args and kwds are not used here.
 25 | static PyObject *
 26 | Hashxx_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
 27 | {
 28 |     HashxxObject *obj = (HashxxObject *)type->tp_alloc(type, 0);
 29 | 
 30 |     if (obj == NULL)
 31 |         return NULL;
 32 |     obj->xxhash_state = NULL;
 33 |     return (PyObject *)obj;
 34 | }
 35 | 
 36 | // tp_init for Hashxx(seed=0): (re)creates the hash state; 0 on success, -1 with an exception set.
 37 | static int
 38 | Hashxx_init(HashxxObject *self, PyObject *args, PyObject *kwds)
 39 | {
 40 |     unsigned int seed = 0;
 41 |     static char *kwlist[] = {"seed", NULL};
 42 |     void *fresh;
 43 |     if (! PyArg_ParseTupleAndKeywords(args, kwds, "|I", kwlist, &seed))
 44 |         return -1;
 45 |     if ((fresh = XXH32_init(seed)) == NULL) { PyErr_NoMemory(); return -1; }
 46 |     // __init__ may run again on a live object: free the old state instead of leaking it
 47 |     if (self->xxhash_state != NULL) XXH32_destroy(self->xxhash_state);
 48 |     self->xxhash_state = fresh;
 49 |     return 0;
 50 | }
 51 | 
 52 | // Feed arg_obj into hash_state: bytes (str on Python 2) and bytearray are hashed, tuples are
 53 | // walked recursively, None adds nothing. Returns a new None, or NULL with TypeError set.
 54 | static PyObject* _update_hash(void* hash_state, PyObject* arg_obj) {
 55 |     const char* data = NULL;
 56 |     Py_ssize_t remaining = 0, tuple_i;
 57 |     PyObject* partial_result;
 58 | #if PY_MAJOR_VERSION >= 3
 59 |     if (PyBytes_Check(arg_obj)) {
 60 |         data = PyBytes_AsString(arg_obj); remaining = PyBytes_Size(arg_obj);
 61 |     }
 62 | #else
 63 |     if (PyString_Check(arg_obj)) {
 64 |         data = PyString_AsString(arg_obj); remaining = PyString_Size(arg_obj);
 65 |     }
 66 | #endif
 67 |     else if (PyByteArray_Check(arg_obj)) {
 68 |         data = PyByteArray_AsString(arg_obj); remaining = PyByteArray_Size(arg_obj);
 69 |     }
 70 |     else if (PyTuple_Check(arg_obj)) {
 71 |         for(tuple_i = 0; tuple_i < PyTuple_GET_SIZE(arg_obj); tuple_i++) {
 72 |             partial_result = _update_hash(hash_state, PyTuple_GetItem(arg_obj, tuple_i));
 73 |             if (partial_result == NULL) return NULL;
 74 |             Py_DECREF(partial_result);  // release the None returned for this item
 75 |         }
 76 |     }
 77 |     else if (PyUnicode_Check(arg_obj)) {
 78 |         PyErr_SetString(PyExc_TypeError, "Found unicode string, you must convert to bytes/str before hashing.");
 79 |         return NULL;
 80 |     }
 81 |     else if (arg_obj != Py_None) {
 82 |         PyErr_Format(PyExc_TypeError, "Tried to hash unsupported type: %S.", Py_TYPE(arg_obj));
 83 |         return NULL;
 84 |     }
 85 |     // XXH32_update takes an int length (and adds to it): feed at most 1 GiB per call
 86 |     while (remaining > 0) {
 87 |         int step = remaining > (1 << 30) ? (1 << 30) : (int)remaining;
 88 |         XXH32_update(hash_state, data, step); data += step; remaining -= step;
 89 |     }
 90 |     Py_RETURN_NONE;
 91 | }
 92 | 
 93 | // Hashxx.update(*args): feed every positional argument into the running hash.
 94 | // Returns None, or NULL with an exception set: TypeError when called without
 95 | // arguments or with an unsupported type, RuntimeError on an uninitialized object.
 96 | static PyObject *
 97 | Hashxx_update(HashxxObject* self, PyObject *args)
 98 | {
 99 |     if (PyTuple_GET_SIZE(args) == 0) {
100 |         PyErr_SetString(PyExc_TypeError, "Must provide arguments to hash to Hashxx.update.");
101 |         return NULL;
102 |     }
103 |     // The state is still NULL if the object was created without running __init__
104 |     if (self->xxhash_state == NULL) {
105 |         PyErr_SetString(PyExc_RuntimeError, "Hashxx object was not initialized.");
106 |         return NULL;
107 |     }
108 | 
109 |     // args is itself a tuple, which _update_hash walks item by item; its result
110 |     // (a new reference to None, or NULL on error) is exactly what update() returns,
111 |     // so no reference is left unreleased here.
112 |     return _update_hash(self->xxhash_state, args);
113 | }
114 | 
115 | // Hashxx.digest(): return the digest of the data fed so far; RuntimeError if uninitialized.
116 | static PyObject *Hashxx_digest(HashxxObject* self)
117 | {
118 |     if (self->xxhash_state != NULL) return Py_BuildValue("I", XXH32_digest(self->xxhash_state));
119 |     PyErr_SetString(PyExc_RuntimeError, "Hashxx object was not initialized.");  // no __init__ ran
120 |     return NULL;
121 | }
122 | 
123 | static PyMethodDef Hashxx_methods[] = {  // instance methods of pyhashxx.Hashxx
124 |     {"update", (PyCFunction)Hashxx_update, METH_VARARGS,
125 |      "Update the digest with new data."
126 |     },
127 |     {"digest", (PyCFunction)Hashxx_digest, METH_NOARGS,
128 |      "Return the current digest value of the data processed so far."
129 |     },
130 |     {NULL}  /* Sentinel */
131 | };
132 | 
133 | 
134 | // Type object for pyhashxx.Hashxx: only dealloc, doc, methods, init and new are filled in.
135 | static PyTypeObject pyhashxx_HashxxType = {
136 |     PyVarObject_HEAD_INIT(NULL, 0)
137 |     "pyhashxx.Hashxx",         /*tp_name*/
138 |     sizeof(HashxxObject), /*tp_basicsize*/
139 |     0,                         /*tp_itemsize*/
140 |     (destructor)Hashxx_dealloc,  /*tp_dealloc*/
141 |     0,                         /*tp_print*/
142 |     0,                         /*tp_getattr*/
143 |     0,                         /*tp_setattr*/
144 |     0,                         /*tp_compare*/
145 |     0,                         /*tp_repr*/
146 |     0,                         /*tp_as_number*/
147 |     0,                         /*tp_as_sequence*/
148 |     0,                         /*tp_as_mapping*/
149 |     0,                         /*tp_hash */
150 |     0,                         /*tp_call*/
151 |     0,                         /*tp_str*/
152 |     0,                         /*tp_getattro*/
153 |     0,                         /*tp_setattro*/
154 |     0,                         /*tp_as_buffer*/
155 |     Py_TPFLAGS_DEFAULT,        /*tp_flags*/
156 |     "Hashxx objects",           /* tp_doc */
157 |     0,               /* tp_traverse */
158 |     0,               /* tp_clear */
159 |     0,               /* tp_richcompare */
160 |     0,               /* tp_weaklistoffset */
161 |     0,               /* tp_iter */
162 |     0,               /* tp_iternext */
163 |     Hashxx_methods,             /* tp_methods */
164 |     0,             /* tp_members */
165 |     0,                         /* tp_getset */
166 |     0,                         /* tp_base */
167 |     0,                         /* tp_dict */
168 |     0,                         /* tp_descr_get */
169 |     0,                         /* tp_descr_set */
170 |     0,                         /* tp_dictoffset */
171 |     (initproc)Hashxx_init,      /* tp_init */
172 |     0,                         /* tp_alloc */
173 |     Hashxx_new,                 /* tp_new */
174 | };
175 | 
176 | 
177 | 
178 | // hashxx(*args, seed=0): hash all positional arguments in one call. The arguments are
179 | // treated as one tuple, so nested tuples hash like the concatenation of their items.
180 | // Returns the digest as a Python integer, or NULL with an exception set: TypeError for no
181 | // arguments, a keyword other than 'seed', a non-integer seed or an unsupported argument type.
182 | static PyObject *
183 | pyhashxx_hashxx(PyObject* self, PyObject *args, PyObject *kwds)
184 | {
185 |     unsigned int seed = 0;
186 |     const char* err_msg = NULL;
187 |     Py_ssize_t args_len = 0;
188 |     unsigned int digest = 0;
189 |     void* state = NULL;
190 |     PyObject* update_result = NULL;
191 | 
192 |     if (kwds != NULL) {
193 |         Py_ssize_t kwds_size = PyDict_Size(kwds);
194 |         PyObject* seed_obj = PyDict_GetItemString(kwds, "seed");
195 |         if (kwds_size > 1) {
196 |             err_msg = "Unexpected keyword arguments, only 'seed' is supported.";
197 |             goto badarg;
198 |         }
199 |         if (kwds_size == 1) {
200 |             if (seed_obj == NULL) {
201 |                 err_msg = "Unexpected keyword argument, only 'seed' is supported.";
202 |                 goto badarg;
203 |             }
204 |             // Convert by masking to the low bits, like the "I" format used for
205 |             // Hashxx(seed=...); unlike PyLong_AsLong this cannot fail with overflow.
206 | #if PY_MAJOR_VERSION < 3
207 |             if (PyInt_Check(seed_obj))
208 |                 seed = (unsigned int)PyInt_AsUnsignedLongMask(seed_obj);
209 |             else
210 | #endif
211 |             if (PyLong_Check(seed_obj))
212 |                 seed = (unsigned int)PyLong_AsUnsignedLongMask(seed_obj);
213 |             else {
214 |                 PyErr_Format(PyExc_TypeError, "Unexpected seed value type: %S", Py_TYPE(seed_obj));
215 |                 return NULL;
216 |             }
217 |         }
218 |     }
219 |     args_len = PyTuple_GET_SIZE(args);
220 |     if (args_len == 0) {
221 |         err_msg = "Received no arguments to be hashed.";
222 |         goto badarg;
223 |     }
224 | 
225 |     // If possible, use the shorter, faster version that elides
226 |     // allocating the state variable because it knows there is only
227 |     // one input.
228 |     if (args_len == 1) {
229 |         PyObject* hash_obj = PyTuple_GetItem(args, 0);
230 |         const char* data = NULL;
231 |         Py_ssize_t size = -1;  // stays negative when hash_obj needs the slow path
232 | #if PY_MAJOR_VERSION >= 3
233 |         if (PyBytes_Check(hash_obj)) {
234 |             data = PyBytes_AsString(hash_obj); size = PyBytes_Size(hash_obj);
235 |         }
236 | #else
237 |         if (PyString_Check(hash_obj)) {
238 |             data = PyString_AsString(hash_obj); size = PyString_Size(hash_obj);
239 |         }
240 | #endif
241 |         else if (PyByteArray_Check(hash_obj)) {
242 |             data = PyByteArray_AsString(hash_obj); size = PyByteArray_Size(hash_obj);
243 |         }
244 |         else if (hash_obj == Py_None) {
245 |             data = ""; size = 0;  // Nothing to hash
246 |         }
247 |         // XXH32 takes an int length, so only use it when the size fits in one
248 |         if (size >= 0 && size <= INT_MAX)
249 |             return Py_BuildValue("I", XXH32(data, (int)size, seed));
250 |     }
251 | 
252 |     // Otherwise, do it the long, slower way
253 |     state = XXH32_init(seed);
254 |     if (state == NULL)
255 |         return PyErr_NoMemory();
256 |     update_result = _update_hash(state, args);
257 |     if (update_result == NULL) {
258 |         XXH32_destroy(state);
259 |         return NULL;
260 |     }
261 |     Py_DECREF(update_result);  // only signals success; release it instead of leaking
262 |     digest = XXH32_digest(state);
263 |     XXH32_destroy(state);
264 |     return Py_BuildValue("I", digest);
265 | 
266 | badarg:
267 |     PyErr_SetString(PyExc_TypeError, err_msg);
268 |     return NULL;
269 | }
270 | 
271 | static PyMethodDef pyhashxx_methods[] = {  // module-level functions of pyhashxx
272 |     {"hashxx", (PyCFunction)pyhashxx_hashxx, METH_VARARGS | METH_KEYWORDS,
273 |      "Compute the xxHash value for the given value, optionally providing a seed."
274 |     },
275 |     {NULL}  /* Sentinel */
276 | };
277 | 
278 | 
279 | 
280 | // Module init: readies the Hashxx type, creates the module and publishes the type on it.
281 | MOD_INIT(pyhashxx) {
282 |     PyObject* m;
283 |     MOD_DECL(m, "pyhashxx", "Python wrapper of the xxHash fast hash algorithm.",
284 |         pyhashxx_methods);
285 | 
286 |     if (PyType_Ready(&pyhashxx_HashxxType) < 0)
287 |         RETURN_MOD_INIT_ERROR;
288 |     MOD_DEF(m);
289 |     if (m == NULL)
290 |         RETURN_MOD_INIT_ERROR;
291 |     Py_INCREF(&pyhashxx_HashxxType);
292 |     if (PyModule_AddObject(m, "Hashxx", (PyObject *)&pyhashxx_HashxxType) < 0) {
293 |         Py_DECREF(&pyhashxx_HashxxType);  // the reference is stolen only on success
294 |         RETURN_MOD_INIT_ERROR;
295 |     }
296 |     RETURN_MOD_INIT_SUCCESS(m);
297 | }
298 | 


--------------------------------------------------------------------------------
/pyhashxx/xxhash.c:
--------------------------------------------------------------------------------
  1 | /*
  2 | xxHash - Fast Hash algorithm
  3 | Copyright (C) 2012-2013, Yann Collet.
  4 | BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  5 | 
  6 | Redistribution and use in source and binary forms, with or without
  7 | modification, are permitted provided that the following conditions are
  8 | met:
  9 | 
 10 | * Redistributions of source code must retain the above copyright
 11 | notice, this list of conditions and the following disclaimer.
 12 | * Redistributions in binary form must reproduce the above
 13 | copyright notice, this list of conditions and the following disclaimer
 14 | in the documentation and/or other materials provided with the
 15 | distribution.
 16 | 
 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 28 | 
 29 | You can contact the author at :
 30 | - xxHash source repository : http://code.google.com/p/xxhash/
 31 | */
 32 | 
 33 | 
 34 | 
 35 | //**************************************
 36 | // Tuning parameters
 37 | //**************************************
 38 | // XXH_ACCEPT_NULL_INPUT_POINTER :
 39 | // If the input pointer is a null pointer, xxHash default behavior is to crash, since it is a bad input.
 40 | // If this option is enabled, xxHash output for null input pointers will be the same as a null-length input.
 41 | // This option has a very small performance cost (only measurable on small inputs).
 42 | // By default, this option is disabled. To enable it, uncomment below define :
 43 | //#define XXH_ACCEPT_NULL_INPUT_POINTER 1
 44 | 
 45 | // XXH_FORCE_NATIVE_FORMAT :
 46 | // By default, xxHash library provides endian-independent Hash values, based on little-endian convention.
 47 | // Results are therefore identical for little-endian and big-endian CPU.
 48 | // This comes at a performance cost for big-endian CPU, since some swapping is required to emulate little-endian format.
 49 | // Should endian-independence be of no importance to your application, you may uncomment the #define below
 50 | // It will improve speed for Big-endian CPU.
 51 | // This option has no impact on Little_Endian CPU.
 52 | //#define XXH_FORCE_NATIVE_FORMAT 1
 53 | 
 54 | 
 55 | 
 56 | //**************************************
 57 | // Includes
 58 | //**************************************
 59 | #include <stdlib.h>    // for malloc(), free()
 60 | #include <string.h>    // for memcpy()
 61 | #include "xxhash.h"
 62 | 
 63 | 
 64 | 
 65 | //**************************************
 66 | // CPU Feature Detection
 67 | //**************************************
 68 | // Little Endian or Big Endian ?
 69 | // You can overwrite the #define below if you know your architecture endianness
 70 | #if defined(XXH_FORCE_NATIVE_FORMAT) && (XXH_FORCE_NATIVE_FORMAT==1)
 71 | // Force native format. The result will be endian dependent.
 72 | #  define XXH_BIG_ENDIAN 0
 73 | #elif defined (__GLIBC__)
 74 | #  include <endian.h>
 75 | #  if (__BYTE_ORDER == __BIG_ENDIAN)
 76 | #     define XXH_BIG_ENDIAN 1
 77 | #  endif
 78 | #elif (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN)) && !(defined(__LITTLE_ENDIAN__) || defined(__LITTLE_ENDIAN) || defined(_LITTLE_ENDIAN))
 79 | #  define XXH_BIG_ENDIAN 1
 80 | #elif defined(__sparc) || defined(__sparc__) \
 81 |     || defined(__ppc__) || defined(_POWER) || defined(__powerpc__) || defined(_ARCH_PPC) || defined(__PPC__) || defined(__PPC) || defined(PPC) || defined(__powerpc__) || defined(__powerpc) || defined(powerpc) \
 82 |     || defined(__hpux)  || defined(__hppa) \
 83 |     || defined(_MIPSEB) || defined(__s390__)
 84 | #  define XXH_BIG_ENDIAN 1
 85 | #endif
 86 | // Any branch above that left XXH_BIG_ENDIAN undefined falls through to the default below
 87 | #if !defined(XXH_BIG_ENDIAN)
 88 | // Little Endian assumed. PDP Endian and other very rare endian formats are unsupported.
 89 | #  define XXH_BIG_ENDIAN 0
 90 | #endif
 91 | 
 92 | 
 93 | //**************************************
 94 | // Basic Types
 95 | //**************************************
 96 | #if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   // C99
 97 | # include <stdint.h>
 98 |   typedef uint8_t  BYTE;
 99 |   typedef uint16_t U16;
100 |   typedef uint32_t U32;
101 |   typedef  int32_t S32;
102 |   typedef uint64_t U64;
103 | #else   // no <stdint.h>: assumes short is 16, int 32 and long long 64 bits wide
104 |   typedef unsigned char       BYTE;
105 |   typedef unsigned short      U16;
106 |   typedef unsigned int        U32;
107 |   typedef   signed int        S32;
108 |   typedef unsigned long long  U64;
109 | #endif
110 | 
111 | 
112 | //**************************************
113 | // Compiler-specific Options & Functions
114 | //**************************************
115 | #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
116 | 
117 | // Note : under GCC, it may sometimes be faster to enable the (2nd) macro definition, instead of using win32 intrinsic
118 | #if defined(_WIN32)
119 | #  define XXH_rotl32(x,r) _rotl(x,r)
120 | #else   // arguments are parenthesized so that expressions can be passed safely
121 | #  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
122 | #endif
123 | // XXH_swap32(x): reverse the byte order of a 32-bit value, via an intrinsic when one is known
124 | #if defined(_MSC_VER)     // Visual Studio
125 | #  define XXH_swap32 _byteswap_ulong
126 | #elif GCC_VERSION >= 403
127 | #  define XXH_swap32 __builtin_bswap32
128 | #else
129 | static inline U32 XXH_swap32 (U32 x) {
130 |     return  ((x << 24) & 0xff000000 ) |
131 |         ((x <<  8) & 0x00ff0000 ) |
132 |         ((x >>  8) & 0x0000ff00 ) |
133 |         ((x >> 24) & 0x000000ff );}
134 | #endif
135 | 
136 | 
137 | //**************************************
138 | // Constants
139 | //**************************************
140 | // 32-bit constants used as multipliers and offsets by the hashing rounds in this file
141 | #define PRIME32_1   2654435761U
142 | #define PRIME32_2   2246822519U
143 | #define PRIME32_3   3266489917U
144 | #define PRIME32_4    668265263U
145 | #define PRIME32_5    374761393U
146 | 
147 | //**************************************
148 | // Macros
149 | //**************************************
150 | // Read 32 bits at p as little-endian. NOTE(review): a direct U32 load, so p is assumed loadable unaligned
151 | #define XXH_LE32(p)  (XXH_BIG_ENDIAN ? XXH_swap32(*(U32*)(p)) : *(U32*)(p))
152 | 
153 | 
154 | //****************************
155 | // Simple Hash Functions
156 | //****************************
157 | // One-shot hash: returns the 32-bit hash of the len bytes at input, seeded with seed.
158 | U32 XXH32(const void* input, int len, U32 seed)
159 | {
160 | #if 0
161 |     // Simple version, good for code maintenance, but unfortunately slow for small inputs
162 |     void* state = XXH32_init(seed);
163 |     XXH32_update(state, input, len);
164 |     U32 result = XXH32_digest(state);
165 |     XXH32_destroy(state);
166 |     return result;
167 | #else
168 | 
169 |     const BYTE* p = (const BYTE*)input;
170 |     const BYTE* bEnd = p + len;
171 |     U32 h32;
172 | 
173 | #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
174 |     if (p==NULL) { len=0; bEnd=p=(const BYTE*)(size_t)16; }   // bEnd must move with p, or the loops below read garbage
175 | #endif
176 | 
177 |     if (len>=16)
178 |     {
179 |         const BYTE* const limit = bEnd - 16;
180 |         U32 v1 = seed + PRIME32_1 + PRIME32_2;
181 |         U32 v2 = seed + PRIME32_2;
182 |         U32 v3 = seed + 0;
183 |         U32 v4 = seed - PRIME32_1;
184 | 
185 |         do
186 |         {
187 |             v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
188 |             v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
189 |             v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
190 |             v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
191 |         } while (p<=limit);
192 | 
193 |         h32 = XXH_rotl32(v1, 1) + XXH_rotl32(v2, 7) + XXH_rotl32(v3, 12) + XXH_rotl32(v4, 18);
194 |     }
195 |     else
196 |     {
197 |         h32  = seed + PRIME32_5;
198 |     }
199 | 
200 |     h32 += (U32) len;
201 |     // Remaining 4-byte words; compare distances so no pointer before the buffer is formed
202 |     while (bEnd-p >= 4)
203 |     {
204 |         h32 += XXH_LE32(p) * PRIME32_3;
205 |         h32 = XXH_rotl32(h32, 17) * PRIME32_4 ;
206 |         p+=4;
207 |     }
208 |     // Remaining single bytes
209 |     while (p<bEnd)
210 |     {
211 |         h32 += (*p) * PRIME32_5;
212 |         h32 = XXH_rotl32(h32, 11) * PRIME32_1 ;
213 |         p++;
214 |     }
215 |     // Final mixing of the accumulated value
216 |     h32 ^= h32 >> 15;
217 |     h32 *= PRIME32_2;
218 |     h32 ^= h32 >> 13;
219 |     h32 *= PRIME32_3;
220 |     h32 ^= h32 >> 16;
221 | 
222 |     return h32;
223 | 
224 | #endif
225 | }
226 | 
227 | 
228 | //****************************
229 | // Advanced Hash Functions
230 | //****************************
231 | 
232 | struct XXH_state32_t
233 | {
234 |     U32 seed;           // seed passed to XXH32_resetState
235 |     U32 v1;             // v1..v4: the four running accumulators
236 |     U32 v2;
237 |     U32 v3;
238 |     U32 v4;
239 |     U64 total_len;      // bytes passed to XXH32_update since the last reset
240 |     char memory[16];    // input bytes waiting to complete a 16-byte block
241 |     int memsize;        // number of valid bytes in memory
242 | };
243 | 
244 | // Size in bytes of the state structure that XXH32_resetState initializes.
245 | int XXH32_sizeofState(void) { return sizeof(struct XXH_state32_t); }
246 | 
247 | // Re-arm an existing state for a new hash run with the given seed; always returns OK.
248 | XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed)
249 | {
250 |     struct XXH_state32_t * const st = (struct XXH_state32_t *) state_in;
251 |     st->total_len = 0;
252 |     st->memsize = 0;
253 |     st->seed = seed;
254 |     st->v1 = seed + PRIME32_1 + PRIME32_2;
255 |     st->v2 = seed + PRIME32_2;
256 |     st->v3 = seed;
257 |     st->v4 = seed - PRIME32_1;
258 |     return OK;
259 | }
260 | 
261 | // Allocate and seed a new hash state; returns NULL when the allocation fails.
262 | void* XXH32_init (U32 seed)
263 | {
264 |     struct XXH_state32_t * state = (struct XXH_state32_t *) malloc (sizeof(struct XXH_state32_t));
265 |     if (state != NULL) XXH32_resetState(state, seed);   // never write through a failed malloc
266 |     return (void*)state;
267 | }
268 | 
269 | // Feed len bytes at input into the state. NOTE(review): len is an int and is not range-checked.
270 | XXH_errorcode XXH32_update (void* state_in, const void* input, int len)
271 | {
272 |     struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
273 |     const BYTE* p = (const BYTE*)input;
274 |     const BYTE* const bEnd = p + len;
275 | 
276 | #ifdef XXH_ACCEPT_NULL_INPUT_POINTER
277 |     if (input==NULL) return XXH_ERROR;
278 | #endif
279 |     // total_len counts every byte ever fed, including bytes still waiting in memory
280 |     state->total_len += len;
281 | 
282 |     if (state->memsize + len < 16)   // fill in tmp buffer
283 |     {
284 |         memcpy(state->memory + state->memsize, input, len);
285 |         state->memsize +=  len;
286 |         return OK;
287 |     }
288 |     // Top the buffered bytes up to a full 16-byte block and run one round on it
289 |     if (state->memsize)   // some data left from previous update
290 |     {
291 |         memcpy(state->memory + state->memsize, input, 16-state->memsize);
292 |         {
293 |             const U32* p32 = (const U32*)state->memory;
294 |             state->v1 += XXH_LE32(p32) * PRIME32_2; state->v1 = XXH_rotl32(state->v1, 13); state->v1 *= PRIME32_1; p32++;
295 |             state->v2 += XXH_LE32(p32) * PRIME32_2; state->v2 = XXH_rotl32(state->v2, 13); state->v2 *= PRIME32_1; p32++;
296 |             state->v3 += XXH_LE32(p32) * PRIME32_2; state->v3 = XXH_rotl32(state->v3, 13); state->v3 *= PRIME32_1; p32++;
297 |             state->v4 += XXH_LE32(p32) * PRIME32_2; state->v4 = XXH_rotl32(state->v4, 13); state->v4 *= PRIME32_1; p32++;
298 |         }
299 |         p += 16-state->memsize;
300 |         state->memsize = 0;
301 |     }
302 |     // Run the rounds on every complete 16-byte block directly from the input
303 |     if (p <= bEnd-16)
304 |     {
305 |         const BYTE* const limit = bEnd - 16;
306 |         U32 v1 = state->v1;
307 |         U32 v2 = state->v2;
308 |         U32 v3 = state->v3;
309 |         U32 v4 = state->v4;
310 | 
311 |         do
312 |         {
313 |             v1 += XXH_LE32(p) * PRIME32_2; v1 = XXH_rotl32(v1, 13); v1 *= PRIME32_1; p+=4;
314 |             v2 += XXH_LE32(p) * PRIME32_2; v2 = XXH_rotl32(v2, 13); v2 *= PRIME32_1; p+=4;
315 |             v3 += XXH_LE32(p) * PRIME32_2; v3 = XXH_rotl32(v3, 13); v3 *= PRIME32_1; p+=4;
316 |             v4 += XXH_LE32(p) * PRIME32_2; v4 = XXH_rotl32(v4, 13); v4 *= PRIME32_1; p+=4;
317 |         } while (p<=limit);
318 | 
319 |         state->v1 = v1;
320 |         state->v2 = v2;
321 |         state->v3 = v3;
322 |         state->v4 = v4;
323 |     }
324 |     // Keep the tail (fewer than 16 bytes) in memory for the next update or the digest
325 |     if (p < bEnd)
326 |     {
327 |         memcpy(state->memory, p, bEnd-p);
328 |         state->memsize = (int)(bEnd-p);
329 |     }
330 | 
331 |     return OK;
332 | }
333 | 
334 | 
335 | U32 XXH32_digest (void* state_in)
336 | {
337 |     struct XXH_state32_t * state = (struct XXH_state32_t *) state_in;
338 |     BYTE * p   = (BYTE*)state->memory;
339 |     BYTE* bEnd = (BYTE*)state->memory + state->memsize;
340 |     U32 h32;
341 | 
342 | 
343 |     if (state->total_len >= 16)
344 |     {
345 |         h32 = XXH_rotl32(state->v1, 1) + XXH_rotl32(state->v2, 7) + XXH_rotl32(state->v3, 12) + XXH_rotl32(state->v4, 18);
346 |     }
347 |     else
348 |     {
349 |         h32  = state->seed + PRIME32_5;
350 |     }
351 | 
352 |     h32 += (U32) state->total_len;
353 | 
354 |     while (p<=bEnd-4)
355 |     {
356 |         h32 += XXH_LE32(p) * PRIME32_3;
357 |         h32 = XXH_rotl32(h32, 17) * PRIME32_4;
358 |         p+=4;
359 |     }
360 | 
361 |     while (p<bEnd)
362 |     {
363 |         h32 += (*p) * PRIME32_5;
364 |         h32 = XXH_rotl32(h32, 11) * PRIME32_1;
365 |         p++;
366 |     }
367 | 
368 |     h32 ^= h32 >> 15;
369 |     h32 *= PRIME32_2;
370 |     h32 ^= h32 >> 13;
371 |     h32 *= PRIME32_3;
372 |     h32 ^= h32 >> 16;
373 | 
374 |     return h32;
375 | }
376 | 
377 | 
/*
 * XXH32_destroy() :
 *   Release a state with free(). A NULL pointer is accepted and ignored.
 */
void XXH32_destroy (void* state_in)
{
    if (state_in != NULL)
    {
        free(state_in);
    }
}
382 | 


--------------------------------------------------------------------------------
/pyhashxx/xxhash.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |    xxHash - Fast Hash algorithm
  3 |    Header File
  4 |    Copyright (C) 2012-2013, Yann Collet.
  5 |    BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
  6 | 
  7 |    Redistribution and use in source and binary forms, with or without
  8 |    modification, are permitted provided that the following conditions are
  9 |    met:
 10 | 
 11 |        * Redistributions of source code must retain the above copyright
 12 |    notice, this list of conditions and the following disclaimer.
 13 |        * Redistributions in binary form must reproduce the above
 14 |    copyright notice, this list of conditions and the following disclaimer
 15 |    in the documentation and/or other materials provided with the
 16 |    distribution.
 17 | 
 18 |    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 19 |    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 20 |    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 21 |    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 22 |    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 23 |    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 24 |    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 25 |    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 26 |    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 27 |    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 28 |    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 29 | 
 30 | 	You can contact the author at :
 31 | 	- xxHash source repository : http://code.google.com/p/xxhash/
 32 | */
 33 | 
 34 | /* Notice extracted from xxHash homepage :
 35 | 
 36 | xxHash is an extremely fast Hash algorithm, running at RAM speed limits.
 37 | It also successfully passes all tests from the SMHasher suite.
 38 | 
 39 | Comparison (single thread, Windows Seven 32 bits, using SMHasher on a Core 2 Duo @3GHz)
 40 | 
 41 | Name            Speed       Q.Score   Author
 42 | xxHash          5.4 GB/s     10
 43 | CrapWow         3.2 GB/s      2       Andrew
 44 | MurmurHash 3a   2.7 GB/s     10       Austin Appleby
 45 | SpookyHash      2.0 GB/s     10       Bob Jenkins
 46 | SBox            1.4 GB/s      9       Bret Mulvey
 47 | Lookup3         1.2 GB/s      9       Bob Jenkins
 48 | SuperFastHash   1.2 GB/s      1       Paul Hsieh
 49 | CityHash64      1.05 GB/s    10       Pike & Alakuijala
 50 | FNV             0.55 GB/s     5       Fowler, Noll, Vo
 51 | CRC32           0.43 GB/s     9
 52 | MD5-32          0.33 GB/s    10       Ronald L. Rivest
 53 | SHA1-32         0.28 GB/s    10
 54 | 
 55 | Q.Score is a measure of quality of the hash function.
 56 | It depends on successfully passing SMHasher test set.
 57 | 10 is a perfect score.
 58 | */
 59 | 
 60 | #pragma once
 61 | 
 62 | #if defined (__cplusplus)
 63 | extern "C" {
 64 | #endif
 65 | 
 66 | 
 67 | //****************************
 68 | // Type
 69 | //****************************
// Status code returned by XXH32_update() and XXH32_resetState() :
// OK (0) means success, any other value (XXH_ERROR) means an error.
typedef enum { OK=0, XXH_ERROR } XXH_errorcode;
 71 | 
 72 | 
 73 | 
 74 | //****************************
 75 | // Simple Hash Functions
 76 | //****************************
 77 | 
 78 | unsigned int XXH32 (const void* input, int len, unsigned int seed);
 79 | 
 80 | /*
 81 | XXH32() :
 82 | 	Calculate the 32-bit hash of the sequence of length "len" stored at memory address "input".
 83 |     The memory between input & input+len must be valid (allocated and read-accessible).
 84 | 	"seed" can be used to alter the result predictably.
 85 | 	This function successfully passes all SMHasher tests.
 86 | 	Speed on Core 2 Duo @ 3 GHz (single thread, SMHasher benchmark) : 5.4 GB/s
 87 | 	Note that "len" is type "int", which means it is limited to 2^31-1.
 88 | 	If your data is larger, use the advanced functions below.
 89 | */
 90 | 
 91 | 
 92 | 
 93 | //****************************
 94 | // Advanced Hash Functions
 95 | //****************************
 96 | 
 97 | void*         XXH32_init   (unsigned int seed);
 98 | XXH_errorcode XXH32_update (void* state, const void* input, int len);
 99 | unsigned int  XXH32_digest (void* state);
100 | void  XXH32_destroy (void* state);
101 | 
102 | /*
103 | These functions calculate the xxhash of an input provided in several small packets,
104 | as opposed to an input provided as a single block.
105 | 
106 | It must be started with :
107 | void* XXH32_init()
108 | The function returns a pointer which holds the state of calculation.
109 | 
110 | This pointer must be provided as "void* state" parameter for XXH32_update().
111 | XXH32_update() can be called as many times as necessary.
112 | The user must provide a valid (allocated) input.
113 | The function returns an error code, with 0 meaning OK, and any other value meaning there is an error.
114 | Note that "len" is type "int", which means it is limited to 2^31-1.
115 | If your data is larger, it is recommended to chunk your data into blocks
116 | of size for example 2^30 (1GB) to avoid any "int" overflow issue.
117 | 
118 | Finally, you can end the calculation anytime, by using XXH32_digest().
119 | This function returns the final 32-bit hash.
120 | You must provide the same "void* state" parameter created by XXH32_init().
121 | 
122 | When you are done computing digests, use XXH32_destroy() to clean up
123 | the state, freeing memory associated with the hash calculation state.
124 | */
125 | 
126 | 
127 | int           XXH32_sizeofState(void);
128 | XXH_errorcode XXH32_resetState(void* state_in, unsigned int seed);
129 | /*
130 | These functions are the basic elements of XXH32_init();
131 | The objective is to allow user application to make its own allocation.
132 | 
133 | XXH32_sizeofState() is used to know how much space must be allocated by the application.
134 | This space must be referenced by a void* pointer.
135 | This pointer must be provided as 'state_in' into XXH32_resetState(), which initializes the state.
136 | */
137 | 
138 | 
139 | #if defined (__cplusplus)
140 | }
141 | #endif
142 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import find_packages, setup, Extension
 2 | 
 3 | headers = [  'pyhashxx/xxhash.h',
 4 |              'pyhashxx/pycompat.h',
 5 |          ]
 6 | sources = [ 'pyhashxx/xxhash.c',
 7 |             'pyhashxx/pyhashxx.c',
 8 |         ]
 9 | pyhashxx = Extension('pyhashxx', sources=sources, depends=headers)
10 | 
11 | setup(
12 |     name = "pyhashxx",
13 |     version = "0.1.3",
14 |     description = "Python wrapper for xxHash algorithm",
15 |     author = "Ewen Cheslack-Postava",
16 |     author_email = 'me@ewencp.org',
17 |     platforms=["any"],
18 |     license="BSD",
19 |     url = "http://github.com/ewencp/pyhashxx",
20 |     packages = find_packages(),
21 |     ext_modules = [ pyhashxx ],
22 |     test_suite = "tests",
23 |     headers = headers,
24 | )
25 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ewencp/pyhashxx/80b827612d8d4e3b3cb9a581aa22331b89ca8883/tests/__init__.py


--------------------------------------------------------------------------------
/tests/test_hash_bytes.py:
--------------------------------------------------------------------------------
 1 | from __future__ import unicode_literals
 2 | from pyhashxx import Hashxx
 3 | import unittest
 4 | 
 5 | class TestHashBytes(unittest.TestCase):
 6 | 
 7 |     def test_empty_string(self):
 8 |         h = Hashxx()
 9 |         h.update(b'')
10 |         self.assertEqual(h.digest(), 46947589)
11 | 
12 |     def test_one_string(self):
13 |         h = Hashxx()
14 |         h.update(b'hello')
15 |         self.assertEqual(h.digest(), 4211111929)
16 | 
17 |         h = Hashxx()
18 |         h.update(b'goodbye')
19 |         self.assertEqual(h.digest(), 2269043192)
20 | 
21 | 
22 |     def test_multiple_strings(self):
23 |         h = Hashxx()
24 |         h.update(b'hello')
25 |         h.update(b'goodbye')
26 |         self.assertEqual(h.digest(), 4110974955)
27 | 
28 |     def test_tuple(self):
29 |         # Tuples shouldn't affect the hash, they should be equivalent to hashing
30 |         # each part in a separate update
31 |         h = Hashxx()
32 |         h.update((b'hello',b'goodbye'))
33 |         self.assertEqual(h.digest(), 4110974955)
34 | 
35 |     def test_seeds(self):
36 |         h = Hashxx(seed=0)
37 |         h.update(b'hello')
38 |         self.assertEqual(h.digest(), 4211111929)
39 | 
40 |         h = Hashxx(seed=1)
41 |         h.update(b'hello')
42 |         self.assertEqual(h.digest(), 4244634537)
43 | 
44 |         h = Hashxx(seed=2)
45 |         h.update(b'hello')
46 |         self.assertEqual(h.digest(), 4191738725)
47 | 
48 |     def hash_value(self, val, seed=0):
49 |         h = Hashxx(seed=seed)
50 |         h.update(val)
51 |         return h.digest()
52 | 
53 |     def test_incremental(self):
54 |         # Make sure incrementally computed results match those
55 |         # computed all at once
56 |         hello_hash = self.hash_value(b'hello')
57 |         hello_world_hash = self.hash_value(b'helloworld')
58 | 
59 |         h = Hashxx()
60 |         h.update(b'hello')
61 |         self.assertEqual(h.digest(), hello_hash)
62 |         h.update(b'world')
63 |         self.assertEqual(h.digest(), hello_world_hash)
64 | 
65 | 
66 |     def test_simultaneous(self):
67 |         # Ensure that interleaved updates still give same results as
68 |         # independent
69 |         h1 = Hashxx()
70 |         h2 = Hashxx()
71 | 
72 |         h1.update(b'he')
73 |         h2.update(b'goo')
74 |         h1.update(b'll')
75 |         h2.update(b'db')
76 |         h1.update(b'o')
77 |         h2.update(b'ye')
78 | 
79 |         self.assertEqual(h1.digest(), self.hash_value(b'hello'))
80 |         self.assertEqual(h2.digest(), self.hash_value(b'goodbye'))
81 | 
82 | 
83 |     def test_bad_seed(self):
84 |         self.assertRaises(TypeError, Hashxx, seed="badseed")
85 | 
86 |     def test_bad_arg(self):
87 |         h = Hashxx()
88 |         self.assertRaises(TypeError, h.update, [1,2,3])
89 | 
90 |     def test_no_args(self):
91 |         h = Hashxx()
92 |         self.assertRaises(TypeError, h.update)
93 | 
94 |     def test_no_unicode(self):
95 |         h = Hashxx()
96 |         self.assertRaises(TypeError, h.update, 'hello')
97 | 


--------------------------------------------------------------------------------
/tests/test_oneshot.py:
--------------------------------------------------------------------------------
 1 | from __future__ import unicode_literals
 2 | from pyhashxx import hashxx, Hashxx
 3 | import unittest
 4 | 
 5 | class TestOneShot(unittest.TestCase):
 6 |     # The shorthand should be equivalent to this simple function:
 7 |     def hash_value(self, val, seed=0):
 8 |         h = Hashxx(seed=seed)
 9 |         h.update(val)
10 |         return h.digest()
11 | 
12 |     def test_empty_string(self):
13 |         self.assertEqual(hashxx(b''), self.hash_value(b''))
14 | 
15 |     def test_string(self):
16 |         self.assertEqual(hashxx(b'hello'), self.hash_value(b'hello'))
17 | 
18 |     def test_seeds(self):
19 |         self.assertNotEqual(hashxx(b'hello', seed=0), hashxx(b'hello', seed=1))
20 | 
21 |         self.assertEqual(hashxx(b'hello', seed=0), self.hash_value(b'hello', seed=0))
22 |         self.assertEqual(hashxx(b'hello', seed=1), self.hash_value(b'hello', seed=1))
23 |         self.assertEqual(hashxx(b'hello', seed=2), self.hash_value(b'hello', seed=2))
24 | 
25 |     def test_bad_arg(self):
26 |         self.assertRaises(TypeError, hashxx, [1, 2, 3])
27 | 
28 |     def test_bad_seed(self):
29 |         self.assertRaises(TypeError, hashxx, seed="badseed")
30 | 
31 |     def test_no_args(self):
32 |         self.assertRaises(TypeError, hashxx)
33 | 
34 |     def test_no_unicode(self):
35 |         self.assertRaises(TypeError, hashxx, 'hello')
36 | 


--------------------------------------------------------------------------------