├── update_c.sh
├── bench
    ├── __init__.py
    ├── words100k.txt.zip
    └── speed.py
├── tests
    ├── __init__.py
    ├── test_inttrie.py
    ├── test_floattrie.py
    ├── test_base_trie.py
    └── test_trie.py
├── hat-trie
    ├── TODO
    ├── .travis.yml
    ├── Makefile.am
    ├── src
    │   ├── murmurhash3.h
    │   ├── Makefile.am
    │   ├── common.h
    │   ├── misc.h
    │   ├── misc.c
    │   ├── murmurhash3.c
    │   ├── hat-trie.h
    │   ├── ahtable.h
    │   ├── ahtable.c
    │   ├── hat-trie.c
    │   └── pstdint.h
    ├── .gitignore
    ├── hat-trie-0.1.pc.in
    ├── test
    │   ├── Makefile.am
    │   ├── str_map.h
    │   ├── bench_sorted_iter.c
    │   ├── check_ahtable.c
    │   ├── str_map.c
    │   └── check_hattrie.c
    ├── configure.ac
    ├── COPYING
    └── README.md
├── .hgtags
├── tox.ini
├── .travis.yml
├── .gitignore
├── MANIFEST.in
├── bench.ini
├── .hgignore
├── CHANGES.rst
├── LICENSE
├── src
    ├── chat_trie.pxd
    ├── hat_trie.pyx
    └── chat_trie.c
├── setup.py
└── README.rst


/update_c.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | cython src/hat_trie.pyx src/chat_trie.pxd -a  # regenerate the C sources; -a presumably also writes the annotated src/*.html reports that .gitignore excludes


--------------------------------------------------------------------------------
/bench/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | from __future__ import absolute_import


--------------------------------------------------------------------------------
/bench/words100k.txt.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/pytries/hat-trie/HEAD/bench/words100k.txt.zip


--------------------------------------------------------------------------------
/hat-trie/TODO:
--------------------------------------------------------------------------------
1 | 
2 | todo:
3 |   * Deletion in ahtable.
4 |   * Deletion in hattrie.
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/.hgtags:
--------------------------------------------------------------------------------
1 | 4b4c246aae46e501bb55e24495eb30de41ff0b8d 0.1
2 | 1871f420d10a49dae6f35677de5bb28b6b2cc71a 0.2
3 | 


--------------------------------------------------------------------------------
/hat-trie/.travis.yml:
--------------------------------------------------------------------------------
1 | language: c
2 | compiler:
3 |     - clang
4 |     - gcc
5 | before_script: autoreconf -i
6 | script: ./configure && make && make check
7 | 


--------------------------------------------------------------------------------
/hat-trie/Makefile.am:
--------------------------------------------------------------------------------
 1 | 
 2 | SUBDIRS = src test
 3 | 
 4 | EXTRA_DIST = README.md COPYING
 5 | 
 6 | pkgconfigdir = $(libdir)/pkgconfig
 7 | pkgconfig_DATA = hat-trie-0.1.pc
 8 | 
 9 | ACLOCAL_AMFLAGS=-I m4
10 | 
11 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist = py27,py33,py34,py35
 3 | 
 4 | [testenv]
 5 | deps =
 6 |     pytest
 7 |     # psutil
 8 | commands=
 9 |     pip install -I .
10 |     py.test []
11 | #    python bench/speed.py
12 | 


--------------------------------------------------------------------------------
/hat-trie/src/murmurhash3.h:
--------------------------------------------------------------------------------
 1 | 
 2 | #ifndef MURMURHASH3_H
 3 | #define MURMURHASH3_H
 4 | 
 5 | #include <stdlib.h>
 6 | 
 7 | #include "pstdint.h"
 8 | 
 9 | uint32_t hash(const char* data, size_t len);
10 | 
11 | #endif
12 | 
13 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | python:
 3 |   - "3.5"
 4 |   - "3.4"
 5 |   - "3.3"
 6 |   - "2.7"
 7 | #  - "pypy"
 8 | 
 9 | install:
10 |   - pip install cython
11 |   - python setup.py install
12 | 
13 | script:
14 | - ./update_c.sh
15 | - py.test
16 | 


--------------------------------------------------------------------------------
/hat-trie/.gitignore:
--------------------------------------------------------------------------------
 1 | *.la
 2 | *.lo
 3 | *.o
 4 | *~
 5 | .DS_Store
 6 | .deps
 7 | .libs
 8 | Makefile
 9 | Makefile.in
10 | aclocal.m4
11 | autom4te.cache
12 | config.*
13 | configure
14 | depcomp
15 | hat-trie-*.pc
16 | hat-trie-*.tar.gz
17 | install-sh
18 | libtool
19 | ltmain.sh
20 | m4
21 | missing
22 | 


--------------------------------------------------------------------------------
/hat-trie/hat-trie-0.1.pc.in:
--------------------------------------------------------------------------------
 1 | 
 2 | prefix=@prefix@
 3 | exec_prefix=@exec_prefix@
 4 | libdir=@libdir@
 5 | includedir=@includedir@
 6 | 
 7 | Name: @PACKAGE_NAME@
 8 | Description: An efficient trie implementation.
 9 | Version: @PACKAGE_VERSION@
10 | Cflags: -I${includedir}
11 | Libs: -L${libdir}
12 | 
13 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | MANIFEST
 2 | src/*.html
 3 | 
 4 | *.py[cod]
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Packages
10 | *.egg
11 | *.egg-info
12 | dist
13 | build
14 | sdist
15 | __pycache__
16 | 
17 | # Installer logs
18 | pip-log.txt
19 | 
20 | # Unit test / coverage reports
21 | .coverage
22 | .tox
23 | .cache
24 | 


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
 1 | include README.rst
 2 | include CHANGES.rst
 3 | include LICENSE
 4 | include tox.ini
 5 | include update_c.sh
 6 | 
 7 | recursive-include hat-trie/src *.h *.c
 8 | include hat-trie/src/config.h.in
 9 | include hat-trie/configure
10 | include hat-trie/configure.ac
11 | 
12 | recursive-include src *.pyx *.pxd *.c
13 | 


--------------------------------------------------------------------------------
/bench.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | envlist = py26,py27,py32,py33
 3 | 
 4 | [testenv]
 5 | deps =
 6 |     cython
 7 |     pytest
 8 |     # psutil
 9 | commands=
10 |     python bench/speed.py
11 | 
12 | [testenv:pypy]
13 | deps =
14 |     git+https://github.com/cython/cython.git@8102e17127206b51d7a419a3e9673ad795672a7d#egg=cython
15 |     pytest
16 | 


--------------------------------------------------------------------------------
/hat-trie/src/Makefile.am:
--------------------------------------------------------------------------------
 1 | 
 2 | lib_LTLIBRARIES = libhat-trie.la
 3 | 
 4 | libhat_trie_la_SOURCES = common.h \
 5 |                          ahtable.h        ahtable.c \
 6 |                          hat-trie.h       hat-trie.c \
 7 |                          misc.h           misc.c \
 8 |                          murmurhash3.h    murmurhash3.c
 9 | 
10 | pkginclude_HEADERS = hat-trie.h ahtable.h common.h pstdint.h
11 | 
12 | 


--------------------------------------------------------------------------------
/hat-trie/src/common.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of hat-trie.
 3 |  *
 4 |  * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
 5 |  *
 6 |  *
 7 |  * Common typedefs, etc.
 8 |  *
 9 |  */
10 | 
11 | 
12 | #ifndef HATTRIE_COMMON_H
13 | #define HATTRIE_COMMON_H
14 | 
15 | #include "pstdint.h"
16 | 
17 | // an unsigned int that is guaranteed to be the same size as a pointer
18 | typedef uintptr_t value_t;
19 | 
20 | #endif
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/hat-trie/src/misc.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of hat-trie.
 3 |  *
 4 |  * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
 5 |  *
 6 |  * misc :
 7 |  * miscellaneous functions.
 8 |  *
 9 |  */
10 | 
11 | #ifndef LINESET_MISC_H
12 | #define LINESET_MISC_H
13 | 
14 | #include <stdio.h>
15 | 
16 | void* malloc_or_die(size_t);
17 | void* realloc_or_die(void*, size_t);
18 | FILE* fopen_or_die(const char*, const char*);
19 | 
20 | #endif
21 | 
22 | 
23 | 


--------------------------------------------------------------------------------
/.hgignore:
--------------------------------------------------------------------------------
 1 | ^build
 2 | ^MANIFEST$
 3 | ^dist
 4 | \.so$
 5 | \.o$
 6 | \.lo$
 7 | \.la$
 8 | 
 9 | ^stuff/
10 | \.rej$
11 | \.pyc$
12 | ^.tox
13 | \.orig$
14 | \.prof$
15 | \.coverage$
16 | \.git
17 | 
18 | Makefile
19 | Makefile.in
20 | 
21 | hat-trie/aclocal.m4
22 | hat-trie/autom4te
23 | hat-trie/config
24 | hat-trie/depcomp
25 | hat-trie/hat-trie-0.1.pc
26 | hat-trie/install-sh
27 | hat-trie/libtool
28 | hat-trie/ltmain.sh
29 | hat-trie/missing
30 | 
31 | \.deps
32 | \.libs
33 | hat-trie/stamp-h1
34 | hat-trie/test/check_ahtable$
35 | hat-trie/test/check_hattrie$
36 | src/.*\.html$
37 | src/.*\.c$


--------------------------------------------------------------------------------
/hat-trie/test/Makefile.am:
--------------------------------------------------------------------------------
 1 | # Headers are read from the source tree; the library is linked from the build tree.
 2 | TESTS = check_ahtable check_hattrie
 3 | check_PROGRAMS = check_ahtable check_hattrie bench_sorted_iter
 4 | 
 5 | check_ahtable_SOURCES  = check_ahtable.c str_map.c
 6 | check_ahtable_LDADD    = $(top_builddir)/src/libhat-trie.la
 7 | check_ahtable_CPPFLAGS = -I$(top_srcdir)/src
 8 | 
 9 | check_hattrie_SOURCES  = check_hattrie.c str_map.c
10 | check_hattrie_LDADD    = $(top_builddir)/src/libhat-trie.la
11 | check_hattrie_CPPFLAGS = -I$(top_srcdir)/src
12 | 
13 | bench_sorted_iter_SOURCES  = bench_sorted_iter.c
14 | bench_sorted_iter_LDADD    = $(top_builddir)/src/libhat-trie.la
15 | bench_sorted_iter_CPPFLAGS = -I$(top_srcdir)/src
16 | 


--------------------------------------------------------------------------------
/hat-trie/configure.ac:
--------------------------------------------------------------------------------
 1 | 
 2 | AC_INIT([hat-trie], [0.1.0], [dcjones@cs.washington.edu])
 3 | AM_INIT_AUTOMAKE([foreign])
 4 | m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])])
 5 | AC_CONFIG_MACRO_DIR([m4])
 6 | 
 7 | base_CFLAGS="-std=c99 -Wall -Wextra -pedantic"
 8 | opt_CFLAGS="${base_CFLAGS} -O3"
 9 | dbg_CFLAGS="${base_CFLAGS} -g -O0"
10 | 
11 | AC_ARG_ENABLE([debugging],
12 |               [AS_HELP_STRING([--enable-debugging],
13 | 	                      [enable debugging info (default is no)])],
14 |               [], [enable_debugging=no])
15 | 
16 | AS_IF([test "x$enable_debugging" = xyes],
17 |       [CFLAGS="$dbg_CFLAGS"],
18 |       [CFLAGS="$opt_CFLAGS"])
19 | 
20 | 
21 | AC_PROG_CC
22 | AC_PROG_CPP
23 | AC_PROG_INSTALL
24 | AC_PROG_LN_S
25 | AC_PROG_MAKE_SET
26 | AC_DISABLE_SHARED
27 | AC_PROG_LIBTOOL
28 | 
29 | AC_C_BIGENDIAN([AC_MSG_ERROR([Big-endian systems are not currently supported.])])
30 | AC_HEADER_STDBOOL
31 | 
32 | AC_CONFIG_FILES([hat-trie-0.1.pc Makefile src/Makefile test/Makefile])
33 | AC_OUTPUT
34 | 
35 | 


--------------------------------------------------------------------------------
/hat-trie/src/misc.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of hat-trie.
 3 |  *
 4 |  * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
 5 |  *
 6 |  */
 7 | 
 8 | #include "misc.h"
 9 | #include <stdlib.h>
10 | 
11 | 
12 | void* malloc_or_die(size_t n) /* malloc() wrapper that terminates the process on failure */
13 | {
14 |     void* p = malloc(n);
15 |     if (p == NULL && n != 0) { /* a NULL result for a zero-byte request is not treated as an error */
16 |         fprintf(stderr, "Cannot allocate %zu bytes.\n", n);
17 |         exit(EXIT_FAILURE);
18 |     }
19 |     return p; /* non-NULL whenever n != 0 */
20 | }
21 | 
22 | 
23 | void* realloc_or_die(void* ptr, size_t n) /* realloc() wrapper that terminates the process on failure */
24 | {
25 |     void* p = realloc(ptr, n);
26 |     if (p == NULL && n != 0) { /* a NULL result for a zero-byte request is not treated as an error */
27 |         fprintf(stderr, "Cannot allocate %zu bytes.\n", n);
28 |         exit(EXIT_FAILURE);
29 |     }
30 |     return p; /* the resized block; callers must use this pointer instead of ptr */
31 | }
32 | 
33 | 
34 | FILE* fopen_or_die(const char* path, const char* mode) /* fopen() wrapper that terminates the process on failure */
35 | {
36 |     FILE* f = fopen(path, mode);
37 |     if (f == NULL) { /* report the path and mode that could not be opened, then exit */
38 |         fprintf(stderr, "Cannot open file %s with mode %s.\n", path, mode);
39 |         exit(EXIT_FAILURE);
40 |     }
41 |     return f; /* never NULL */
42 | }
43 | 
44 | 
45 | 
46 | 
47 | 


--------------------------------------------------------------------------------
/CHANGES.rst:
--------------------------------------------------------------------------------
 1 | 0.3 (2016-02-08)
 2 | ----------------
 3 | 
 4 | * hat-trie C library is updated to the latest version (thanks Michael Phan-Ba);
 5 | * FloatTrie (thanks Michael Phan-Ba);
 6 | * Python 2.6 and Python 3.2 support is dropped. hat-trie 0.3 likely still works
 7 |   in 2.6 and 3.2, but this is no longer checked by unit tests, and
 8 |   future compatibility is not guaranteed;
 9 | * setup.py is switched to setuptools.
10 | 
11 | 
12 | 0.2 (2014-08-22)
13 | ----------------
14 | 
15 | * Installation is simplified: Cython is no longer required;
16 | * ``get`` method for tries (thanks Brandon Forehand);
17 | * ``iterkeys`` method is fixed (thanks Brandon Forehand);
18 | * ``hat_trie.Trie`` can store any Python object as a value (thanks Brandon Forehand);
19 | * segfault is fixed for large int values (thanks Brandon Forehand);
20 | * hat-trie C library is updated to the latest version to fix some issues
21 |   with 64bit builds and RHEL (thanks Brandon Forehand and Michael Heilman).
22 | 
23 | 0.1 (2014-03-27)
24 | ----------------
25 | 
26 | Initial release.
27 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) Mikhail Korobov, 2012-2014
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is furnished
 8 | to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
14 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR
15 | A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
16 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
17 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE
18 | OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 | 


--------------------------------------------------------------------------------
/hat-trie/COPYING:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 7 | the Software, and to permit persons to whom the Software is furnished to do so,
 8 | subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 | 
20 | 


--------------------------------------------------------------------------------
/hat-trie/test/str_map.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
 3 |  *
 4 |  * hash :
 5 |  * A quick and simple hash table mapping strings to things.
 6 |  *
 7 |  */
 8 | 
 9 | 
10 | #ifndef ISOLATOR_STR_MAP_H
11 | #define ISOLATOR_STR_MAP_H
12 | 
13 | #if defined(__cplusplus)
14 | extern "C" {
15 | #endif
16 | 
17 | #include <stdlib.h>
18 | #include <stdint.h>
19 | 
20 | #include "common.h"
21 | 
22 | 
23 | typedef struct str_map_pair_
24 | {
25 |     char*         key;
26 |     size_t        keylen;
27 |     value_t       value;
28 | 
29 |     struct str_map_pair_* next;
30 | } str_map_pair;
31 | 
32 | 
33 | typedef struct
34 | {
35 |     str_map_pair** A; /* table proper */
36 |     size_t n;         /* table size */
37 |     size_t m;         /* hashed items */
38 |     size_t max_m;     /* max hashed items before rehash */
39 | } str_map;
40 | 
41 | 
42 | 
43 | str_map* str_map_create(void);
44 | void     str_map_destroy(str_map*);
45 | void     str_map_set(str_map*, const char* key, size_t keylen, value_t value);
46 | value_t  str_map_get(const str_map*, const char* key, size_t keylen);
47 | void     str_map_del(str_map* T, const char* key, size_t keylen);
48 | 
49 | #if defined(__cplusplus)
50 | }
51 | #endif
52 | 
53 | #endif
54 | 
55 | 


--------------------------------------------------------------------------------
/hat-trie/README.md:
--------------------------------------------------------------------------------
 1 | 
 2 | Hat-Trie
 3 | ========
 4 | 
 5 | [![Build Status](https://travis-ci.org/dcjones/hat-trie.svg)](https://travis-ci.org/dcjones/hat-trie)
 6 | 
 7 | This is an ANSI C99 implementation of the HAT-trie data structure of Askitis and
 8 | Sinha, an extremely efficient (space and time) modern variant of tries.
 9 | 
10 | The version implemented here maps arrays of bytes to words (i.e., unsigned
11 | longs), which can be used to store counts, pointers, etc, or not used at all if
12 | you simply want to maintain a set of unique strings.
13 | 
14 | For details see,
15 | 
16 |   1. Askitis, N., & Sinha, R. (2007). HAT-trie: a cache-conscious trie-based data
17 |      structure for strings. Proceedings of the thirtieth Australasian conference on
18 |      Computer science-Volume 62 (pp. 97–105). Australian Computer Society, Inc.
19 | 
20 |   2. Askitis, N., & Zobel, J. (2005). Cache-conscious collision resolution in
21 |      string hash tables. String Processing and Information Retrieval (pp.
22 |      91–102). Springer.
23 | 
24 | 
25 | Installation
26 | ------------
27 | 
28 |     git clone git@github.com:dcjones/hat-trie.git
29 |     cd hat-trie
30 |     autoreconf -i
31 |     ./configure
32 |     make install
33 | 
34 | To use the library, include `hat-trie.h` and link using `-lhat-trie`.
35 | 
36 | 
37 | Tests
38 | -----
39 | 
40 | Build and run the tests:
41 | 
42 |     make check
43 | 
44 | Other Language Bindings
45 | -----------------------
46 |  * Ruby - https://github.com/luikore/triez
47 |  * Python - https://github.com/kmike/hat-trie
48 | 


--------------------------------------------------------------------------------
/hat-trie/src/murmurhash3.c:
--------------------------------------------------------------------------------
 1 | /* This is MurmurHash3. The original C++ code was placed in the public domain
 2 |  * by its author, Austin Appleby. */
 3 | 
 4 | #include "murmurhash3.h"
 5 | 
 6 | static inline uint32_t fmix(uint32_t h) /* final mixing step; hash() applies it to the accumulated state */
 7 | {
 8 |     h ^= h >> 16; /* alternate xor-with-right-shift and multiply steps */
 9 |     h *= 0x85ebca6b;
10 |     h ^= h >> 13;
11 |     h *= 0xc2b2ae35;
12 |     h ^= h >> 16;
13 | 
14 |     return h;
15 | }
16 | 
17 | 
18 | static inline uint32_t rotl32(uint32_t x, int8_t r) /* rotate the 32-bit value x left by r bits */
19 | {
20 |     return (x << r) | (x >> (32 - r)); /* callers in this file only pass r = 13 or r = 15 */
21 | }
22 | 
23 | 
24 | uint32_t hash(const char* data, size_t len_) /* 32-bit MurmurHash3 of the len_ bytes at data, with a fixed seed */
25 | {
26 |     const int len = (int) len_; /* the length is narrowed to int for the arithmetic below */
27 |     const int nblocks = len / 4; /* number of complete 4-byte blocks */
28 | 
29 |     uint32_t h1 = 0xc062fb4a; /* initial hash state (hard-coded) */
30 | 
31 |     uint32_t c1 = 0xcc9e2d51;
32 |     uint32_t c2 = 0x1b873593;
33 | 
34 |     //----------
35 |     // body: mix every complete 4-byte block into h1
36 | 
37 |     const uint32_t * blocks = (const uint32_t*) (data + nblocks * 4); /* points just past the last complete block */
38 | 
39 |     int i;
40 |     for(i = -nblocks; i; i++) /* negative indices visit the blocks from first to last */
41 |     {
42 |         uint32_t k1 = blocks[i];
43 | 
44 |         k1 *= c1;
45 |         k1 = rotl32(k1, 15);
46 |         k1 *= c2;
47 | 
48 |         h1 ^= k1;
49 |         h1 = rotl32(h1, 13);
50 |         h1 = h1*5+0xe6546b64;
51 |     }
52 | 
53 |     //----------
54 |     // tail: mix in the 0-3 bytes left after the last complete block
55 | 
56 |     const uint8_t * tail = (const uint8_t*)(data + nblocks*4);
57 | 
58 |     uint32_t k1 = 0;
59 | 
60 |     switch(len & 3) /* the cases have no break, so case 3 also runs cases 2 and 1 */
61 |     {
62 |         case 3: k1 ^= tail[2] << 16;
63 |         case 2: k1 ^= tail[1] << 8;
64 |         case 1: k1 ^= tail[0];
65 |               k1 *= c1; k1 = rotl32(k1,15); k1 *= c2; h1 ^= k1;
66 |     }
67 | 
68 |     //----------
69 |     // finalization: fold in the length, then apply the final mix
70 | 
71 |     h1 ^= len;
72 | 
73 |     h1 = fmix(h1);
74 | 
75 |     return h1;
76 | }
77 | 
78 | 


--------------------------------------------------------------------------------
/src/chat_trie.pxd:
--------------------------------------------------------------------------------
 1 | cdef extern from "../hat-trie/src/hat-trie.h":
 2 | 
 3 |     ctypedef int value_t
 4 |     ctypedef int size_t
 5 | 
 6 |     ctypedef struct hattrie_t:
 7 |         pass
 8 | 
 9 |     hattrie_t* hattrie_create ()                 # Create an empty hat-trie.
10 |     void       hattrie_free   (hattrie_t*)       # Free all memory used by a trie.
11 |     hattrie_t* hattrie_dup    (hattrie_t*)       # Duplicate an existing trie.
12 |     void       hattrie_clear  (hattrie_t*)       # Remove all entries.
13 |     size_t     hattrie_size   (const hattrie_t*) # Number of stored keys.
14 | 
15 | 
16 |     # Find the given key in the trie, inserting it if it does not exist, and
17 |     # returning a pointer to its value.
18 |     # This pointer is not guaranteed to be valid after additional calls to
19 |     # hattrie_get, hattrie_del, hattrie_clear, or other functions that
20 |     # modify the trie.
21 |     value_t* hattrie_get (hattrie_t*, char* key, size_t len)
22 | 
23 |     # Find a given key in the table, returning a NULL pointer if it does not exist.
24 |     value_t* hattrie_tryget (hattrie_t*, char* key, size_t len)
25 | 
26 |     ctypedef struct hattrie_iter_t:
27 |         pass
28 | 
29 |     hattrie_iter_t* hattrie_iter_begin     (hattrie_t*, bint sorted)
30 |     void            hattrie_iter_next      (hattrie_iter_t*)
31 |     bint            hattrie_iter_finished  (hattrie_iter_t*)
32 |     void            hattrie_iter_free      (hattrie_iter_t*)
33 |     char*           hattrie_iter_key       (hattrie_iter_t*, size_t* len)
34 |     value_t*        hattrie_iter_val       (hattrie_iter_t*)
35 | 
36 | cdef struct hattrie_t_:
37 |     void* root
38 |     size_t m      # number of stored keys
39 | 


--------------------------------------------------------------------------------
/tests/test_inttrie.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from __future__ import absolute_import, unicode_literals
 3 | import string
 4 | import random
 5 | 
 6 | import pytest
 7 | import hat_trie
 8 | 
 9 | def test_getitem_set():
10 |     trie = hat_trie.IntTrie()
11 |     trie['foo'] = 5
12 |     trie['bar'] = 10
13 |     assert trie['foo'] == 5
14 |     assert trie['bar'] == 10
15 | 
16 |     with pytest.raises(KeyError):
17 |         trie['f']
18 | 
19 |     with pytest.raises(KeyError):
20 |         trie['foob']
21 | 
22 |     with pytest.raises(KeyError):
23 |         trie['x']
24 | 
25 |     non_ascii_key = 'вася'
26 |     trie[non_ascii_key] = 20
27 |     assert trie[non_ascii_key] == 20
28 | 
29 | def test_get():
30 |     trie = hat_trie.IntTrie()
31 | 
32 |     assert trie.get('foo') == -1
33 |     assert trie.get('bar') == -1
34 |     assert trie.get('foo', 5) == 5
35 | 
36 |     trie['foo'] = 5
37 |     trie['bar'] = 10
38 | 
39 |     assert trie.get('foo') == 5
40 |     assert trie.get('bar') == 10
41 | 
42 | def test_contains():
43 |     trie = hat_trie.IntTrie()
44 |     assert 'foo' not in trie
45 |     trie['foo'] = 5
46 |     assert 'foo' in trie
47 |     assert 'f' not in trie
48 | 
49 | 
50 | def test_get_set_fuzzy():
51 |     russian = 'абвгдеёжзиклмнопрстуфхцчъыьэюя'
52 |     alphabet = russian.upper() + string.ascii_lowercase
53 |     words = list(set([
54 |         "".join([random.choice(alphabet) for x in range(random.randint(2,10))])
55 |         for y in range(20000)
56 |     ]))
57 | 
58 |     trie = hat_trie.IntTrie()
59 | 
60 |     enumerated_words = list(enumerate(words))
61 | 
62 |     for index, word in enumerated_words:
63 |         trie[word] = index
64 | 
65 |     random.shuffle(enumerated_words)
66 |     for index, word in enumerated_words:
67 |         assert word in trie, word
68 |         assert trie[word] == index, (word, index)
69 | 
70 |     assert sorted(trie.keys()) == sorted(words)
71 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | import os
 3 | from setuptools import setup
 4 | from distutils.extension import Extension
 5 | 
 6 | HATTRIE_DIR = 'hat-trie/src'
 7 | HATTRIE_FILE_NAMES = ['ahtable.c', 'hat-trie.c', 'misc.c', 'murmurhash3.c']
 8 | HATTRIE_FILES = [os.path.join(HATTRIE_DIR, name) for name in HATTRIE_FILE_NAMES]
 9 | 
10 | with open('README.rst') as file_readme:
11 |     readme = file_readme.read()
12 | 
13 | with open('CHANGES.rst') as file_changes:
14 |     changes = file_changes.read()
15 | 
16 | setup(
17 |     name="hat-trie",
18 |     version="0.3",
19 |     description="HAT-Trie for Python",
20 |     long_description = readme + "\n\n" + changes,
21 |     author='Mikhail Korobov',
22 |     author_email='kmike84@gmail.com',
23 |     url='https://github.com/kmike/hat-trie/',
24 | 
25 |     ext_modules = [
26 |         Extension(
27 |             "hat_trie",
28 |             ['src/hat_trie.c', 'src/chat_trie.c'] + HATTRIE_FILES,
29 |             include_dirs=['hat-trie/src'],
30 |             extra_compile_args=["-Wno-error=declaration-after-statement"],
31 |         )
32 |     ],
33 | 
34 |     classifiers=[
35 |         'Development Status :: 3 - Alpha',
36 |         'Intended Audience :: Developers',
37 |         'Intended Audience :: Science/Research',
38 |         'License :: OSI Approved :: MIT License',
39 |         'Programming Language :: Cython',
40 |         'Programming Language :: Python',
41 |         'Programming Language :: Python :: 2',
42 |         'Programming Language :: Python :: 2.7',
43 |         'Programming Language :: Python :: 3',
44 |         'Programming Language :: Python :: 3.3',
45 |         'Programming Language :: Python :: 3.4',
46 |         'Programming Language :: Python :: 3.5',
47 |         'Programming Language :: Python :: Implementation :: CPython',
48 |         'Topic :: Software Development :: Libraries :: Python Modules',
49 |         'Topic :: Scientific/Engineering :: Information Analysis',
50 |         'Topic :: Text Processing :: Linguistic',
51 |     ],
52 | )
53 | 


--------------------------------------------------------------------------------
/hat-trie/test/bench_sorted_iter.c:
--------------------------------------------------------------------------------
 1 | 
 2 | /* A quick test of the degree to which ordered iteration is slower than unordered. */
 3 | 
 4 | #include "../src/hat-trie.h"
 5 | #include <stdio.h>
 6 | #include <time.h>
 7 | 
 8 | 
 9 | /* Simple random string generation. */
10 | void randstr(char* x, size_t len) /* x must have room for len + 1 bytes */
11 | {
12 |     x[len] = '\0'; /* terminate first, then fill the len characters from the back */
13 |     while (len > 0) {
14 |         x[--len] = '\x20' + (rand() % ('\x7e' - '\x20' + 1)); /* random byte in the range 0x20..0x7e */
15 |     }
16 | }
17 | 
18 | int main() /* fill a trie with random strings, then time unsorted and sorted iteration */
19 | {
20 |     hattrie_t* T = hattrie_create();
21 |     const size_t n = 1000000;  // how many strings
22 |     const size_t m_low  = 50;  // minimum length of each string
23 |     const size_t m_high = 500; // maximum length of each string
24 |     char x[501]; /* m_high + 1 bytes: room for the longest string plus its terminator */
25 | 
26 |     size_t i, m;
27 |     for (i = 0; i < n; ++i) {
28 |         m = m_low + rand() % (m_high - m_low); /* length in [m_low, m_high) */
29 |         randstr(x, m);
30 |         *hattrie_get(T, x, m) = 1; /* store the value 1 under key x */
31 |     }
32 | 
33 |     hattrie_iter_t* it;
34 |     clock_t t0, t;
35 |     const size_t repetitions = 100; /* each timing covers this many full passes over the trie */
36 |     size_t r;
37 | 
38 |     /* iterate in unsorted order */
39 |     fprintf(stderr, "iterating out of order ... ");
40 |     t0 = clock();
41 |     for (r = 0; r < repetitions; ++r) {
42 |         it = hattrie_iter_begin(T, false);
43 |         while (!hattrie_iter_finished(it)) {
44 |             hattrie_iter_next(it);
45 |         }
46 |         hattrie_iter_free(it);
47 |     }
48 |     t = clock();
49 |     fprintf(stderr, "finished. (%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC);
50 | 
51 | 
52 |     /* iterate in sorted order */
53 |     fprintf(stderr, "iterating in order ... ");
54 |     t0 = clock();
55 |     for (r = 0; r < repetitions; ++r) {
56 |         it = hattrie_iter_begin(T, true);
57 |         while (!hattrie_iter_finished(it)) {
58 |             hattrie_iter_next(it);
59 |         }
60 |         hattrie_iter_free(it);
61 |     }
62 |     t = clock();
63 |     fprintf(stderr, "finished. (%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC);
64 | 
65 | 
66 |     hattrie_free(T);
67 | 
68 |     return 0;
69 | }
70 | 


--------------------------------------------------------------------------------
/tests/test_floattrie.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from __future__ import absolute_import, unicode_literals
 3 | import string
 4 | import random
 5 | 
 6 | import pytest
 7 | import hat_trie
 8 | 
 9 | try:
10 |     from math import isnan
11 | except:
12 |     def nan(i): i != i
13 | 
14 | def test_getitem_set():
15 |     trie = hat_trie.FloatTrie()
16 |     trie['foo'] = 5.1
17 |     trie['bar'] = 10.1
18 |     assert abs(trie['foo'] - 5.1) < .001
19 |     assert abs(trie['bar'] - 10.1) < .001
20 | 
21 |     with pytest.raises(KeyError):
22 |         trie['f']
23 | 
24 |     with pytest.raises(KeyError):
25 |         trie['foob']
26 | 
27 |     with pytest.raises(KeyError):
28 |         trie['x']
29 | 
30 |     non_ascii_key = 'вася'
31 |     trie[non_ascii_key] = 20.1
32 |     assert abs(trie[non_ascii_key] - 20.1) < .001
33 | 
34 | def test_get():
35 |     trie = hat_trie.FloatTrie()
36 | 
37 |     assert isnan(trie.get('foo'))
38 |     assert isnan(trie.get('bar'))
39 |     assert abs(trie.get('foo', 5.0) - 5.0) < .001
40 | 
41 |     trie['foo'] = 5.5
42 |     trie['bar'] = 10.1
43 | 
44 |     assert abs(trie.get('foo') - 5.5) < .001
45 |     assert abs(trie.get('bar') - 10.1) < .001
46 | 
47 | def test_contains():
48 |     trie = hat_trie.FloatTrie()
49 |     assert 'foo' not in trie
50 |     trie['foo'] = 5.1
51 |     assert 'foo' in trie
52 |     assert 'f' not in trie
53 | 
54 | 
55 | def test_get_set_fuzzy():
56 |     russian = 'абвгдеёжзиклмнопрстуфхцчъыьэюя'
57 |     alphabet = russian.upper() + string.ascii_lowercase
58 |     words = list(set([
59 |         "".join([random.choice(alphabet) for x in range(random.randint(2,10))])
60 |         for y in range(20000)
61 |     ]))
62 | 
63 |     trie = hat_trie.FloatTrie()
64 | 
65 |     enumerated_words = list(enumerate(words))
66 | 
67 |     for index, word in enumerated_words:
68 |         trie[word] = index
69 | 
70 |     random.shuffle(enumerated_words)
71 |     for index, word in enumerated_words:
72 |         assert word in trie, word
73 |         assert trie[word] == index, (word, index)
74 | 
75 |     assert sorted(trie.keys()) == sorted(words)
76 | 


--------------------------------------------------------------------------------
/tests/test_base_trie.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from __future__ import absolute_import, unicode_literals
 3 | import string
 4 | import random
 5 | 
 6 | import pytest
 7 | import hat_trie
 8 | 
 9 | def test_getitem_set():
10 |     trie = hat_trie.BaseTrie()
11 |     trie[b'foo'] = 5
12 |     trie[b'bar'] = 10
13 |     assert trie[b'foo'] == 5
14 |     assert trie[b'bar'] == 10
15 | 
16 |     with pytest.raises(KeyError):
17 |         trie[b'f']
18 | 
19 |     with pytest.raises(KeyError):
20 |         trie[b'foob']
21 | 
22 |     with pytest.raises(KeyError):
23 |         trie[b'x']
24 | 
25 |     non_ascii_key = 'вася'.encode('cp1251')
26 |     trie[non_ascii_key] = 20
27 |     assert trie[non_ascii_key] == 20
28 | 
29 | def test_get():
30 |     trie = hat_trie.BaseTrie()
31 | 
32 |     assert trie.get(b'foo') == -1
33 |     assert trie.get(b'bar') == -1
34 |     assert trie.get(b'foo', 5) == 5
35 | 
36 |     trie[b'foo'] = 5
37 |     trie[b'bar'] = 10
38 | 
39 |     assert trie.get(b'foo') == 5
40 |     assert trie.get(b'bar') == 10
41 | 
42 | def test_contains():
43 |     trie = hat_trie.BaseTrie()
44 |     assert b'foo' not in trie
45 |     trie[b'foo'] = 5
46 |     assert b'foo' in trie
47 |     assert b'f' not in trie
48 | 
49 | def test_len():
50 |     trie = hat_trie.BaseTrie()
51 |     assert len(trie) == 0
52 |     trie[b'foo'] = 1
53 |     assert len(trie) == 1
54 |     trie[b'bar'] = 1
55 |     assert len(trie) == 2
56 |     trie[b'f'] = 1
57 |     assert len(trie) == 3
58 | 
59 | def test_setdefault():
60 |     trie = hat_trie.BaseTrie()
61 | 
62 |     with pytest.raises(KeyError):
63 |         trie[b'foo']
64 | 
65 |     trie.setdefault(b'foo', 1)
66 |     assert trie[b'foo'] == 1
67 |     trie.setdefault(b'foo', 5)
68 |     assert trie[b'foo'] == 1
69 | 
70 | 
71 | 
72 | @pytest.mark.parametrize(("encoding",), [['cp1251'], ['utf8']])
73 | def test_get_set_fuzzy(encoding):
74 |     russian = 'абвгдеёжзиклмнопрстуфхцчъыьэюя'
75 |     alphabet = string.ascii_lowercase + russian.upper()
76 |     words = list(set([
77 |         "".join([random.choice(alphabet) for x in range(random.randint(2,10))])
78 |         for y in range(20000)
79 |     ]))
80 | 
81 |     words = [w.encode(encoding) for w in words]
82 | 
83 |     trie = hat_trie.BaseTrie()
84 | 
85 |     enumerated_words = list(enumerate(words))
86 | 
87 |     for index, word in enumerated_words:
88 |         trie[word] = index
89 | 
90 |     random.shuffle(enumerated_words)
91 |     for index, word in enumerated_words:
92 |         assert word in trie, word
93 |         assert trie[word] == index, (word, index)
94 | 
95 |     assert sorted(trie.keys()) == sorted(words)
96 | 


--------------------------------------------------------------------------------
/hat-trie/src/hat-trie.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * This file is part of hat-trie
 3 |  *
 4 |  * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
 5 |  *
 6 |  *
 7 |  * This is an implementation of the HAT-trie data structure described in,
 8 |  *
 9 |  *    Askitis, N., & Sinha, R. (2007). HAT-trie: a cache-conscious trie-based data
10 |  *    structure for strings. Proceedings of the thirtieth Australasian conference on
11 |  *    Computer science-Volume 62 (pp. 97–105). Australian Computer Society, Inc.
12 |  *
13 |  * The HAT-trie is in essence a hybrid data structure, combining tries and hash
14 |  * tables in a clever way to try to get the best of both worlds.
15 |  *
16 |  */
17 | 
18 | #ifndef HATTRIE_HATTRIE_H
19 | #define HATTRIE_HATTRIE_H
20 | 
21 | #ifdef __cplusplus
22 | extern "C" {
23 | #endif
24 | 
25 | #include "common.h"
26 | #include <stdlib.h>
27 | #include <stdbool.h>
28 | 
29 | typedef struct hattrie_t_ hattrie_t;
30 | 
31 | hattrie_t* hattrie_create (void);             // Create an empty hat-trie.
32 | void       hattrie_free   (hattrie_t*);       // Free all memory used by a trie.
33 | hattrie_t* hattrie_dup    (const hattrie_t*); // Duplicate an existing trie.
34 | void       hattrie_clear  (hattrie_t*);       // Remove all entries.
35 | size_t     hattrie_size   (const hattrie_t*); // Number of stored keys.
36 | size_t     hattrie_sizeof (const hattrie_t*); // Memory used in structure in bytes.
37 | 
38 | 
39 | /** Find the given key in the trie, inserting it if it does not exist, and
40 |  * returning a pointer to its value.
41 |  *
42 |  * This pointer is not guaranteed to be valid after additional calls to
43 |  * hattrie_get, hattrie_del, hattrie_clear, or other functions that modify the
44 |  * trie.
45 |  */
46 | value_t* hattrie_get (hattrie_t*, const char* key, size_t len);
47 | 
48 | 
49 | /** Find a given key in the trie, returning a NULL pointer if it does not
50 |  * exist. */
51 | value_t* hattrie_tryget (hattrie_t*, const char* key, size_t len);
52 | 
53 | /** Delete a given key from trie. Returns 0 if successful or -1 if not found.
54 |  */
55 | int hattrie_del(hattrie_t* T, const char* key, size_t len);
56 | 
57 | typedef struct hattrie_iter_t_ hattrie_iter_t;
58 | 
59 | hattrie_iter_t* hattrie_iter_begin     (const hattrie_t*, bool sorted);
60 | void            hattrie_iter_next      (hattrie_iter_t*);
61 | bool            hattrie_iter_finished  (hattrie_iter_t*);
62 | void            hattrie_iter_free      (hattrie_iter_t*);
63 | const char*     hattrie_iter_key       (hattrie_iter_t*, size_t* len);
64 | value_t*        hattrie_iter_val       (hattrie_iter_t*);
65 | 
66 | /* Return true if two iterators are equal. */
67 | bool            hattrie_iter_equal     (const hattrie_iter_t* a,
68 |                                         const hattrie_iter_t* b);
69 | 
70 | #ifdef __cplusplus
71 | }
72 | #endif
73 | 
74 | #endif
75 | 


--------------------------------------------------------------------------------
/tests/test_trie.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import absolute_import, unicode_literals
  3 | import string
  4 | import random
  5 | 
  6 | import pytest
  7 | import hat_trie
  8 | 
  9 | def test_getitem_set():
 10 |     trie = hat_trie.Trie()
 11 |     trie['foo'] = 5
 12 |     trie['bar'] = 'asdf'
 13 |     trie['baz'] = (10, 'quuz')
 14 |     assert trie['foo'] == 5
 15 |     assert trie['bar'] == 'asdf'
 16 |     assert trie['baz'] == (10, 'quuz')
 17 | 
 18 |     with pytest.raises(KeyError):
 19 |         trie['f']
 20 | 
 21 |     with pytest.raises(KeyError):
 22 |         trie['foob']
 23 | 
 24 |     with pytest.raises(KeyError):
 25 |         trie['x']
 26 | 
 27 |     non_ascii_key = 'вася'
 28 |     trie[non_ascii_key] = 20
 29 |     assert trie[non_ascii_key] == 20
 30 | 
 31 | def test_get():
 32 |     trie = hat_trie.Trie()
 33 | 
 34 |     assert trie.get('foo') is None 
 35 |     assert trie.get('bar') is None
 36 |     assert trie.get('foo', 5) == 5
 37 | 
 38 |     trie['foo'] = 5
 39 |     trie['bar'] = 10
 40 | 
 41 |     assert trie.get('foo') == 5
 42 |     assert trie.get('bar') == 10
 43 | 
 44 | def test_contains():
 45 |     trie = hat_trie.Trie()
 46 |     assert 'foo' not in trie
 47 |     trie['foo'] = 5
 48 |     assert 'foo' in trie
 49 |     assert 'f' not in trie
 50 | 
 51 | def test_iterkeys():
 52 |     trie = hat_trie.Trie()
 53 | 
 54 |     non_ascii_key = 'вася'
 55 |     trie[non_ascii_key] = 20
 56 | 
 57 |     assert next(trie.iterkeys()) == non_ascii_key
 58 | 
 59 | def test_get_set_fuzzy():
 60 |     russian = 'абвгдеёжзиклмнопрстуфхцчъыьэюя'
 61 |     alphabet = russian.upper() + string.ascii_lowercase
 62 |     words = list(set([
 63 |         "".join([random.choice(alphabet) for x in range(random.randint(2,10))])
 64 |         for y in range(20000)
 65 |     ]))
 66 | 
 67 |     trie = hat_trie.Trie()
 68 | 
 69 |     enumerated_words = list(enumerate(words))
 70 | 
 71 |     for index, word in enumerated_words:
 72 |         trie[word] = index
 73 | 
 74 |     random.shuffle(enumerated_words)
 75 |     for index, word in enumerated_words:
 76 |         assert word in trie, word
 77 |         assert trie[word] == index, (word, index)
 78 | 
 79 |     assert sorted(trie.keys()) == sorted(words)
 80 | 
 81 | def test_leak():
 82 |     import sys
 83 | 
 84 |     values = list(string.ascii_lowercase)
 85 |     # Using "list(map())" to avoid the list comprehension variable
 86 |     # which increases the reference count.
 87 |     counts = list(map(sys.getrefcount, values))
 88 | 
 89 |     trie = hat_trie.Trie()
 90 | 
 91 |     for v in values:
 92 |         trie['foo'] = v
 93 |     # Python's for loop variables leak scope into the function body
 94 |     del v
 95 | 
 96 |     count = sys.getrefcount(trie['foo'])
 97 |     for i in range(10):
 98 |         current_count = sys.getrefcount(trie['foo'])
 99 |         assert current_count == count
100 | 
101 |     count0 = sys.getrefcount(values[0])
102 |     count_last = sys.getrefcount(values[-1])
103 |     assert count0 == counts[0]
104 |     assert count_last == counts[-1] + 1
105 | 
106 |     del trie
107 | 
108 |     after = list(map(sys.getrefcount, values))
109 |     assert after == counts
110 | 


--------------------------------------------------------------------------------
/hat-trie/src/ahtable.h:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is part of hat-trie.
  3 |  *
  4 |  * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
  5 |  *
  6 |  *
  7 |  * This is an implementation of the 'cache-conscious' hash tables described in,
  8 |  *
  9 |  *    Askitis, N., & Zobel, J. (2005). Cache-conscious collision resolution in
 10 |  *    string hash tables. String Processing and Information Retrieval (pp.
 11 |  *    91–102). Springer.
 12 |  *
 13 |  *    http://naskitis.com/naskitis-spire05.pdf
 14 |  *
 15 |  * Briefly, the idea behind an Array Hash Table is, as opposed to separate
 16 |  * chaining with linked lists, to store keys contiguously in one big array,
 17 |  * thereby improving the caching behavior, and reducing space requirements.
 18 |  *
 19 |  * ahtable keeps a fixed number (array) of slots, each of which contains a
 20 |  * variable number of key/value pairs. Each key is preceded by its length--
 21 |  * one byte for lengths < 128 bytes, and TWO bytes for longer keys. The least
 22 |  * significant bit of the first byte indicates, if set, that the size is two
 23 |  * bytes. The slot number where a key/value pair goes is determined by finding
 24 |  * the murmurhashed integer value of its key, modulus the number of slots.
 25 |  * The number of slots expands in a stepwise fashion when the number of
 26 |  * key/value pairs reaches an arbitrarily large number.
 27 |  *
 28 |  * +-------+-------+-------+-------+-------+-------+
 29 |  * |   0   |   1   |   2   |   3   |  ...  |   N   |
 30 |  * +-------+-------+-------+-------+-------+-------+
 31 |  *     |       |       |       |               |
 32 |  *     v       |       |       v               v
 33 |  *    NULL     |       |     4html[VALUE]     etc.
 34 |  *             |       v
 35 |  *             |     5space[VALUE]4jury[VALUE]
 36 |  *             v
 37 |  *           6justice[VALUE]3car[VALUE]4star[VALUE]
 38 |  *
 39 |  */
 40 | 
 41 | #ifndef HATTRIE_AHTABLE_H
 42 | #define HATTRIE_AHTABLE_H
 43 | 
 44 | #ifdef __cplusplus
 45 | extern "C" {
 46 | #endif
 47 | 
 48 | #include <stdlib.h>
 49 | #include <stdbool.h>
 50 | #include "pstdint.h"
 51 | #include "common.h"
 52 | 
 53 | typedef unsigned char* slot_t;
 54 | 
 55 | typedef struct ahtable_t_
 56 | {
 57 |     /* these fields are reserved for hattrie to fiddle with */
 58 |     uint8_t flag;
 59 |     unsigned char c0;
 60 |     unsigned char c1;
 61 | 
 62 |     size_t n;        // number of slots
 63 |     size_t m;        // number of key/value pairs stored
 64 |     size_t max_m;    // number of stored keys before we resize
 65 | 
 66 |     size_t*  slot_sizes;
 67 |     slot_t*  slots;
 68 | } ahtable_t;
 69 | 
 70 | extern const double ahtable_max_load_factor;
 71 | extern const size_t ahtable_initial_size;
 72 | 
 73 | ahtable_t* ahtable_create   (void);         // Create an empty hash table.
 74 | ahtable_t* ahtable_create_n (size_t n);     // Create an empty hash table, with
 75 |                                             //  n slots reserved.
 76 | 
 77 | void       ahtable_free   (ahtable_t*);       // Free all memory used by a table.
 78 | void       ahtable_clear  (ahtable_t*);       // Remove all entries.
 79 | size_t     ahtable_size   (const ahtable_t*); // Number of stored keys.
 80 | size_t     ahtable_sizeof (const ahtable_t*); // Memory used by the table in bytes.
 81 | 
 82 | 
 83 | /** Find the given key in the table, inserting it if it does not exist, and
 84 |  * returning a pointer to its value.
 85 |  *
 86 |  * This pointer is not guaranteed to be valid after additional calls to
 87 |  * ahtable_get, ahtable_del, ahtable_clear, or other functions that modify the
 88 |  * table.
 89 |  */
 90 | value_t* ahtable_get (ahtable_t*, const char* key, size_t len);
 91 | 
 92 | 
 93 | /* Find a given key in the table, return a NULL pointer if it does not exist. */
 94 | value_t* ahtable_tryget (ahtable_t*, const char* key, size_t len);
 95 | 
 96 | 
 97 | int ahtable_del(ahtable_t*, const char* key, size_t len);
 98 | 
 99 | 
100 | typedef struct ahtable_iter_t_ ahtable_iter_t;
101 | 
102 | ahtable_iter_t* ahtable_iter_begin     (const ahtable_t*, bool sorted);
103 | void            ahtable_iter_next      (ahtable_iter_t*);
104 | bool            ahtable_iter_finished  (ahtable_iter_t*);
105 | void            ahtable_iter_free      (ahtable_iter_t*);
106 | const char*     ahtable_iter_key       (ahtable_iter_t*, size_t* len);
107 | value_t*        ahtable_iter_val       (ahtable_iter_t*);
108 | 
109 | 
110 | #ifdef __cplusplus
111 | }
112 | #endif
113 | 
114 | #endif
115 | 
116 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
  1 | hat-trie
  2 | ========
  3 | 
  4 | HAT-Trie structure for Python (2.x and 3.x).
  5 | 
  6 | This package is a Python wrapper for `hat-trie`_ C library.
  7 | 
  8 | .. image:: https://travis-ci.org/kmike/hat-trie.svg?branch=master
  9 |     :target: https://travis-ci.org/kmike/hat-trie
 10 | 
 11 | .. _hat-trie: https://github.com/dcjones/hat-trie
 12 | 
 13 | Installation
 14 | ============
 15 | 
 16 | ::
 17 | 
 18 |     pip install hat-trie
 19 | 
 20 | Usage
 21 | =====
 22 | 
 23 | Create a new trie::
 24 | 
 25 |     >>> from hat_trie import Trie
 26 |     >>> trie = Trie()
 27 | 
 28 | ``trie`` variable is a dict-like object that supports unicode
 29 | keys and can have any Python object as a value. For keys that share prefixes
 30 | it usually uses less memory than Python dict.
 31 | 
 32 | There is also ``hat_trie.IntTrie`` which only supports positive
 33 | integers as values. It can be more efficient when you don't need
 34 | arbitrary objects as values. For example, if you need to store float
 35 | values then storing them in an array (either numpy or stdlib's ``array.array``)
 36 | and using IntTrie values as indices could be more memory efficient
 37 | than storing Python float objects directly in ``hat_trie.Trie``.
 38 | 
 39 | Another way to store float values is to use hat_trie.FloatTrie().
 40 | In this case precision is limited to float32.
 41 | 
 42 | Currently implemented methods are:
 43 | 
 44 | * __getitem__()
 45 | * __setitem__()
 46 | * __contains__()
 47 | * __len__()
 48 | * get()
 49 | * setdefault()
 50 | * keys()
 51 | * iterkeys()
 52 | 
 53 | Other methods are not implemented - contributions are welcome!
 54 | 
 55 | 
 56 | Performance
 57 | ===========
 58 | 
 59 | Performance is measured for ``hat_trie.Trie`` against Python's dict with
 60 | 100k unique unicode words (English and Russian) as keys and '1' numbers
 61 | as values.
 62 | 
 63 | Benchmark results for Python 3.3 (intel i5 1.8GHz,
 64 | "1.000M ops/sec" == "1 000 000 operations per second")::
 65 | 
 66 |     dict __getitem__ (hits)      6.874M ops/sec
 67 |     trie __getitem__ (hits)      3.754M ops/sec
 68 |     dict __contains__ (hits)     7.035M ops/sec
 69 |     trie __contains__ (hits)     3.772M ops/sec
 70 |     dict __contains__ (misses)   5.356M ops/sec
 71 |     trie __contains__ (misses)   3.364M ops/sec
 72 |     dict __len__                 785958.286 ops/sec
 73 |     trie __len__                 574164.704 ops/sec
 74 |     dict __setitem__ (updates)   6.830M ops/sec
 75 |     trie __setitem__ (updates)   3.472M ops/sec
 76 |     dict __setitem__ (inserts)   6.774M ops/sec
 77 |     trie __setitem__ (inserts)   2.460M ops/sec
 78 |     dict setdefault (updates)    3.522M ops/sec
 79 |     trie setdefault (updates)    2.680M ops/sec
 80 |     dict setdefault (inserts)    4.062M ops/sec
 81 |     trie setdefault (inserts)    1.866M ops/sec
 82 |     dict keys()                  189.564 ops/sec
 83 |     trie keys()                  16.067 ops/sec
 84 | 
 85 | 
 86 | HAT-Trie is about 1.5x faster than `datrie`_ on all supported operations;
 87 | it also supports fast inserts unlike datrie. On the other hand,
 88 | datrie has more features (e.g. better iteration support and richer API);
 89 | datrie is also more memory efficient.
 90 | 
 91 | If you need a memory efficient data structure and don't need inserts
 92 | then marisa-trie_ or DAWG_ should work better.
 93 | 
 94 | .. _datrie: https://github.com/kmike/datrie
 95 | .. _marisa-trie: https://github.com/kmike/marisa-trie
 96 | .. _DAWG: https://github.com/kmike/DAWG
 97 | 
 98 | Contributing
 99 | ============
100 | 
101 | Development happens at github:
102 | 
103 | * https://github.com/kmike/hat-trie
104 | 
105 | Feel free to submit ideas, bugs, pull requests or regular patches.
106 | 
107 | Please don't commit changes to generated C files; I will rebuild them myself.
108 | 
109 | Running tests and benchmarks
110 | ----------------------------
111 | 
112 | Make sure `tox`_ is installed and run
113 | 
114 | ::
115 | 
116 |     $ ./update_c.sh
117 |     $ tox
118 | 
119 | from the source checkout. You will need Cython_ to do that.
120 | 
121 | Tests should pass under python 2.7 and 3.3+.
122 | 
123 | ::
124 | 
125 |     $ tox -c bench.ini
126 | 
127 | runs benchmarks.
128 | 
129 | .. _Cython: http://cython.org
130 | .. _tox: http://tox.testrun.org
131 | 
132 | Authors & Contributors
133 | ----------------------
134 | 
135 | * Mikhail Korobov <kmike84@gmail.com>
136 | * Brandon Forehand <b4hand@users.sf.net>
137 | * https://github.com/yflau
138 | * Michael Heilman <https://github.com/mheilman/>
139 | * Michael Phan-Ba <michael@mikepb.com> @mikepb
140 | 
141 | This module wraps `hat-trie`_ C library by Daniel Jones & contributors.
142 | 
143 | License
144 | =======
145 | 
146 | Licensed under MIT License.
147 | 


--------------------------------------------------------------------------------
/hat-trie/test/check_ahtable.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | #include <stdio.h>
  5 | 
  6 | #include "str_map.h"
  7 | #include "../src/ahtable.h"
  8 | 
  9 | /* Simple random string generation. */
 10 | void randstr(char* x, size_t len)
 11 | {
 12 |     x[len] = '\0';
 13 |     while (len > 0) {
 14 |         x[--len] = '\x20' + (rand() % ('\x7e' - '\x20' + 1));
 15 |     }
 16 | }
 17 | 
 18 | 
 19 | const size_t n = 100000;  // how many unique strings
 20 | const size_t m_low  = 50;  // minimum length of each string
 21 | const size_t m_high = 500; // maximum length of each string
 22 | const size_t k = 200000;  // number of insertions
 23 | char** xs;
 24 | 
 25 | ahtable_t* T;
 26 | str_map* M;
 27 | 
 28 | 
 29 | void setup()
 30 | {
 31 |     fprintf(stderr, "generating %zu keys ... ", n);
 32 |     xs = malloc(n * sizeof(char*));
 33 |     size_t i;
 34 |     size_t m;
 35 |     for (i = 0; i < n; ++i) {
 36 |         m = m_low + rand() % (m_high - m_low);
 37 |         xs[i] = malloc(m + 1);
 38 |         randstr(xs[i], m);
 39 |     }
 40 | 
 41 |     T = ahtable_create();
 42 |     M = str_map_create();
 43 |     fprintf(stderr, "done.\n");
 44 | }
 45 | 
 46 | 
 47 | void teardown()
 48 | {
 49 |     ahtable_free(T);
 50 |     str_map_destroy(M);
 51 | 
 52 |     size_t i;
 53 |     for (i = 0; i < n; ++i) {
 54 |         free(xs[i]);
 55 |     }
 56 |     free(xs);
 57 | }
 58 | 
 59 | 
 60 | void test_ahtable_insert()
 61 | {
 62 |     /* Count k random insertions in T and the reference map M, then delete k/100. */
 63 |     fprintf(stderr, "inserting %zu keys ... \n", k);
 64 |     size_t i, j;
 65 |     value_t* u;
 66 |     value_t  v;
 67 | 
 68 |     for (j = 0; j < k; ++j) {
 69 |         i = rand() % n;
 70 | 
 71 |         /* expected tally, tracked in the reference map */
 72 |         v = 1 + str_map_get(M, xs[i], strlen(xs[i]));
 73 |         str_map_set(M, xs[i], strlen(xs[i]), v);
 74 | 
 75 |         /* ahtable_get inserts the key if it is missing */
 76 |         u = ahtable_get(T, xs[i], strlen(xs[i]));
 77 |         *u += 1;
 78 | 
 79 | 
 80 |         if (*u != v) {
 81 |             fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n",
 82 |                             *u, v);
 83 |         }
 84 |     }
 85 | 
 86 |     fprintf(stderr, "sizeof: %zu\n", ahtable_sizeof(T));
 87 | 
 88 |     /* delete some keys; the counter is j because i is reassigned in the body */
 89 |     for (j = 0; j < k/100; ++j) {
 90 |         i = rand() % n;
 91 |         ahtable_del(T, xs[i], strlen(xs[i]));
 92 |         str_map_del(M, xs[i], strlen(xs[i]));
 93 |         u = ahtable_tryget(T, xs[i], strlen(xs[i]));
 94 |         if (u) {
 95 |             fprintf(stderr, "[error] deleted node found in ahtable\n");
 96 |         }
 97 |     }
 98 | 
 99 |     fprintf(stderr, "done.\n");
100 | }
101 | 
102 | 
103 | void test_ahtable_iteration()
104 | {
105 |     fprintf(stderr, "iterating through %zu keys ... \n", k);
106 | 
107 |     ahtable_iter_t* i = ahtable_iter_begin(T, false);
108 | 
109 |     size_t count = 0;
110 |     value_t* u;
111 |     value_t  v;
112 | 
113 |     size_t len;
114 |     const char* key;
115 | 
116 |     while (!ahtable_iter_finished(i)) {
117 |         ++count;
118 | 
119 |         key = ahtable_iter_key(i, &len);
120 |         u   = ahtable_iter_val(i);
121 |         v   = str_map_get(M, key, len);
122 | 
123 |         if (*u != v) {
124 |             if (v == 0) {
125 |                 fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
126 |             }
127 |             else {
128 |                 fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
129 |             }
130 |         }
131 | 
132 |         // this way we will see an error if the same key is iterated through
133 |         // twice
134 |         str_map_set(M, key, len, 0);
135 | 
136 |         ahtable_iter_next(i);
137 |     }
138 | 
139 |     if (count != M->m) {
140 |         fprintf(stderr, "[error] iterated through %zu element, expected %zu\n",
141 |                 count, M->m);
142 |     }
143 | 
144 |     ahtable_iter_free(i);
145 | 
146 |     fprintf(stderr, "done.\n");
147 | }
148 | 
149 | 
150 | int cmpkey(const char* a, size_t ka, const char* b, size_t kb)
151 | {
152 |     int c = memcmp(a, b, ka < kb ? ka : kb);
153 |     return c == 0 ? (int) ka - (int) kb : c;
154 | }
155 | 
156 | 
157 | void test_ahtable_sorted_iteration()
158 | {
159 |     /* Walk T in sorted order; keys must not decrease and values must match M. */
160 |     fprintf(stderr, "iterating in order through %zu keys ... \n", k);
161 |     ahtable_iter_t* i = ahtable_iter_begin(T, true);
162 | 
163 |     size_t count = 0;
164 |     value_t* u;
165 |     value_t  v;
166 | 
167 |     char* prev_key = malloc(m_high + 1);
168 |     size_t prev_len = 0;
169 | 
170 |     const char *key = NULL;
171 |     size_t len = 0;
172 | 
173 |     while (!ahtable_iter_finished(i)) {
174 |         /* remember the previous key; key is still NULL on the first pass */
175 |         if (key != NULL) memcpy(prev_key, key, len);
176 |         prev_len = len;
177 |         ++count;
178 |         key = ahtable_iter_key(i, &len);
179 |         if (count > 1 && cmpkey(prev_key, prev_len, key, len) > 0) {
180 |             fprintf(stderr, "[error] iteration is not correctly ordered.\n");
181 |         }
182 | 
183 |         u  = ahtable_iter_val(i);
184 |         v  = str_map_get(M, key, len);
185 | 
186 |         if (*u != v) {
187 |             if (v == 0) {
188 |                 fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
189 |             }
190 |             else {
191 |                 fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
192 |             }
193 |         }
194 | 
195 |         // this way we will see an error if the same key is iterated through
196 |         // twice
197 |         str_map_set(M, key, len, 0);
198 | 
199 |         ahtable_iter_next(i);
200 |     }
201 | 
202 |     ahtable_iter_free(i);
203 |     free(prev_key);
204 | 
205 |     fprintf(stderr, "done.\n");
206 | }
207 | 
208 | 
209 | int main()
210 | {
211 |     setup();
212 |     test_ahtable_insert();
213 |     test_ahtable_iteration();
214 |     teardown();
215 | 
216 |     setup();
217 |     test_ahtable_insert();
218 |     test_ahtable_sorted_iteration();
219 |     teardown();
220 | 
221 |     return 0;
222 | }
223 | 


--------------------------------------------------------------------------------
/hat-trie/test/str_map.c:
--------------------------------------------------------------------------------
  1 | 
  2 | /*
  3 |  * This file is part of fastq-tools.
  4 |  *
  5 |  * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
  6 |  *
  7 |  */
  8 | 
  9 | 
 10 | #include "str_map.h"
 11 | #include "misc.h"
 12 | #include <stdlib.h>
 13 | #include <stdint.h>
 14 | #include <string.h>
 15 | 
 16 | 
 17 | static const size_t INITIAL_TABLE_SIZE = 16;
 18 | static const double MAX_LOAD = 0.77;
 19 | 
 20 | 
 21 | /*
 22 |  * Paul Hsieh's SuperFastHash
 23 |  * http://www.azillionmonkeys.com/qed/hash.html
 24 |  */
 25 | 
 26 | 
 27 | #undef get16bits
 28 | #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \
 29 |     || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__)
 30 | #define get16bits(d) (*((const uint16_t *) (d)))
 31 | #endif
 32 | 
 33 | #if !defined (get16bits)
 34 | #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\
 35 |         +(uint32_t)(((const uint8_t *)(d))[0]) )
 36 | #endif
 37 | 
 38 | static uint32_t hash(const char * data, size_t len) {
 39 |     uint32_t hash = len, tmp;
 40 |     int rem;
 41 | 
 42 |     if (len <= 0 || data == NULL) return 0;
 43 | 
 44 |     rem = len & 3;
 45 |     len >>= 2;
 46 | 
 47 |     /* Main loop */
 48 |     for (;len > 0; len--) {
 49 |         hash  += get16bits (data);
 50 |         tmp    = (get16bits (data+2) << 11) ^ hash;
 51 |         hash   = (hash << 16) ^ tmp;
 52 |         data  += 2*sizeof (uint16_t);
 53 |         hash  += hash >> 11;
 54 |     }
 55 | 
 56 |     /* Handle end cases */
 57 |     switch (rem) {
 58 |         case 3: hash += get16bits (data);
 59 |                 hash ^= hash << 16;
 60 |                 hash ^= data[sizeof (uint16_t)] << 18;
 61 |                 hash += hash >> 11;
 62 |                 break;
 63 |         case 2: hash += get16bits (data);
 64 |                 hash ^= hash << 11;
 65 |                 hash += hash >> 17;
 66 |                 break;
 67 |         case 1: hash += *data;
 68 |                 hash ^= hash << 10;
 69 |                 hash += hash >> 1;
 70 |     }
 71 | 
 72 |     /* Force "avalanching" of final 127 bits */
 73 |     hash ^= hash << 3;
 74 |     hash += hash >> 5;
 75 |     hash ^= hash << 4;
 76 |     hash += hash >> 17;
 77 |     hash ^= hash << 25;
 78 |     hash += hash >> 6;
 79 | 
 80 |     return hash;
 81 | }
 82 | 
 83 | 
 84 | 
 85 | static void rehash(str_map* T, size_t new_n);
 86 | static void clear(str_map*);
 87 | 
 88 | 
 89 | 
 90 | str_map* str_map_create()
 91 | {
 92 |     str_map* T = malloc_or_die(sizeof(str_map));
 93 |     T->A = malloc_or_die(INITIAL_TABLE_SIZE * sizeof(str_map_pair*));
 94 |     memset(T->A, 0, INITIAL_TABLE_SIZE * sizeof(str_map_pair*));
 95 |     T->n = INITIAL_TABLE_SIZE;
 96 |     T->m = 0;
 97 |     T->max_m = T->n * MAX_LOAD;
 98 | 
 99 |     return T;
100 | }
101 | 
102 | 
103 | void str_map_destroy(str_map* T)
104 | {
105 |     if (T != NULL) {
106 |         clear(T);
107 |         free(T->A);
108 |         free(T);
109 |     }
110 | }
111 | 
112 | 
113 | 
114 | void clear(str_map* T)
115 | {
116 |     str_map_pair* u;
117 |     size_t i;
118 |     for (i = 0; i < T->n; i++) {
119 |         while (T->A[i]) {
120 |             u = T->A[i]->next;
121 |             free(T->A[i]->key);
122 |             free(T->A[i]);
123 |             T->A[i] = u;
124 |         }
125 |     }
126 | 
127 |     T->m = 0;
128 | }
129 | 
130 | 
131 | static void insert_without_copy(str_map* T, str_map_pair* V)
132 | {
133 |     uint32_t h = hash(V->key, V->keylen) % T->n;
134 |     V->next = T->A[h];
135 |     T->A[h] = V;
136 |     T->m++;
137 | }
138 | 
139 | 
140 | 
141 | static void rehash(str_map* T, size_t new_n)
142 | {
143 |     str_map U;
144 |     U.n = new_n;
145 |     U.m = 0;
146 |     U.max_m = U.n * MAX_LOAD;
147 |     U.A = malloc_or_die(U.n * sizeof(str_map_pair*));
148 |     memset(U.A, 0, U.n * sizeof(str_map_pair*));
149 | 
150 |     str_map_pair *j, *k;
151 |     size_t i;
152 |     for (i = 0; i < T->n; i++) {
153 |         j = T->A[i];
154 |         while (j) {
155 |             k = j->next;
156 |             insert_without_copy(&U, j);
157 |             j = k;
158 |         }
159 |         T->A[i] = NULL;
160 |     }
161 | 
162 |     free(T->A);
163 |     T->A = U.A;
164 |     T->n = U.n;
165 |     T->max_m = U.max_m;
166 | }
167 | 
168 | 
169 | void str_map_set(str_map* T, const char* key, size_t keylen, value_t value)
170 | {
171 |     if (T->m >= T->max_m) rehash(T, T->n * 2);
172 | 
173 |     uint32_t h = hash(key, keylen) % T->n;
174 | 
175 |     str_map_pair* u = T->A[h];
176 | 
177 |     while (u) {
178 |         if (u->keylen == keylen && memcmp(u->key, key, keylen) == 0) {
179 |             u->value = value;
180 |             return;
181 |         }
182 | 
183 |         u = u->next;
184 |     }
185 | 
186 |     u = malloc_or_die(sizeof(str_map_pair));
187 |     u->key = malloc_or_die(keylen);
188 |     memcpy(u->key, key, keylen);
189 |     u->keylen = keylen;
190 |     u->value  = value;
191 | 
192 |     u->next = T->A[h];
193 |     T->A[h] = u;
194 | 
195 |     T->m++;
196 | }
197 | 
198 | 
199 | value_t str_map_get(const str_map* T, const char* key, size_t keylen)
200 | {
201 |     uint32_t h = hash(key, keylen) % T->n;
202 | 
203 |     str_map_pair* u = T->A[h];
204 | 
205 |     while (u) {
206 |         if (u->keylen == keylen && memcmp(u->key, key, keylen) == 0) {
207 |             return u->value;
208 |         }
209 | 
210 |         u = u->next;
211 |     }
212 | 
213 |     return 0;
214 | }
215 | 
216 | void str_map_del(str_map* T, const char* key, size_t keylen)
217 | {
218 |     uint32_t h = hash(key, keylen) % T->n;
219 | 
220 |     str_map_pair* u = T->A[h];
221 |     str_map_pair* p = NULL;
222 |     while (u) {
223 |         
224 |         if (u->keylen == keylen && memcmp(u->key, key, keylen) == 0) {
225 |             if (p) {
226 |                 p->next = u->next;
227 |             } else {
228 |                 T->A[h] = u->next;
229 |             }
230 |             free(u->key);
231 |             free(u);
232 |             --T->m;
233 |             return;
234 |         }
235 | 
236 |         p = u;
237 |         u = u->next;
238 |     }
239 | 
240 | }
241 | 
242 | 


--------------------------------------------------------------------------------
/hat-trie/test/check_hattrie.c:
--------------------------------------------------------------------------------
  1 | 
  2 | #include <stdlib.h>
  3 | #include <string.h>
  4 | #include <stdio.h>
  5 | 
  6 | #include "str_map.h"
  7 | #include "../src/hat-trie.h"
  8 | 
  9 | /* Simple random string generation. */
 10 | void randstr(char* x, size_t len)
 11 | {
 12 |     x[len] = '\0';
 13 |     while (len > 0) {
 14 |         x[--len] = '\x20' + (rand() % ('\x7e' - '\x20' + 1));
 15 |     }
 16 | }
 17 | 
/* Test parameters shared by setup() and the test functions below. */
const size_t n = 100000;  // how many unique strings
const size_t m_low  = 50;  // minimum length of each string
const size_t m_high = 500; // maximum length of each string
                           // (setup() draws lengths in [m_low, m_high - 1])
const size_t k = 200000;  // number of insertions
const size_t d = 50000;   // number of keys picked for deletion

char** xs;  // the n generated keys; each string is owned by this array
char** ds;  // d pointers into xs chosen for deletion (aliases, not copies)

hattrie_t* T;  // trie under test
str_map* M;    // reference map the trie is checked against
 29 | 
 30 | 
 31 | void setup()
 32 | {
 33 |     fprintf(stderr, "generating %zu keys ... ", n);
 34 |     xs = malloc(n * sizeof(char*));
 35 |     ds = malloc(d * sizeof(char*));
 36 |     size_t i;
 37 |     size_t m;
 38 |     for (i = 0; i < n; ++i) {
 39 |         m = m_low + rand() % (m_high - m_low);
 40 |         xs[i] = malloc(m + 1);
 41 |         randstr(xs[i], m);
 42 |     }
 43 |     for (i = 0; i < d; ++i) {
 44 |         m = rand()%n;
 45 |         ds[i] = xs[m];
 46 |     }
 47 | 
 48 |     T = hattrie_create();
 49 |     M = str_map_create();
 50 |     fprintf(stderr, "done.\n");
 51 | }
 52 | 
 53 | 
 54 | void teardown()
 55 | {
 56 |     hattrie_free(T);
 57 |     str_map_destroy(M);
 58 | 
 59 |     size_t i;
 60 |     for (i = 0; i < n; ++i) {
 61 |         free(xs[i]);
 62 |     }
 63 |     free(xs);
 64 |     free(ds);
 65 | }
 66 | 
 67 | 
 68 | void test_hattrie_insert()
 69 | {
 70 |     fprintf(stderr, "inserting %zu keys ... \n", k);
 71 | 
 72 |     size_t i, j;
 73 |     value_t* u;
 74 |     value_t  v;
 75 | 
 76 |     for (j = 0; j < k; ++j) {
 77 |         i = rand() % n;
 78 | 
 79 | 
 80 |         v = 1 + str_map_get(M, xs[i], strlen(xs[i]));
 81 |         str_map_set(M, xs[i], strlen(xs[i]), v);
 82 | 
 83 | 
 84 |         u = hattrie_get(T, xs[i], strlen(xs[i]));
 85 |         *u += 1;
 86 | 
 87 | 
 88 |         if (*u != v) {
 89 |             fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n",
 90 |                             *u, v);
 91 |         }
 92 |     }
 93 | 
 94 |     fprintf(stderr, "sizeof: %zu\n", hattrie_sizeof(T));
 95 | 
 96 |     fprintf(stderr, "deleting %zu keys ... \n", d);
 97 |     for (j = 0; j < d; ++j) {
 98 |         str_map_del(M, ds[j], strlen(ds[j]));
 99 |         hattrie_del(T, ds[j], strlen(ds[j]));
100 |         u = hattrie_tryget(T, ds[j], strlen(ds[j]));
101 |         if (u) {
102 |             fprintf(stderr, "[error] item %zu still found in trie after delete\n",
103 |                     j);
104 |         }
105 |     }
106 | 
107 |     fprintf(stderr, "done.\n");
108 | }
109 | 
110 | 
111 | 
112 | void test_hattrie_iteration()
113 | {
114 |     fprintf(stderr, "iterating through %zu keys ... \n", k);
115 | 
116 |     hattrie_iter_t* i = hattrie_iter_begin(T, false);
117 | 
118 |     size_t count = 0;
119 |     value_t* u;
120 |     value_t  v;
121 | 
122 |     size_t len;
123 |     const char* key;
124 | 
125 |     while (!hattrie_iter_finished(i)) {
126 |         ++count;
127 | 
128 |         key = hattrie_iter_key(i, &len);
129 |         u   = hattrie_iter_val(i);
130 | 
131 |         v = str_map_get(M, key, len);
132 | 
133 |         if (*u != v) {
134 |             if (v == 0) {
135 |                 fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
136 |             }
137 |             else {
138 |                 fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
139 |             }
140 |         }
141 | 
142 |         // this way we will see an error if the same key is iterated through
143 |         // twice
144 |         str_map_set(M, key, len, 0);
145 | 
146 |         hattrie_iter_next(i);
147 |     }
148 | 
149 |     if (count != M->m) {
150 |         fprintf(stderr, "[error] iterated through %zu element, expected %zu\n",
151 |                 count, M->m);
152 |     }
153 | 
154 |     hattrie_iter_free(i);
155 | 
156 |     fprintf(stderr, "done.\n");
157 | }
158 | 
159 | 
/* Three-way comparison of two length-delimited keys: bytewise over the
 * common prefix, with the length difference breaking ties. */
int cmpkey(const char* a, size_t ka, const char* b, size_t kb)
{
    size_t shared = kb;
    if (ka < kb) shared = ka;

    int c = memcmp(a, b, shared);
    if (c != 0) return c;

    return (int) ka - (int) kb;
}
165 | 
166 | 
167 | void test_hattrie_sorted_iteration()
168 | {
169 |     fprintf(stderr, "iterating in order through %zu keys ... \n", k);
170 | 
171 |     hattrie_iter_t* i = hattrie_iter_begin(T, true);
172 | 
173 |     size_t count = 0;
174 |     value_t* u;
175 |     value_t  v;
176 | 
177 |     char* key_copy = malloc(m_high + 1);
178 |     char* prev_key = malloc(m_high + 1);
179 |     memset(prev_key, 0, m_high + 1);
180 |     size_t prev_len = 0;
181 | 
182 |     const char *key = NULL;
183 |     size_t len = 0;
184 | 
185 |     while (!hattrie_iter_finished(i)) {
186 |         memcpy(prev_key, key_copy, len);
187 |         prev_key[len] = '\0';
188 |         prev_len = len;
189 |         ++count;
190 | 
191 |         key = hattrie_iter_key(i, &len);
192 | 
193 |         /* memory for key may be changed on iter, copy it */
194 |         strncpy(key_copy, key, len);
195 | 
196 |         if (prev_key != NULL && cmpkey(prev_key, prev_len, key, len) > 0) {
197 |             fprintf(stderr, "[error] iteration is not correctly ordered.\n");
198 |         }
199 | 
200 |         u = hattrie_iter_val(i);
201 |         v = str_map_get(M, key, len);
202 | 
203 |         if (*u != v) {
204 |             if (v == 0) {
205 |                 fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v);
206 |             }
207 |             else {
208 |                 fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v);
209 |             }
210 |         }
211 | 
212 |         // this way we will see an error if the same key is iterated through
213 |         // twice
214 |         str_map_set(M, key, len, 0);
215 | 
216 |         hattrie_iter_next(i);
217 |     }
218 | 
219 |     if (count != M->m) {
220 |         fprintf(stderr, "[error] iterated through %zu element, expected %zu\n",
221 |                 count, M->m);
222 |     }
223 | 
224 |     hattrie_iter_free(i);
225 |     free(prev_key);
226 |     free(key_copy);
227 | 
228 |     fprintf(stderr, "done.\n");
229 | }
230 | 
231 | 
232 | void test_trie_non_ascii()
233 | {
234 |     fprintf(stderr, "checking non-ascii... \n");
235 | 
236 |     value_t* u;
237 |     hattrie_t* T = hattrie_create();
238 |     char* txt = "\x81\x70";
239 | 
240 |     u = hattrie_get(T, txt, strlen(txt));
241 |     *u = 10;
242 | 
243 |     u = hattrie_tryget(T, txt, strlen(txt));
244 |     if (*u != 10){
245 |         fprintf(stderr, "can't store non-ascii strings\n");
246 |     }
247 |     hattrie_free(T);
248 | 
249 |     fprintf(stderr, "done.\n");
250 | }
251 | 
252 | 
253 | 
254 | 
/* Test driver: runs the non-ASCII check, then two independent rounds of
 * insert/delete, the first followed by unsorted and the second by sorted
 * iteration. Failures are only reported on stderr; the exit status is
 * always 0. */
int main()
{
    test_trie_non_ascii();

    /* Round 1: fresh fixtures, then unsorted iteration. */
    setup();
    test_hattrie_insert();
    test_hattrie_iteration();
    teardown();

    /* Round 2: the iteration tests zero the reference tallies, so the
     * fixtures are rebuilt before the sorted iteration check. */
    setup();
    test_hattrie_insert();
    test_hattrie_sorted_iteration();
    teardown();

    return 0;
}
271 | 


--------------------------------------------------------------------------------
/src/hat_trie.pyx:
--------------------------------------------------------------------------------
  1 | # cython: profile=True
  2 | 
  3 | from libc.math cimport NAN
  4 | from chat_trie cimport *
  5 | 
  6 | cimport cpython
  7 | 
  8 | cdef class BaseTrie:
  9 |     """
 10 |     Base HAT-Trie wrapper.
 11 |     """
 12 | 
 13 |     cdef hattrie_t* _trie
 14 | 
 15 |     def __cinit__(self):
 16 |         self._trie = hattrie_create()
 17 | 
 18 |     def __dealloc__(self):
 19 |         if self._trie:
 20 |             hattrie_free(self._trie)
 21 | 
 22 | 
 23 |     def __getitem__(self, bytes key):
 24 |         return self._getitem(key)
 25 | 
 26 |     def __setitem__(self, bytes key, int value):
 27 |         self._setitem(key, value)
 28 | 
 29 |     def __contains__(self, bytes key):
 30 |         return self._contains(key)
 31 | 
 32 |     def __len__(self):
 33 |         return hattrie_size(self._trie)
 34 | 
 35 |     def get(self, bytes key, value=-1):
 36 |         try:
 37 |             return self._getitem(key)
 38 |         except KeyError:
 39 |             return value
 40 | 
 41 |     def setdefault(self, bytes key, int value):
 42 |         return self._setdefault(key, value)
 43 | 
 44 |     def keys(self):
 45 |         return list(self.iterkeys())
 46 | 
 47 |     def iterkeys(self):
 48 |         cdef:
 49 |             hattrie_iter_t* it = hattrie_iter_begin(self._trie, 0)
 50 |             char* c_key
 51 |             size_t val
 52 |             size_t length
 53 |             bytes py_str
 54 | 
 55 |         try:
 56 |             while not hattrie_iter_finished(it):
 57 |                 c_key = hattrie_iter_key(it, &length)
 58 |                 py_str = c_key[:length]
 59 |                 yield py_str
 60 |                 hattrie_iter_next(it)
 61 | 
 62 |         finally:
 63 |             hattrie_iter_free(it)
 64 | 
 65 | 
 66 |     cdef value_t _getitem(self, char* key) except -1:
 67 |         cdef value_t* value_ptr = hattrie_tryget(self._trie, key, len(key))
 68 |         if value_ptr == NULL:
 69 |             raise KeyError(key)
 70 |         return value_ptr[0]
 71 | 
 72 |     cdef void _setitem(self, char* key, value_t value):
 73 |         hattrie_get(self._trie, key, len(key))[0] = value
 74 | 
 75 |     cdef value_t _setdefault(self, char* key, value_t value):
 76 |         cdef value_t* value_ptr = hattrie_tryget(self._trie, key, len(key))
 77 |         if value_ptr == NULL:
 78 |             self._setitem(key, value)
 79 |             return value
 80 |         return value_ptr[0]
 81 | 
 82 |     cdef bint _contains(self, char* key):
 83 |         cdef value_t* value_ptr = hattrie_tryget(self._trie, key, len(key))
 84 |         return value_ptr != NULL
 85 | 
 86 | 
 87 | cdef class IntTrie(BaseTrie):
 88 |     """
 89 |     HAT-Trie with unicode support that stores int as value.
 90 |     """
 91 | 
 92 |     # XXX: Internal encoding is hardcoded as UTF8. This is the fastest
 93 |     # encoding that can handle all unicode symbols and doesn't have
 94 |     # zero bytes.
 95 | 
 96 |     # This may seem sub-optimal because it is multibyte encoding;
 97 |     # single-byte language-specific encoding (such as cp1251)
 98 |     # seems to be faster. But this is not the case because:
 99 | 
100 |     # 1) the bottleneck of this wrapper is string encoding, not trie traversal;
101 |     # 2) python's unicode encoding utilities are optimized for utf8;
102 |     # 3) users will have to select language-specific encoding for the trie;
103 |     # 4) non-hardcoded encoding causes extra overhead and prevents cython
104 |     #    optimizations.
105 | 
106 |     # That's why hardcoded utf8 is up to 9 times faster than configurable cp1251.
107 | 
108 |     # XXX: char-walking utilities may become tricky with multibyte
109 |     # internal encoding.
110 | 
111 |     def __getitem__(self, unicode key):
112 |         cdef bytes bkey = key.encode('utf8')
113 |         return self._getitem(bkey)
114 | 
115 |     def __contains__(self, unicode key):
116 |         cdef bytes bkey = key.encode('utf8')
117 |         return self._contains(bkey)
118 | 
119 |     def __setitem__(self, unicode key, int value):
120 |         cdef bytes bkey = key.encode('utf8')
121 |         self._setitem(bkey, value)
122 | 
123 |     def get(self, unicode key, value=-1):
124 |         cdef bytes bkey = key.encode('utf8')
125 |         try:
126 |             return self._getitem(bkey)
127 |         except KeyError:
128 |             return value
129 | 
130 |     def setdefault(self, unicode key, int value):
131 |         cdef bytes bkey = key.encode('utf8')
132 |         return self._setdefault(bkey, value)
133 | 
134 |     def iterkeys(self):
135 |         for key in BaseTrie.iterkeys(self):
136 |             yield key.decode('utf8')
137 | 
138 | 
cdef class FloatTrie(BaseTrie):
    """
    HAT-Trie with unicode support that stores float as value.
    """

    # XXX: uintptr_t is interpreted as a float32. This should work on all
    # systems with 32-bit or larger pointers, e.g. the majority of modern
    # computers. This will likely not work on embedded 8- and 16-bit
    # systems.

    def __getitem__(self, unicode key):
        cdef bytes bkey = key.encode('utf8')
        return self._fromvalue(self._getitem(bkey))

    def __contains__(self, unicode key):
        cdef bytes bkey = key.encode('utf8')
        return self._contains(bkey)

    def __setitem__(self, unicode key, float value):
        cdef bytes bkey = key.encode('utf8')
        self._setitem(bkey, self._tovalue(value))

    def get(self, unicode key, value=float('nan')):
        """Return the float stored under ``key``, or ``value`` if absent."""
        cdef bytes bkey = key.encode('utf8')
        try:
            return self._fromvalue(self._getitem(bkey))
        except KeyError:
            return value

    def setdefault(self, unicode key, float value):
        """Return the float under ``key``, storing ``value`` first if absent."""
        cdef bytes bkey = key.encode('utf8')
        return self._fromvalue(self._setdefault(bkey, self._tovalue(value)))

    def iterkeys(self):
        """Yield every key decoded from UTF8 to unicode."""
        for key in BaseTrie.iterkeys(self):
            yield key.decode('utf8')

    cdef float _fromvalue(self, value_t value):
        # Reinterpret the leading bytes of the value slot as a float; this
        # is the same position _tovalue writes to.
        cdef float* float_ptr = <float *> &value
        return float_ptr[0]

    cdef value_t _tovalue(self, float value):
        # Write the float into the leading bytes of a zeroed value_t rather
        # than reading a whole value_t out of the float: the float is only
        # 4 bytes, so reading sizeof(value_t) bytes from it would run past
        # the variable whenever value_t is wider.
        cdef value_t packed = 0
        (<float *> &packed)[0] = value
        return packed
183 | 
184 | 
cdef class Trie(BaseTrie):
    """
    HAT-Trie with unicode support and arbitrary values.

    Values are Python objects; the trie stores their ``PyObject*`` in the
    value slot and holds one reference to each stored object.
    """

    # XXX: Internal encoding is hardcoded as UTF8. See note in IntTrie
    # for more details.

    def __dealloc__(self):
        # Drop the reference held for every stored object.
        cdef hattrie_iter_t* it = hattrie_iter_begin(self._trie, 0)
        cdef cpython.PyObject *o

        try:
            while not hattrie_iter_finished(it):
                o = <cpython.PyObject *> hattrie_iter_val(it)[0]
                cpython.Py_XDECREF(o)
                hattrie_iter_next(it)

        finally:
            hattrie_iter_free(it)


    def __getitem__(self, unicode key):
        cdef bytes bkey = key.encode('utf8')
        return self._fromvalue(self._getitem(bkey))

    def __contains__(self, unicode key):
        cdef bytes bkey = key.encode('utf8')
        return self._contains(bkey)

    def __setitem__(self, unicode key, value):
        cdef bytes bkey = key.encode('utf8')
        self._setitem(bkey, self._tovalue(value))

    def get(self, unicode key, value=None):
        """Return the object stored under ``key``, or ``value`` if absent."""
        cdef bytes bkey = key.encode('utf8')
        try:
            return self._fromvalue(self._getitem(bkey))
        except KeyError:
            return value

    def setdefault(self, unicode key, value):
        """
        Return the object stored under ``key``; if the key is absent, store
        ``value`` under it first and return ``value``.
        """
        cdef bytes bkey = key.encode('utf8')
        cdef char* c_key = bkey
        cdef value_t* value_ptr = hattrie_tryget(self._trie, c_key, len(c_key))
        if value_ptr != NULL:
            # Key already present: hand back the stored object and leave
            # ``value`` alone, so no reference is taken on an object that
            # is never stored.
            return self._fromvalue(value_ptr[0])
        self._setitem(bkey, self._tovalue(value))
        return value

    def iterkeys(self):
        """Yield every key decoded from UTF8 to unicode."""
        for key in BaseTrie.iterkeys(self):
            yield key.decode('utf8')

    cdef void _setitem(self, char* key, value_t value):
        # Release the reference held for the object being replaced, if any,
        # before overwriting the slot.
        cdef cpython.PyObject *o
        cdef value_t* value_ptr = hattrie_tryget(self._trie, key, len(key))
        if value_ptr != NULL:
            o = <cpython.PyObject *> value_ptr[0]
            cpython.Py_XDECREF(o)
        hattrie_get(self._trie, key, len(key))[0] = value

    cdef object _fromvalue(self, value_t value):
        # Turn a stored pointer back into a Python object.
        cdef cpython.PyObject *o
        o = <cpython.PyObject *> value
        return <object> o

    cdef value_t _tovalue(self, object obj):
        # Take the reference that the trie will own while ``obj`` is stored.
        cdef cpython.PyObject *o
        o = <cpython.PyObject *> obj
        cpython.Py_XINCREF(o)
        return <value_t> o
252 | 


--------------------------------------------------------------------------------
/bench/speed.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | # -*- coding: utf-8 -*-
  3 | from __future__ import absolute_import, unicode_literals, division
  4 | import random
  5 | import string
  6 | import timeit
  7 | import os
  8 | import zipfile
  9 | #import pstats
 10 | #import cProfile
 11 | 
 12 | import hat_trie
 13 | 
 14 | def words100k():
 15 |     zip_name = os.path.join(
 16 |         os.path.abspath(os.path.dirname(__file__)),
 17 |         'words100k.txt.zip'
 18 |     )
 19 |     zf = zipfile.ZipFile(zip_name)
 20 |     txt = zf.open(zf.namelist()[0]).read().decode('utf8')
 21 |     return txt.splitlines()
 22 | 
 23 | def random_words(num):
 24 |     russian = 'абвгдеёжзиклмнопрстуфхцчъыьэюя'
 25 |     alphabet = russian + string.ascii_letters
 26 |     return [
 27 |         "".join([random.choice(alphabet) for x in range(random.randint(1,15))])
 28 |         for y in range(num)
 29 |     ]
 30 | 
 31 | def truncated_words(words):
 32 |     return [word[:3] for word in words]
 33 | 
 34 | def prefixes1k(words, prefix_len):
 35 |     words = [w for w in words if len(w) >= prefix_len]
 36 |     every_nth = int(len(words)/1000)
 37 |     _words = [w[:prefix_len] for w in words[::every_nth]]
 38 |     return _words[:1000]
 39 | 
# Benchmark fixtures, built once at import time.
WORDS100k = words100k()                       # lines of the bundled word list
MIXED_WORDS100k = truncated_words(WORDS100k)  # each word cut to 3 characters
NON_WORDS100k = random_words(100000)          # random strings for miss lookups
# Up to 1000 evenly sampled prefixes of the given length.
PREFIXES_3_1k = prefixes1k(WORDS100k, 3)
PREFIXES_5_1k = prefixes1k(WORDS100k, 5)
PREFIXES_8_1k = prefixes1k(WORDS100k, 8)
PREFIXES_15_1k = prefixes1k(WORDS100k, 15)
 47 | 
 48 | 
 49 | def format_result(key, value, text_width):
 50 |     key = key.ljust(text_width)
 51 |     print("    %s %s" % (key, value))
 52 | 
 53 | 
 54 | def bench(name, timer, descr='M ops/sec', op_count=0.1, repeats=3, runs=5,
 55 |           text_width=28):
 56 |     try:
 57 |         times = []
 58 |         for x in range(runs):
 59 |             times.append(timer.timeit(repeats))
 60 | 
 61 |         def op_time(time):
 62 |             return op_count*repeats / time
 63 | 
 64 |         val = "%0.3f%s" % (op_time(min(times)), descr)
 65 |         format_result(name, val, text_width)
 66 |     except (AttributeError, TypeError) as e:
 67 |         format_result(name, "not supported", text_width)
 68 | 
 69 | 
 70 | def create_trie():
 71 |     words = words100k()
 72 |     trie = hat_trie.Trie()
 73 |     for word in words:
 74 |         trie[word] = 1
 75 |     return trie
 76 | 
 77 | def benchmark():
 78 |     print('\n====== Benchmarks (100k unique unicode words) =======\n')
 79 | 
 80 |     tests = [
 81 |         ('__getitem__ (hits)', "for word in words: data[word]", 'M ops/sec', 0.1, 3),
 82 |         ('__contains__ (hits)', "for word in words: word in data", 'M ops/sec', 0.1, 3),
 83 |         ('__contains__ (misses)', "for word in NON_WORDS100k: word in data", 'M ops/sec', 0.1, 3),
 84 |         ('__len__', 'len(data)', ' ops/sec', 1, 3),
 85 |         ('__setitem__ (updates)', 'for word in words: data[word]=1', 'M ops/sec',0.1, 3),
 86 |         ('__setitem__ (inserts)', 'for word in NON_WORDS_10k: data[word]=1', 'M ops/sec',0.01, 3),
 87 |         ('setdefault (updates)', 'for word in words: data.setdefault(word, 1)', 'M ops/sec', 0.1, 3),
 88 |         ('setdefault (inserts)', 'for word in  NON_WORDS_10k: data.setdefault(word, 1)', 'M ops/sec', 0.01, 3),
 89 | #        ('items()', 'list(data.items())', ' ops/sec', 1, 1),
 90 |         ('keys()', 'list(data.keys())', ' ops/sec', 1, 1),
 91 | #        ('values()', 'list(data.values())', ' ops/sec', 1, 1),
 92 |     ]
 93 | 
 94 |     common_setup = """
 95 | from __main__ import create_trie, WORDS100k, NON_WORDS100k, MIXED_WORDS100k
 96 | from __main__ import PREFIXES_3_1k, PREFIXES_5_1k, PREFIXES_8_1k, PREFIXES_15_1k
 97 | words = WORDS100k
 98 | NON_WORDS_10k = NON_WORDS100k[:10000]
 99 | NON_WORDS_1k = ['ыва', 'xyz', 'соы', 'Axx', 'avы']*200
100 | """
101 |     dict_setup = common_setup + 'data = dict((word, 1) for word in words);'
102 |     trie_setup = common_setup + 'data = create_trie();'
103 | 
104 |     for test_name, test, descr, op_count, repeats in tests:
105 |         t_dict = timeit.Timer(test, dict_setup)
106 |         t_trie = timeit.Timer(test, trie_setup)
107 | 
108 |         bench('dict '+test_name, t_dict, descr, op_count, repeats)
109 |         bench('trie '+test_name, t_trie, descr, op_count, repeats)
110 | 
111 | 
112 |     # trie-specific benchmarks
113 | 
114 | #    bench(
115 | #        'trie.iter_prefix_items (hits)',
116 | #        timeit.Timer(
117 | #            "for word in words:\n"
118 | #            "   for it in data.iter_prefix_items(word):\n"
119 | #            "       pass",
120 | #            trie_setup
121 | #        ),
122 | #    )
123 | #
124 | #    bench(
125 | #        'trie.prefix_items (hits)',
126 | #        timeit.Timer(
127 | #            "for word in words: data.prefix_items(word)",
128 | #            trie_setup
129 | #        )
130 | #    )
131 | #
132 | #    bench(
133 | #        'trie.prefix_items loop (hits)',
134 | #        timeit.Timer(
135 | #            "for word in words:\n"
136 | #            "    for it in data.prefix_items(word):pass",
137 | #            trie_setup
138 | #        )
139 | #    )
140 | #
141 | #    bench(
142 | #        'trie.iter_prefixes (hits)',
143 | #        timeit.Timer(
144 | #            "for word in words:\n"
145 | #            "   for it in data.iter_prefixes(word): pass",
146 | #            trie_setup
147 | #        )
148 | #    )
149 | #
150 | #    bench(
151 | #        'trie.iter_prefixes (misses)',
152 | #        timeit.Timer(
153 | #            "for word in NON_WORDS100k:\n"
154 | #            "   for it in data.iter_prefixes(word): pass",
155 | #            trie_setup
156 | #        )
157 | #    )
158 | #
159 | #    bench(
160 | #        'trie.iter_prefixes (mixed)',
161 | #        timeit.Timer(
162 | #            "for word in MIXED_WORDS100k:\n"
163 | #            "   for it in data.iter_prefixes(word): pass",
164 | #            trie_setup
165 | #        )
166 | #    )
167 | #
168 | #    bench(
169 | #        'trie.has_keys_with_prefix (hits)',
170 | #        timeit.Timer(
171 | #            "for word in words: data.has_keys_with_prefix(word)",
172 | #            trie_setup
173 | #        )
174 | #    )
175 | #
176 | #    bench(
177 | #        'trie.has_keys_with_prefix (misses)',
178 | #        timeit.Timer(
179 | #            "for word in NON_WORDS100k: data.has_keys_with_prefix(word)",
180 | #            trie_setup
181 | #        )
182 | #    )
183 | #
184 | #    for meth in ('longest_prefix', 'longest_prefix_item'):
185 | #        bench(
186 | #            'trie.%s (hits)' % meth,
187 | #            timeit.Timer(
188 | #                "for word in words: data.%s(word)" % meth,
189 | #                trie_setup
190 | #            )
191 | #        )
192 | #
193 | #        bench(
194 | #            'trie.%s (misses)' % meth,
195 | #            timeit.Timer(
196 | #                "for word in NON_WORDS100k: data.%s(word, default=None)" % meth,
197 | #                trie_setup
198 | #            )
199 | #        )
200 | #
201 | #        bench(
202 | #            'trie.%s (mixed)' % meth,
203 | #            timeit.Timer(
204 | #                "for word in MIXED_WORDS100k: data.%s(word, default=None)" % meth,
205 | #                trie_setup
206 | #            )
207 | #        )
208 | #
209 | #
210 | #    prefix_data = [
211 | #        ('xxx', 'avg_len(res)==415', 'PREFIXES_3_1k'),
212 | #        ('xxxxx', 'avg_len(res)==17', 'PREFIXES_5_1k'),
213 | #        ('xxxxxxxx', 'avg_len(res)==3', 'PREFIXES_8_1k'),
214 | #        ('xxxxx..xx', 'avg_len(res)==1.4', 'PREFIXES_15_1k'),
215 | #        ('xxx', 'NON_EXISTING', 'NON_WORDS_1k'),
216 | #    ]
217 | #    for xxx, avg, data in prefix_data:
218 | #        for meth in ('items', 'keys', 'values'):
219 | #            bench(
220 | #                'trie.%s(prefix="%s"), %s' % (meth, xxx, avg),
221 | #                timeit.Timer(
222 | #                    "for word in %s: data.%s(word)" % (data, meth),
223 | #                    trie_setup
224 | #                ),
225 | #                'K ops/sec',
226 | #                op_count=1,
227 | #            )
228 | 
def check_trie(trie, words):
    """Look up every word in ``trie`` and verify that the values sum to
    ``len(words)``; raise a bare ``Exception`` otherwise."""
    total = sum(trie[word] for word in words)
    if total == len(words):
        return
    raise Exception()
235 | 
def profiling():
    """Profile ``check_trie`` over the full word list and print the top 20
    entries sorted by time. The raw profile is written to ``Profile.prof``
    in the current directory."""
    import pstats
    import cProfile
    print('\n====== Profiling =======\n')
    # NOTE: the profiled statement below is a string evaluated against
    # locals(), so the names ``trie`` and ``WORDS`` must not be renamed.
    trie = create_trie()
    WORDS = words100k()

#    def check_prefixes(trie, words):
#        for word in words:
#            trie.keys(word)
#    cProfile.runctx("check_prefixes(trie, NON_WORDS_1k)", globals(), locals(), "Profile.prof")
#
    cProfile.runctx("check_trie(trie, WORDS)", globals(), locals(), "Profile.prof")

    s = pstats.Stats("Profile.prof")
    s.strip_dirs().sort_stats("time").print_stats(20)
252 | 
253 | #def memory():
254 | #    gc.collect()
255 | #    _memory = lambda: _get_memory(os.getpid())
256 | #    initial_memory = _memory()
257 | #    trie = create_trie()
258 | #    gc.collect()
259 | #    trie_memory = _memory()
260 | #
261 | #    del trie
262 | #    gc.collect()
263 | #    alphabet, words = words100k()
264 | #    words_dict = dict((word, 1) for word in words)
265 | #    del alphabet
266 | #    del words
267 | #    gc.collect()
268 | #
269 | #    dict_memory = _memory()
270 | #    print('initial: %s, trie: +%s, dict: +%s' % (
271 | #        initial_memory,
272 | #        trie_memory-initial_memory,
273 | #        dict_memory-initial_memory,
274 | #    ))
275 | 
# Script entry point: only the benchmark suite runs by default; profiling()
# and the memory measurement are left commented out.
if __name__ == '__main__':
#    trie = create_trie()
#    def check_pref(prefixes):
#        cntr = 0
#        for w in prefixes:
#            cntr += len(trie.keys(w))
#        print(len(prefixes), cntr, cntr / len(prefixes))
#    check_pref(prefixes1k(WORDS100k, 15))


    benchmark()
    #profiling()
    #memory()
    print('\n~~~~~~~~~~~~~~\n')


--------------------------------------------------------------------------------
/hat-trie/src/ahtable.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is part of hat-trie.
  3 |  *
  4 |  * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
  5 |  *
  6 |  * See ahtable.h for description of the Array Hash Table.
  7 |  *
  8 |  */
  9 | 
 10 | #include "ahtable.h"
 11 | #include "misc.h"
 12 | #include "murmurhash3.h"
 13 | #include <assert.h>
 14 | #include <string.h>
 15 | 
/* Multiplied by the slot count to obtain a table's max_m
 * (see ahtable_create_n). */
const double ahtable_max_load_factor = 100000.0; /* arbitrary large number => don't resize */
/* Slot count used by ahtable_create() and restored by ahtable_clear(). */
const size_t ahtable_initial_size = 4096;
 18 | 
 19 | static size_t keylen(slot_t s) {
 20 |     if (0x1 & *s) {
 21 |         return (size_t) (*((uint16_t*) s) >> 1);
 22 |     }
 23 |     else {
 24 |         return (size_t) (*s >> 1);
 25 |     }
 26 | }
 27 | 
 28 | 
/* Create an empty table with the default slot count
 * (ahtable_initial_size). */
ahtable_t* ahtable_create()
{
    return ahtable_create_n(ahtable_initial_size);
}
 33 | 
 34 | 
 35 | ahtable_t* ahtable_create_n(size_t n)
 36 | {
 37 |     ahtable_t* table = malloc_or_die(sizeof(ahtable_t));
 38 |     table->flag = 0;
 39 |     table->c0 = table->c1 = '\0';
 40 | 
 41 |     table->n = n;
 42 |     table->m = 0;
 43 |     table->max_m = (size_t) (ahtable_max_load_factor * (double) table->n);
 44 |     table->slots = malloc_or_die(n * sizeof(slot_t));
 45 |     memset(table->slots, 0, n * sizeof(slot_t));
 46 | 
 47 |     table->slot_sizes = malloc_or_die(n * sizeof(size_t));
 48 |     memset(table->slot_sizes, 0, n * sizeof(size_t));
 49 | 
 50 |     return table;
 51 | }
 52 | 
 53 | 
 54 | void ahtable_free(ahtable_t* table)
 55 | {
 56 |     if (table == NULL) return;
 57 |     size_t i;
 58 |     for (i = 0; i < table->n; ++i) free(table->slots[i]);
 59 |     free(table->slots);
 60 |     free(table->slot_sizes);
 61 |     free(table);
 62 | }
 63 | 
 64 | 
/* Return the table's stored key count (its m field). */
size_t ahtable_size(const ahtable_t* table)
{
    return table->m;
}
 69 | 
 70 | 
 71 | size_t ahtable_sizeof(const ahtable_t* table)
 72 | {
 73 |     size_t nbytes = sizeof(ahtable_t) +
 74 |                     table->n * (sizeof(size_t) + sizeof(slot_t));
 75 |     size_t i;
 76 |     for (i = 0; i < table->n; ++i) {
 77 |         nbytes += table->slot_sizes[i];
 78 |     }
 79 |     return nbytes;
 80 | }
 81 | 
 82 | 
 83 | void ahtable_clear(ahtable_t* table)
 84 | {
 85 |     size_t i;
 86 |     for (i = 0; i < table->n; ++i) free(table->slots[i]);
 87 |     table->n = ahtable_initial_size;
 88 |     table->slots = realloc_or_die(table->slots, table->n * sizeof(slot_t));
 89 |     memset(table->slots, 0, table->n * sizeof(slot_t));
 90 | 
 91 |     table->slot_sizes = realloc_or_die(table->slot_sizes, table->n * sizeof(size_t));
 92 |     memset(table->slot_sizes, 0, table->n * sizeof(size_t));
 93 | }
 94 | 
 95 | /** Inserts a key with value into slot s, and returns a pointer to the
 96 |   * space immediately after.
 97 |   */
 98 | static slot_t ins_key(slot_t s, const char* key, size_t len, value_t** val)
 99 | {
100 |     // key length
101 |     if (len < 128) {
102 |         s[0] = (unsigned char) (len << 1);
103 |         s += 1;
104 |     }
105 |     else {
106 |         /* The least significant bit is set to indicate that two bytes are
107 |          * being used to store the key length. */
108 |         *((uint16_t*) s) = ((uint16_t) len << 1) | 0x1;
109 |         s += 2;
110 |     }
111 | 
112 |     // key
113 |     memcpy(s, key, len * sizeof(unsigned char));
114 |     s += len;
115 | 
116 |     // value
117 |     *val = (value_t*) s;
118 |     **val = 0;
119 |     s += sizeof(value_t);
120 | 
121 |     return s;
122 | }
123 | 
124 | 
/* Double the number of slots and rehash every stored key into the new slots.
 *
 * Works in two passes over the existing entries: the first computes the exact
 * byte size each new slot needs, the second copies keys and values into the
 * freshly allocated slots. The old slot buffers and bookkeeping arrays are
 * freed afterwards and max_m is recomputed from the new slot count. */
static void ahtable_expand(ahtable_t* table)
{
    /* Resizing a table is essentially building a brand new one.
     * One little shortcut we can take on the memory allocation front is to
     * figure out how much memory each slot needs in advance.
     */
    assert(table->n > 0);
    size_t new_n = 2 * table->n;
    size_t* slot_sizes = malloc_or_die(new_n * sizeof(size_t));
    memset(slot_sizes, 0, new_n * sizeof(size_t));

    /* pass 1: accumulate the byte size of every new slot */
    const char* key;
    size_t len = 0;
    size_t m = 0;
    ahtable_iter_t* i = ahtable_iter_begin(table, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        /* entry size = key bytes + value + 1- or 2-byte length header */
        slot_sizes[hash(key, len) % new_n] +=
            len + sizeof(value_t) + (len >= 128 ? 2 : 1);

        ++m;
        ahtable_iter_next(i);
    }
    assert(m == table->m);
    ahtable_iter_free(i);


    /* allocate slots */
    slot_t* slots = malloc_or_die(new_n * sizeof(slot_t));
    size_t j;
    for (j = 0; j < new_n; ++j) {
        if (slot_sizes[j] > 0) {
            slots[j] = malloc_or_die(slot_sizes[j]);
        }
        else slots[j] = NULL;
    }

    /* rehash values. A few shortcuts can be taken here as well, as we know
     * there will be no collisions. Instead of the regular insertion routine,
     * we keep track of the ends of every slot and simply insert keys.
     * */
    /* slots_next[h] is the write cursor for slot h; it starts at the slot's
     * base and advances past each inserted entry */
    slot_t* slots_next = malloc_or_die(new_n * sizeof(slot_t));
    memcpy(slots_next, slots, new_n * sizeof(slot_t));
    size_t h;
    m = 0;
    value_t* u;
    value_t* v;
    i = ahtable_iter_begin(table, false);
    while (!ahtable_iter_finished(i)) {

        key = ahtable_iter_key(i, &len);
        h = hash(key, len) % new_n;

        /* ins_key zeroes the new value; overwrite it with the old one */
        slots_next[h] = ins_key(slots_next[h], key, len, &u);
        v = ahtable_iter_val(i);
        *u = *v;

        ++m;
        ahtable_iter_next(i);
    }
    assert(m == table->m);
    ahtable_iter_free(i);


    /* release the old storage and install the new arrays */
    free(slots_next);
    for (j = 0; j < table->n; ++j) free(table->slots[j]);

    free(table->slots);
    table->slots = slots;

    free(table->slot_sizes);
    table->slot_sizes = slot_sizes;

    table->n = new_n;
    table->max_m = (size_t) (ahtable_max_load_factor * (double) table->n);
}
201 | 
202 | 
203 | static value_t* get_key(ahtable_t* table, const char* key, size_t len, bool insert_missing)
204 | {
205 |     /* if we are at capacity, preemptively resize */
206 |     if (insert_missing && table->m >= table->max_m) {
207 |         ahtable_expand(table);
208 |     }
209 | 
210 | 
211 |     uint32_t i = hash(key, len) % table->n;
212 |     size_t k;
213 |     slot_t s;
214 |     value_t* val;
215 | 
216 |     /* search the array for our key */
217 |     s = table->slots[i];
218 |     while ((size_t) (s - table->slots[i]) < table->slot_sizes[i]) {
219 |         /* get the key length */
220 |         k = keylen(s);
221 |         s += k < 128 ? 1 : 2;
222 | 
223 |         /* skip keys that are longer than ours */
224 |         if (k != len) {
225 |             s += k + sizeof(value_t);
226 |             continue;
227 |         }
228 | 
229 |         /* key found. */
230 |         if (memcmp(s, key, len) == 0) {
231 |             return (value_t*) (s + len);
232 |         }
233 |         /* key not found. */
234 |         else {
235 |             s += k + sizeof(value_t);
236 |             continue;
237 |         }
238 |     }
239 | 
240 | 
241 |     if (insert_missing) {
242 |         /* the key was not found, so we must insert it. */
243 |         size_t new_size = table->slot_sizes[i];
244 |         new_size += 1 + (len >= 128 ? 1 : 0);    // key length
245 |         new_size += len * sizeof(unsigned char); // key
246 |         new_size += sizeof(value_t);             // value
247 | 
248 |         table->slots[i] = realloc_or_die(table->slots[i], new_size);
249 | 
250 |         ++table->m;
251 |         ins_key(table->slots[i] + table->slot_sizes[i], key, len, &val);
252 |         table->slot_sizes[i] = new_size;
253 | 
254 |         return val;
255 |     }
256 |     else return NULL;
257 | }
258 | 
259 | 
260 | value_t* ahtable_get(ahtable_t* table, const char* key, size_t len)
261 | {
262 |     return get_key(table, key, len, true);
263 | }
264 | 
265 | 
266 | value_t* ahtable_tryget(ahtable_t* table, const char* key, size_t len )
267 | {
268 |     return get_key(table, key, len, false);
269 | }
270 | 
271 | 
272 | int ahtable_del(ahtable_t* table, const char* key, size_t len)
273 | {
274 |     uint32_t i = hash(key, len) % table->n;
275 |     size_t k;
276 |     slot_t s;
277 | 
278 |     /* search the array for our key */
279 |     s = table->slots[i];
280 |     while ((size_t) (s - table->slots[i]) < table->slot_sizes[i]) {
281 |         /* get the key length */
282 |         k = keylen(s);
283 |         s += k < 128 ? 1 : 2;
284 | 
285 |         /* skip keys that are longer than ours */
286 |         if (k != len) {
287 |             s += k + sizeof(value_t);
288 |             continue;
289 |         }
290 | 
291 |         /* key found. */
292 |         if (memcmp(s, key, len) == 0) {
293 |             /* move everything over, resize the array */
294 |             unsigned char* t = s + len + sizeof(value_t);
295 |             s -= k < 128 ? 1 : 2;
296 |             memmove(s, t, table->slot_sizes[i] - (size_t) (t - table->slots[i]));
297 |             table->slot_sizes[i] -= (size_t) (t - s);
298 |             --table->m;
299 |             return 0;
300 |         }
301 |         /* key not found. */
302 |         else {
303 |             s += k + sizeof(value_t);
304 |             continue;
305 |         }
306 |     }
307 | 
308 |     // Key was not found. Do nothing.
309 |     return -1;
310 | }
311 | 
312 | 
313 | 
314 | static int cmpkey(const void* a_, const void* b_)
315 | {
316 |     slot_t a = *(slot_t*) a_;
317 |     slot_t b = *(slot_t*) b_;
318 | 
319 |     size_t ka = keylen(a), kb = keylen(b);
320 | 
321 |     a += ka < 128 ? 1 : 2;
322 |     b += kb < 128 ? 1 : 2;
323 | 
324 |     int c = memcmp(a, b, ka < kb ? ka : kb);
325 |     return c == 0 ? (int) ka - (int) kb : c;
326 | }
327 | 
328 | 
329 | /* Sorted/unsorted iterators are kept private and exposed by passing the
330 | sorted flag to ahtable_iter_begin. */
331 | 
/* State of a sorted iteration: a snapshot array of entry pointers, sorted by
 * key when the iterator is created, plus a cursor into that array. */
typedef struct ahtable_sorted_iter_t_
{
    const ahtable_t* table; // parent
    slot_t* xs; // pointers to keys (one per stored entry, sorted with cmpkey)
    size_t i; // current key (index into xs; iteration ends at table->m)
} ahtable_sorted_iter_t;
338 | 
339 | 
340 | static ahtable_sorted_iter_t* ahtable_sorted_iter_begin(const ahtable_t* table)
341 | {
342 |     ahtable_sorted_iter_t* i = malloc_or_die(sizeof(ahtable_sorted_iter_t));
343 |     i->table = table;
344 |     i->xs = malloc_or_die(table->m * sizeof(slot_t));
345 |     i->i = 0;
346 | 
347 |     slot_t s;
348 |     size_t j, k, u;
349 |     for (j = 0, u = 0; j < table->n; ++j) {
350 |         s = table->slots[j];
351 |         while (s < table->slots[j] + table->slot_sizes[j]) {
352 |             i->xs[u++] = s;
353 |             k = keylen(s);
354 |             s += k < 128 ? 1 : 2;
355 |             s += k + sizeof(value_t);
356 |         }
357 |     }
358 | 
359 |     qsort(i->xs, table->m, sizeof(slot_t), cmpkey);
360 | 
361 |     return i;
362 | }
363 | 
364 | 
365 | static bool ahtable_sorted_iter_finished(ahtable_sorted_iter_t* i)
366 | {
367 |     return i->i >= i->table->m;
368 | }
369 | 
370 | 
371 | static void ahtable_sorted_iter_next(ahtable_sorted_iter_t* i)
372 | {
373 |     if (ahtable_sorted_iter_finished(i)) return;
374 |     ++i->i;
375 | }
376 | 
377 | 
378 | static void ahtable_sorted_iter_free(ahtable_sorted_iter_t* i)
379 | {
380 |     if (i == NULL) return;
381 |     free(i->xs);
382 |     free(i);
383 | }
384 | 
385 | 
386 | static const char* ahtable_sorted_iter_key(ahtable_sorted_iter_t* i, size_t* len)
387 | {
388 |     if (ahtable_sorted_iter_finished(i)) return NULL;
389 | 
390 |     slot_t s = i->xs[i->i];
391 |     if (len) *len = keylen(s);
392 | 
393 |     return (const char*) (s + (*len < 128 ? 1 : 2));
394 | }
395 | 
396 | 
397 | static value_t*  ahtable_sorted_iter_val(ahtable_sorted_iter_t* i)
398 | {
399 |     if (ahtable_sorted_iter_finished(i)) return NULL;
400 | 
401 |     slot_t s = i->xs[i->i];
402 |     size_t k = keylen(s);
403 | 
404 |     s += k < 128 ? 1 : 2;
405 |     s += k;
406 | 
407 |     return (value_t*) s;
408 | }
409 | 
410 | 
/* State of an unsorted iteration: walks the slots in index order and, inside
 * each slot, the entries in storage order. */
typedef struct ahtable_unsorted_iter_t_
{
    const ahtable_t* table; // parent
    size_t i;           // slot index (iteration ends when it reaches table->n)
    slot_t s;           // slot position (start of the current entry)
} ahtable_unsorted_iter_t;
417 | 
418 | 
419 | static ahtable_unsorted_iter_t* ahtable_unsorted_iter_begin(const ahtable_t* table)
420 | {
421 |     ahtable_unsorted_iter_t* i = malloc_or_die(sizeof(ahtable_unsorted_iter_t));
422 |     i->table = table;
423 | 
424 |     for (i->i = 0; i->i < i->table->n; ++i->i) {
425 |         i->s = table->slots[i->i];
426 |         if ((size_t) (i->s - table->slots[i->i]) >= table->slot_sizes[i->i]) continue;
427 |         break;
428 |     }
429 | 
430 |     return i;
431 | }
432 | 
433 | 
434 | static bool ahtable_unsorted_iter_finished(ahtable_unsorted_iter_t* i)
435 | {
436 |     return i->i >= i->table->n;
437 | }
438 | 
439 | 
440 | static void ahtable_unsorted_iter_next(ahtable_unsorted_iter_t* i)
441 | {
442 |     if (ahtable_unsorted_iter_finished(i)) return;
443 | 
444 |     /* get the key length */
445 |     size_t k = keylen(i->s);
446 |     i->s += k < 128 ? 1 : 2;
447 | 
448 |     /* skip to the next key */
449 |     i->s += k + sizeof(value_t);
450 | 
451 |     if ((size_t) (i->s - i->table->slots[i->i]) >= i->table->slot_sizes[i->i]) {
452 |         do {
453 |             ++i->i;
454 |         } while(i->i < i->table->n &&
455 |                 i->table->slot_sizes[i->i] == 0);
456 | 
457 |         if (i->i < i->table->n) i->s = i->table->slots[i->i];
458 |         else i->s = NULL;
459 |     }
460 | }
461 | 
462 | 
463 | static void ahtable_unsorted_iter_free(ahtable_unsorted_iter_t* i)
464 | {
465 |     free(i);
466 | }
467 | 
468 | 
469 | static const char* ahtable_unsorted_iter_key(ahtable_unsorted_iter_t* i, size_t* len)
470 | {
471 |     if (ahtable_unsorted_iter_finished(i)) return NULL;
472 | 
473 |     slot_t s = i->s;
474 |     size_t k;
475 |     if (0x1 & *s) {
476 |         k = (size_t) (*((uint16_t*) s)) >> 1;
477 |         s += 2;
478 |     }
479 |     else {
480 |         k = (size_t) (*s >> 1);
481 |         s += 1;
482 |     }
483 | 
484 |     if(len) *len = k;
485 |     return (const char*) s;
486 | }
487 | 
488 | 
489 | static value_t* ahtable_unsorted_iter_val(ahtable_unsorted_iter_t* i)
490 | {
491 |     if (ahtable_unsorted_iter_finished(i)) return NULL;
492 | 
493 |     slot_t s = i->s;
494 | 
495 |     size_t k;
496 |     if (0x1 & *s) {
497 |         k = (size_t) (*((uint16_t*) s)) >> 1;
498 |         s += 2;
499 |     }
500 |     else {
501 |         k = (size_t) (*s >> 1);
502 |         s += 1;
503 |     }
504 | 
505 |     s += k;
506 |     return (value_t*) s;
507 | }
508 | 
509 | 
/* Public iterator handle: a tagged union over the two private iterator
 * kinds. `sorted` selects which member of `i` is valid. */
struct ahtable_iter_t_
{
    bool sorted; // true: i.sorted is in use; false: i.unsorted is in use
    union {
        ahtable_unsorted_iter_t* unsorted;
        ahtable_sorted_iter_t* sorted;
    } i;
};
518 | 
519 | 
520 | ahtable_iter_t* ahtable_iter_begin(const ahtable_t* table, bool sorted) {
521 |     ahtable_iter_t* i = malloc_or_die(sizeof(ahtable_iter_t));
522 |     i->sorted = sorted;
523 |     if (sorted) i->i.sorted   = ahtable_sorted_iter_begin(table);
524 |     else        i->i.unsorted = ahtable_unsorted_iter_begin(table);
525 |     return i;
526 | }
527 | 
528 | 
529 | void ahtable_iter_next(ahtable_iter_t* i)
530 | {
531 |     if (i->sorted) ahtable_sorted_iter_next(i->i.sorted);
532 |     else           ahtable_unsorted_iter_next(i->i.unsorted);
533 | }
534 | 
535 | 
536 | bool ahtable_iter_finished(ahtable_iter_t* i)
537 | {
538 |     if (i->sorted) return ahtable_sorted_iter_finished(i->i.sorted);
539 |     else           return ahtable_unsorted_iter_finished(i->i.unsorted);
540 | }
541 | 
542 | 
543 | void ahtable_iter_free(ahtable_iter_t* i)
544 | {
545 |     if (i == NULL) return;
546 |     if (i->sorted) ahtable_sorted_iter_free(i->i.sorted);
547 |     else           ahtable_unsorted_iter_free(i->i.unsorted);
548 |     free(i);
549 | }
550 | 
551 | 
552 | const char* ahtable_iter_key(ahtable_iter_t* i, size_t* len)
553 | {
554 |     if (i->sorted) return ahtable_sorted_iter_key(i->i.sorted, len);
555 |     else           return ahtable_unsorted_iter_key(i->i.unsorted, len);
556 | }
557 | 
558 | 
559 | value_t* ahtable_iter_val(ahtable_iter_t* i)
560 | {
561 |     if (i->sorted) return ahtable_sorted_iter_val(i->i.sorted);
562 |     else           return ahtable_unsorted_iter_val(i->i.unsorted);
563 | }
564 | 
565 | 


--------------------------------------------------------------------------------
/hat-trie/src/hat-trie.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * This file is part of hat-trie.
  3 |  *
  4 |  * Copyright (c) 2011 by Daniel C. Jones <dcjones@cs.washington.edu>
  5 |  *
  6 |  */
  7 | 
  8 | #include "hat-trie.h"
  9 | #include "ahtable.h"
 10 | #include "misc.h"
 11 | #include "pstdint.h"
 12 | #include <assert.h>
 13 | #include <string.h>
 14 | 
/* Self-assignment used to mark a variable as deliberately unused. */
#define HT_UNUSED(x) x=x

/* maximum number of keys that may be stored in a bucket before it is burst */
static const size_t MAX_BUCKET_SIZE = 16384;
/* largest character value a trie node can branch on */
#define NODE_MAXCHAR 0xff // 0x7f for 7-bit ASCII
/* number of child pointers held by every trie node */
#define NODE_CHILDS (NODE_MAXCHAR+1)

/* Bits stored in a node's leading flag byte. The first three identify the
 * node kind; NODE_HAS_VAL marks a trie node whose own `val` holds a key. */
static const uint8_t NODE_TYPE_TRIE          = 0x1;
static const uint8_t NODE_TYPE_PURE_BUCKET   = 0x2;
static const uint8_t NODE_TYPE_HYBRID_BUCKET = 0x4;
static const uint8_t NODE_HAS_VAL            = 0x8;
 26 | 
 27 | 
 28 | struct trie_node_t_;
 29 | 
/* Nodes may be trie nodes or buckets. This union allows us to keep a
 * non-specific pointer. The `flag` member views the pointed-to object's
 * first byte, which is tested against the NODE_TYPE_* bits to tell which
 * kind of node it is. */
typedef union node_ptr_
{
    ahtable_t*           b;    /* the node viewed as a bucket (hash table) */
    struct trie_node_t_* t;    /* the node viewed as a trie node */
    uint8_t*             flag; /* the node's leading flag byte */
} node_ptr;
 38 | 
 39 | 
/* An internal trie node: one child pointer per possible byte value. */
typedef struct trie_node_t_
{
    /* NODE_TYPE_TRIE, optionally combined with NODE_HAS_VAL */
    uint8_t flag;

    /* the value for the key that is consumed on a trie node */
    value_t val;

    /* Map a character to either a trie_node_t or a ahtable_t. The first byte
     * must be examined to determine which. */
    node_ptr xs[NODE_CHILDS];

} trie_node_t;
 52 | 
/* The trie itself: a root node plus a running count of stored keys. */
struct hattrie_t_
{
    node_ptr root; // root node
    size_t m;      // number of stored keys
};
 58 | 
 59 | 
 60 | 
 61 | size_t hattrie_size(const hattrie_t* T)
 62 | {
 63 |     return T->m;
 64 | }
 65 | 
 66 | 
 67 | static size_t node_sizeof(node_ptr node)
 68 | {
 69 |     if (*node.flag & NODE_TYPE_TRIE) {
 70 |         size_t nbytes = sizeof(trie_node_t);
 71 |         size_t i;
 72 |         nbytes += node_sizeof(node.t->xs[0]);
 73 |         for (i = 1; i < NODE_CHILDS; ++i) {
 74 |             if (node.t->xs[i].t != node.t->xs[i-1].t) nbytes += node_sizeof(node.t->xs[i]);
 75 |         }
 76 |         return nbytes;
 77 |     }
 78 |     else {
 79 |         return ahtable_sizeof(node.b);
 80 |     }
 81 | }
 82 | 
 83 | 
 84 | size_t hattrie_sizeof(const hattrie_t* T)
 85 | {
 86 |     return sizeof(hattrie_t) + node_sizeof(T->root);
 87 | }
 88 | 
 89 | 
 90 | /* Create a new trie node with all pointers pointing to the given child (which
 91 |  * can be NULL). */
 92 | static trie_node_t* alloc_trie_node(hattrie_t* T, node_ptr child)
 93 | {
 94 |     trie_node_t* node = malloc_or_die(sizeof(trie_node_t));
 95 |     node->flag = NODE_TYPE_TRIE;
 96 |     node->val  = 0;
 97 | 
 98 |     /* pass T to allow custom allocator for trie. */
 99 |     HT_UNUSED(T); /* unused now */
100 | 
101 |     size_t i;
102 |     for (i = 0; i < NODE_CHILDS; ++i) node->xs[i] = child;
103 |     return node;
104 | }
105 | 
106 | /* iterate trie nodes until string is consumed or bucket is found */
107 | static node_ptr hattrie_consume(node_ptr *p, const char **k, size_t *l, unsigned brk)
108 | {
109 |     node_ptr node = p->t->xs[(unsigned char) **k];
110 |     while (*node.flag & NODE_TYPE_TRIE && *l > brk) {
111 |         ++*k;
112 |         --*l;
113 |         *p   = node;
114 |         node = node.t->xs[(unsigned char) **k];
115 |     }
116 | 
117 |     /* copy and writeback variables if it's faster */
118 | 
119 |     assert(*p->flag & NODE_TYPE_TRIE);
120 |     return node;
121 | }
122 | 
123 | /* use node value and return pointer to it */
124 | static inline value_t* hattrie_useval(hattrie_t *T, node_ptr n)
125 | {
126 |     if (!(n.t->flag & NODE_HAS_VAL)) {
127 |         n.t->flag |= NODE_HAS_VAL;
128 |         ++T->m;
129 |     }
130 |     return &n.t->val;
131 | }
132 | 
133 | /* clear node value if exists */
134 | static inline int hattrie_clrval(hattrie_t *T, node_ptr n)
135 | {
136 |     if (n.t->flag & NODE_HAS_VAL) {
137 |         n.t->flag &= ~NODE_HAS_VAL;
138 |         n.t->val = 0;
139 |         --T->m;
140 |         return 0;
141 |     }
142 |     return -1;
143 | }
144 | 
145 | /* find node in trie */
146 | static node_ptr hattrie_find(hattrie_t* T, const char **key, size_t *len)
147 | {
148 |     node_ptr parent = T->root;
149 |     assert(*parent.flag & NODE_TYPE_TRIE);
150 | 
151 |     if (*len == 0) return parent;
152 | 
153 |     node_ptr node = hattrie_consume(&parent, key, len, 1);
154 | 
155 |     /* if the trie node consumes value, use it */
156 |     if (*node.flag & NODE_TYPE_TRIE) {
157 |         if (!(node.t->flag & NODE_HAS_VAL)) {
158 |             node.flag = NULL;
159 |         }
160 |         return node;
161 |     }
162 | 
163 |     /* pure bucket holds only key suffixes, skip current char */
164 |     if (*node.flag & NODE_TYPE_PURE_BUCKET) {
165 |         *key += 1;
166 |         *len -= 1;
167 |     }
168 | 
169 |     /* do not scan bucket, it's not needed for this operation */
170 |     return node;
171 | }
172 | 
173 | hattrie_t* hattrie_create()
174 | {
175 |     hattrie_t* T = malloc_or_die(sizeof(hattrie_t));
176 |     T->m = 0;
177 | 
178 |     node_ptr node;
179 |     node.b = ahtable_create();
180 |     node.b->flag = NODE_TYPE_HYBRID_BUCKET;
181 |     node.b->c0 = 0x00;
182 |     node.b->c1 = NODE_MAXCHAR;
183 |     T->root.t = alloc_trie_node(T, node);
184 | 
185 |     return T;
186 | }
187 | 
188 | 
189 | static void hattrie_free_node(node_ptr node)
190 | {
191 |     if (*node.flag & NODE_TYPE_TRIE) {
192 |         size_t i;
193 |         for (i = 0; i < NODE_CHILDS; ++i) {
194 |             if (i > 0 && node.t->xs[i].t == node.t->xs[i - 1].t) continue;
195 | 
196 |             /* XXX: recursion might not be the best choice here. It is possible
197 |              * to build a very deep trie. */
198 |             if (node.t->xs[i].t) hattrie_free_node(node.t->xs[i]);
199 |         }
200 |         free(node.t);
201 |     }
202 |     else {
203 |         ahtable_free(node.b);
204 |     }
205 | }
206 | 
207 | 
208 | void hattrie_free(hattrie_t* T)
209 | {
210 |     hattrie_free_node(T->root);
211 |     free(T);
212 | }
213 | 
214 | 
215 | void hattrie_clear(hattrie_t* T)
216 | {
217 |     hattrie_free_node(T->root);
218 |     node_ptr node;
219 |     node.b = ahtable_create();
220 |     node.b->flag = NODE_TYPE_HYBRID_BUCKET;
221 |     node.b->c0 = 0x00;
222 |     node.b->c1 = 0xff;
223 |     T->root.t = alloc_trie_node(T, node);
224 | }
225 | 
226 | 
/* Perform one split operation on the given node with the given parent.
 *
 * `node` must be a bucket and `parent` the trie node pointing at it.
 *
 * - Pure bucket: a new trie node is inserted between parent and bucket, with
 *   all of its children pointing at the bucket, which is relabelled as a
 *   hybrid bucket spanning 0x00..NODE_MAXCHAR. A stored empty key moves onto
 *   the new trie node.
 * - Hybrid bucket: the bucket's character range [c0, c1] is divided at a
 *   split point j chosen to balance the key counts; two new buckets are
 *   created, the parent's pointers updated, all keys redistributed and the
 *   old bucket freed. The caller's `node` is invalid afterwards.
 */
static void hattrie_split(hattrie_t* T, node_ptr parent, node_ptr node)
{
    /* only buckets may be split */
    assert(*node.flag & NODE_TYPE_PURE_BUCKET ||
           *node.flag & NODE_TYPE_HYBRID_BUCKET);

    assert(*parent.flag & NODE_TYPE_TRIE);

    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
        /* turn the pure bucket into a hybrid bucket */
        parent.t->xs[node.b->c0].t = alloc_trie_node(T, node);

        /* if the bucket had an empty key, move it to the new trie node */
        value_t* val = ahtable_tryget(node.b, NULL, 0);
        if (val) {
            parent.t->xs[node.b->c0].t->val     = *val;
            parent.t->xs[node.b->c0].t->flag |= NODE_HAS_VAL;
            *val = 0;
            ahtable_del(node.b, NULL, 0);
        }

        node.b->c0   = 0x00;
        node.b->c1   = NODE_MAXCHAR;
        node.b->flag = NODE_TYPE_HYBRID_BUCKET;

        return;
    }

    /* This is a hybrid bucket. Perform a proper split. */

    /* count the number of occurrences of every leading character */
    unsigned int cs[NODE_CHILDS]; // occurrence count for leading chars
    memset(cs, 0, NODE_CHILDS * sizeof(unsigned int));
    size_t len;
    const char* key;

    ahtable_iter_t* i = ahtable_iter_begin(node.b, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        assert(len > 0);
        cs[(unsigned char) key[0]] += 1;
        ahtable_iter_next(i);
    }
    ahtable_iter_free(i);

    /* choose a split point */
    unsigned int left_m, right_m, all_m;
    unsigned char j = node.b->c0;
    all_m   = ahtable_size(node.b);
    left_m  = cs[j];
    right_m = all_m - left_m;
    int d;

    /* greedily move the split point right while that does not worsen the
     * imbalance and still leaves at least one key for the right side */
    while (j + 1 < node.b->c1) {
        d = abs((int) (left_m + cs[j + 1]) - (int) (right_m - cs[j + 1]));
        if (d <= abs(left_m - right_m) && left_m + cs[j + 1] < all_m) {
            j += 1;
            left_m  += cs[j];
            right_m -= cs[j];
        }
        else break;
    }

    /* now split into two nodes corresponding to ranges [c0, j] and
     * [j + 1, c1], respectively. */


    /* create new left and right nodes */

    /* TODO: Add a special case if either node is a hybrid bucket containing all
     * the keys. In such a case, do not build a new table, just use the old one.
     * */
    size_t num_slots;


    /* size the left table so that left_m keys fit under the load factor */
    for (num_slots = ahtable_initial_size;
            (double) left_m > ahtable_max_load_factor * (double) num_slots;
            num_slots *= 2);

    node_ptr left, right;
    left.b  = ahtable_create_n(num_slots);
    left.b->c0   = node.b->c0;
    left.b->c1   = j;
    /* a bucket covering a single character is pure, otherwise hybrid */
    left.b->flag = left.b->c0 == left.b->c1 ?
                      NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET;


    /* same sizing for the right table */
    for (num_slots = ahtable_initial_size;
            (double) right_m > ahtable_max_load_factor * (double) num_slots;
            num_slots *= 2);

    right.b = ahtable_create_n(num_slots);
    right.b->c0   = j + 1;
    right.b->c1   = node.b->c1;
    right.b->flag = right.b->c0 == right.b->c1 ?
                      NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET;


    /* update the parent's pointer */

    unsigned int c;
    for (c = node.b->c0; c <= j; ++c) parent.t->xs[c] = left;
    for (; c <= node.b->c1; ++c)      parent.t->xs[c] = right;



    /* distribute keys to the new left or right node */
    value_t* u;
    value_t* v;
    i = ahtable_iter_begin(node.b, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        u   = ahtable_iter_val(i);
        assert(len > 0);

        /* left */
        if ((unsigned char) key[0] <= j) {
            /* pure buckets store only the suffix after the leading char */
            if (*left.flag & NODE_TYPE_PURE_BUCKET) {
                v = ahtable_get(left.b, key + 1, len - 1);
            }
            else {
                v = ahtable_get(left.b, key, len);
            }
            *v = *u;
        }

        /* right */
        else {
            if (*right.flag & NODE_TYPE_PURE_BUCKET) {
                v = ahtable_get(right.b, key + 1, len - 1);
            }
            else {
                v = ahtable_get(right.b, key, len);
            }
            *v = *u;
        }

        ahtable_iter_next(i);
    }

    ahtable_iter_free(i);
    ahtable_free(node.b);
}
372 | 
373 | value_t* hattrie_get(hattrie_t* T, const char* key, size_t len)
374 | {
375 |     node_ptr parent = T->root;
376 |     assert(*parent.flag & NODE_TYPE_TRIE);
377 | 
378 |     if (len == 0) return &parent.t->val;
379 | 
380 |     /* consume all trie nodes, now parent must be trie and child anything */
381 |     node_ptr node = hattrie_consume(&parent, &key, &len, 0);
382 |     assert(*parent.flag & NODE_TYPE_TRIE);
383 | 
384 |     /* if the key has been consumed on a trie node, use its value */
385 |     if (len == 0) {
386 |         if (*node.flag & NODE_TYPE_TRIE) {
387 |             return hattrie_useval(T, node);
388 |         }
389 |         else if (*node.flag & NODE_TYPE_HYBRID_BUCKET) {
390 |             return hattrie_useval(T, parent);
391 |         }
392 |     }
393 | 
394 | 
395 |     /* preemptively split the bucket if it is full */
396 |     while (ahtable_size(node.b) >= MAX_BUCKET_SIZE) {
397 |         hattrie_split(T, parent, node);
398 | 
399 |         /* after the split, the node pointer is invalidated, so we search from
400 |          * the parent again. */
401 |         node = hattrie_consume(&parent, &key, &len, 0);
402 | 
403 |         /* if the key has been consumed on a trie node, use its value */
404 |         if (len == 0) {
405 |             if (*node.flag & NODE_TYPE_TRIE) {
406 |                 return hattrie_useval(T, node);
407 |             }
408 |             else if (*node.flag & NODE_TYPE_HYBRID_BUCKET) {
409 |                 return hattrie_useval(T, parent);
410 |             }
411 |         }
412 |     }
413 | 
414 |     assert(*node.flag & NODE_TYPE_PURE_BUCKET || *node.flag & NODE_TYPE_HYBRID_BUCKET);
415 | 
416 |     assert(len > 0);
417 |     size_t m_old = node.b->m;
418 |     value_t* val;
419 |     if (*node.flag & NODE_TYPE_PURE_BUCKET) {
420 |         val = ahtable_get(node.b, key + 1, len - 1);
421 |     }
422 |     else {
423 |         val = ahtable_get(node.b, key, len);
424 |     }
425 |     T->m += (node.b->m - m_old);
426 | 
427 |     return val;
428 | }
429 | 
430 | 
431 | value_t* hattrie_tryget(hattrie_t* T, const char* key, size_t len)
432 | {
433 |     /* find node for given key */
434 |     node_ptr node = hattrie_find(T, &key, &len);
435 |     if (node.flag == NULL) {
436 |         return NULL;
437 |     }
438 | 
439 |     /* if the trie node consumes value, use it */
440 |     if (*node.flag & NODE_TYPE_TRIE) {
441 |         return &node.t->val;
442 |     }
443 | 
444 |     return ahtable_tryget(node.b, key, len);
445 | }
446 | 
447 | 
448 | int hattrie_del(hattrie_t* T, const char* key, size_t len)
449 | {
450 |     node_ptr parent = T->root;
451 |     HT_UNUSED(parent);
452 |     assert(*parent.flag & NODE_TYPE_TRIE);
453 | 
454 |     /* find node for deletion */
455 |     node_ptr node = hattrie_find(T, &key, &len);
456 |     if (node.flag == NULL) {
457 |         return -1;
458 |     }
459 | 
460 |     /* if consumed on a trie node, clear the value */
461 |     if (*node.flag & NODE_TYPE_TRIE) {
462 |         return hattrie_clrval(T, node);
463 |     }
464 | 
465 |     /* remove from bucket */
466 |     size_t m_old = ahtable_size(node.b);
467 |     int ret =  ahtable_del(node.b, key, len);
468 |     T->m -= (m_old - ahtable_size(node.b));
469 | 
470 |     /* merge empty buckets */
471 |     /*! \todo */
472 | 
473 |     return ret;
474 | }
475 | 
476 | 
477 | /* plan for iteration:
478 |  * This is tricky, as we have no parent pointers currently, and I would like to
479 |  * avoid adding them. That means maintaining a stack
480 |  *
481 |  */
482 | 
/* One frame of the iterator's explicit stack of nodes still to be visited,
 * kept as a singly linked list with the top of the stack at the head. */
typedef struct hattrie_node_stack_t_
{
    unsigned char   c;  /* child index under which `node` hangs in its parent */
    size_t level;       /* depth assigned to `node` (the root is pushed at 0) */

    node_ptr node;      /* the node to visit */
    struct hattrie_node_stack_t_* next; /* frame below this one, or NULL */

} hattrie_node_stack_t;
492 | 
493 | 
/* State of a traversal over the whole trie. */
struct hattrie_iter_t_
{
    char* key;      // buffer holding the characters of the current trie path
    size_t keysize; // space reserved for the key
    size_t level;   // number of characters of `key` currently in use

    /* keep track of keys stored in trie nodes */
    bool    has_nil_key; // set when the visited trie node carries NODE_HAS_VAL
    value_t nil_val;     // copy of that trie node's value

    const hattrie_t* T;          // trie being iterated
    bool sorted;                 // passed on to ahtable_iter_begin for each bucket
    ahtable_iter_t* i;           // iterator over the current bucket, or NULL
    hattrie_node_stack_t* stack; // nodes still to be visited
};
509 | 
510 | 
511 | static void hattrie_iter_pushchar(hattrie_iter_t* i, size_t level, char c)
512 | {
513 |     if (i->keysize < level) {
514 |         i->keysize *= 2;
515 |         i->key = realloc_or_die(i->key, i->keysize * sizeof(char));
516 |     }
517 | 
518 |     if (level > 0) {
519 |         i->key[level - 1] = c;
520 |     }
521 | 
522 |     i->level = level;
523 | }
524 | 
525 | 
526 | static void hattrie_iter_nextnode(hattrie_iter_t* i)
527 | {
528 |     if (i->stack == NULL) return;
529 | 
530 |     /* pop the stack */
531 |     node_ptr node;
532 |     hattrie_node_stack_t* next;
533 |     unsigned char   c;
534 |     size_t level;
535 | 
536 |     node  = i->stack->node;
537 |     next  = i->stack->next;
538 |     c     = i->stack->c;
539 |     level = i->stack->level;
540 | 
541 |     free(i->stack);
542 |     i->stack = next;
543 | 
544 |     if (*node.flag & NODE_TYPE_TRIE) {
545 |         hattrie_iter_pushchar(i, level, c);
546 | 
547 |         if(node.t->flag & NODE_HAS_VAL) {
548 |             i->has_nil_key = true;
549 |             i->nil_val = node.t->val;
550 |         }
551 | 
552 |         /* push all child nodes from right to left */
553 |         int j;
554 |         for (j = NODE_MAXCHAR; j >= 0; --j) {
555 | 
556 |             /* skip repeated pointers to hybrid bucket */
557 |             if (j < NODE_MAXCHAR && node.t->xs[j].t == node.t->xs[j + 1].t) continue;
558 | 
559 |             // push stack
560 |             next = i->stack;
561 |             i->stack = malloc_or_die(sizeof(hattrie_node_stack_t));
562 |             i->stack->node  = node.t->xs[j];
563 |             i->stack->next  = next;
564 |             i->stack->level = level + 1;
565 |             i->stack->c     = (unsigned char) j;
566 |         }
567 |     }
568 |     else {
569 |         if (*node.flag & NODE_TYPE_PURE_BUCKET) {
570 |             hattrie_iter_pushchar(i, level, c);
571 |         }
572 |         else {
573 |             i->level = level - 1;
574 |         }
575 | 
576 |         i->i = ahtable_iter_begin(node.b, i->sorted);
577 |     }
578 | }
579 | 
580 | 
581 | /* Create an iterator over T, positioned on its first key if there is one. */
582 | hattrie_iter_t* hattrie_iter_begin(const hattrie_t* T, bool sorted)
583 | {
584 |     hattrie_iter_t* it = malloc_or_die(sizeof(hattrie_iter_t));
585 |     it->T           = T;
586 |     it->sorted      = sorted;   /* handed to each bucket iterator opened */
587 |     it->i           = NULL;
588 |     it->keysize     = 16;
589 |     it->key         = malloc_or_die(it->keysize * sizeof(char));
590 |     it->level       = 0;
591 |     it->has_nil_key = false;
592 |     it->nil_val     = 0;
593 | 
594 |     hattrie_node_stack_t* root = malloc_or_die(sizeof(hattrie_node_stack_t));
595 |     root->next  = NULL;
596 |     root->node  = T->root;
597 |     root->c     = '\0';
598 |     root->level = 0;
599 |     it->stack   = root;         /* the traversal starts from the root node */
600 | 
601 |     /* expand nodes until one yields a key or none are left to visit */
602 |     for (;;) {
603 |         bool drained = it->i == NULL || ahtable_iter_finished(it->i);
604 |         if (!drained || it->has_nil_key || it->stack == NULL) break;
605 |         ahtable_iter_free(it->i);
606 |         it->i = NULL;
607 |         hattrie_iter_nextnode(it);
608 |     }
609 | 
610 |     if (it->i != NULL && ahtable_iter_finished(it->i)) {   /* drop a spent bucket */
611 |         ahtable_iter_free(it->i);
612 |         it->i = NULL;
613 |     }
614 |     return it;
615 | }
615 | 
616 | 
617 | /* Advance to the next key; does nothing once the iterator has finished. */
618 | void hattrie_iter_next(hattrie_iter_t* i)
619 | {
620 |     if (hattrie_iter_finished(i)) return;
621 | 
622 |     bool in_bucket = i->i != NULL && !ahtable_iter_finished(i->i);
623 |     if (in_bucket) ahtable_iter_next(i->i);
624 |     else if (i->has_nil_key) {   /* nil key consumed: clear it, open the next node */
625 |         i->has_nil_key = false;
626 |         i->nil_val = 0;
627 |         hattrie_iter_nextnode(i);
628 |     }
629 | 
630 |     /* keep opening nodes until a key turns up or the stack is empty */
631 |     for (;;) {
632 |         bool drained = i->i == NULL || ahtable_iter_finished(i->i);
633 |         if (!drained || i->has_nil_key || i->stack == NULL) break;
634 |         ahtable_iter_free(i->i);
635 |         i->i = NULL;
636 |         hattrie_iter_nextnode(i);
637 |     }
638 | 
639 |     if (i->i != NULL && ahtable_iter_finished(i->i)) {
640 |         ahtable_iter_free(i->i);
641 |         i->i = NULL;
642 |     }
643 | }
643 | 
644 | 
645 | bool hattrie_iter_finished(hattrie_iter_t* i)
646 | {   /* finished once no node, no bucket iterator and no nil key is pending */
647 |     return !(i->stack != NULL || i->i != NULL || i->has_nil_key);
648 | }
649 | 
650 | 
651 | /* Free the iterator together with everything it owns; NULL is ignored. */
652 | void hattrie_iter_free(hattrie_iter_t* i)
653 | {
654 |     if (i == NULL) return;
655 |     if (i->i != NULL) ahtable_iter_free(i->i);
656 | 
657 |     hattrie_node_stack_t* top;   /* unvisited nodes still on the stack */
658 |     while ((top = i->stack) != NULL) {
659 |         i->stack = top->next;
660 |         free(top);
661 |     }
662 | 
663 |     free(i->key);
664 |     free(i);
665 | }
666 | 
667 | 
668 | /* Return the current key, NUL-terminated, in a buffer owned by the iterator
669 |  * (NULL once finished). The key length is stored in *len if len is non-NULL.
670 |  * The buffer may be reallocated by a later call. */
671 | const char* hattrie_iter_key(hattrie_iter_t* i, size_t* len)
672 | {
673 |     if (hattrie_iter_finished(i)) return NULL;
674 | 
675 |     /* a nil key consists solely of the i->level prefix bytes in i->key */
676 |     size_t sublen = 0;
677 |     const char* subkey = NULL;
678 |     if (!i->has_nil_key) subkey = ahtable_iter_key(i->i, &sublen);
679 | 
680 |     if (i->keysize < i->level + sublen + 1) {
681 |         while (i->keysize < i->level + sublen + 1) i->keysize *= 2;
682 |         i->key = realloc_or_die(i->key, i->keysize * sizeof(char));
683 |     }
684 | 
685 |     /* memcpy must not be handed a NULL source, not even for zero bytes */
686 |     if (sublen > 0) memcpy(i->key + i->level, subkey, sublen);
687 |     i->key[i->level + sublen] = '\0';
688 | 
689 |     if (len) *len = i->level + sublen;
690 |     return i->key;
691 | }
692 | 
693 | 
694 | /* Pointer to the current value: the nil-key slot, a bucket slot, or NULL. */
695 | value_t* hattrie_iter_val(hattrie_iter_t* i)
696 | {
697 |     value_t* val = NULL;   /* stays NULL for a finished iterator */
698 |     if (i->has_nil_key)                 val = &i->nil_val;
699 |     else if (!hattrie_iter_finished(i)) val = ahtable_iter_val(i->i);
700 |     return val;
701 | }
702 | 
703 | 
704 | 
705 | /* Equal when both share the trie, ordering flag and bucket-iterator pointer. */
706 | bool hattrie_iter_equal(const hattrie_iter_t* a, const hattrie_iter_t* b)
707 | {
708 |     if (a->T != b->T)           return false;
709 |     if (a->sorted != b->sorted) return false;
710 |     return a->i == b->i;
711 | }
712 | 


--------------------------------------------------------------------------------
/hat-trie/src/pstdint.h:
--------------------------------------------------------------------------------
  1 | /*  A portable stdint.h
  2 |  ****************************************************************************
  3 |  *  BSD License:
  4 |  ****************************************************************************
  5 |  *
  6 |  *  Copyright (c) 2005-2014 Paul Hsieh
  7 |  *  All rights reserved.
  8 |  *
  9 |  *  Redistribution and use in source and binary forms, with or without
 10 |  *  modification, are permitted provided that the following conditions
 11 |  *  are met:
 12 |  *
 13 |  *  1. Redistributions of source code must retain the above copyright
 14 |  *     notice, this list of conditions and the following disclaimer.
 15 |  *  2. Redistributions in binary form must reproduce the above copyright
 16 |  *     notice, this list of conditions and the following disclaimer in the
 17 |  *     documentation and/or other materials provided with the distribution.
 18 |  *  3. The name of the author may not be used to endorse or promote products
 19 |  *     derived from this software without specific prior written permission.
 20 |  *
 21 |  *  THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 22 |  *  IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 23 |  *  OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 24 |  *  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 25 |  *  INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 26 |  *  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 27 |  *  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 28 |  *  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 29 |  *  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 30 |  *  THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 31 |  *
 32 |  ****************************************************************************
 33 |  *
 34 |  *  Version 0.1.14
 35 |  *
 36 |  *  The ANSI C standard committee, for the C99 standard, specified the
 37 |  *  inclusion of a new standard include file called stdint.h.  This is
 38 |  *  a very useful and long desired include file which contains several
 39 |  *  very precise definitions for integer scalar types that is
 40 |  *  critically important for making portable several classes of
 41 |  *  applications including cryptography, hashing, variable length
 42 |  *  integer libraries and so on.  But for most developers its likely
 43 |  *  useful just for programming sanity.
 44 |  *
 45 |  *  The problem is that most compiler vendors have decided not to
 46 |  *  implement the C99 standard, and the next C++ language standard
 47 |  *  (which has a lot more mindshare these days) will be a long time in
 48 |  *  coming and its unknown whether or not it will include stdint.h or
 49 |  *  how much adoption it will have.  Either way, it will be a long time
 50 |  *  before all compilers come with a stdint.h and it also does nothing
 51 |  *  for the extremely large number of compilers available today which
 52 |  *  do not include this file, or anything comparable to it.
 53 |  *
 54 |  *  So that's what this file is all about.  It's an attempt to build a
 55 |  *  single universal include file that works on as many platforms as
 56 |  *  possible to deliver what stdint.h is supposed to.  A few things
 57 |  *  that should be noted about this file:
 58 |  *
 59 |  *    1) It is not guaranteed to be portable and/or present an identical
 60 |  *       interface on all platforms.  The extreme variability of the
 61 |  *       ANSI C standard makes this an impossibility right from the
 62 |  *       very get go. Its really only meant to be useful for the vast
 63 |  *       majority of platforms that possess the capability of
 64 |  *       implementing usefully and precisely defined, standard sized
 65 |  *       integer scalars.  Systems which are not intrinsically 2s
 66 |  *       complement may produce invalid constants.
 67 |  *
 68 |  *    2) There is an unavoidable use of non-reserved symbols.
 69 |  *
 70 |  *    3) Other standard include files are invoked.
 71 |  *
 72 |  *    4) This file may come in conflict with future platforms that do
 73 |  *       include stdint.h.  The hope is that one or the other can be
 74 |  *       used with no real difference.
 75 |  *
 76 |  *    5) In the current version, if your platform can't represent
 77 |  *       int32_t, int16_t and int8_t, it just dumps out with a compiler
 78 |  *       error.
 79 |  *
 80 |  *    6) 64 bit integers may or may not be defined.  Test for their
 81 |  *       presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
 82 |  *       Note that this is different from the C99 specification which
 83 |  *       requires the existence of 64 bit support in the compiler.  If
 84 |  *       this is not defined for your platform, yet it is capable of
 85 |  *       dealing with 64 bits then it is because this file has not yet
 86 |  *       been extended to cover all of your system's capabilities.
 87 |  *
 88 |  *    7) (u)intptr_t may or may not be defined.  Test for its presence
 89 |  *       with the test: #ifdef PTRDIFF_MAX.  If this is not defined
 90 |  *       for your platform, then it is because this file has not yet
 91 |  *       been extended to cover all of your system's capabilities, not
 92 |  *       because its optional.
 93 |  *
 94 |  *    8) The following might not be defined even if your platform is
 95 |  *       capable of defining it:
 96 |  *
 97 |  *       WCHAR_MIN
 98 |  *       WCHAR_MAX
 99 |  *       (u)int64_t
100 |  *       PTRDIFF_MIN
101 |  *       PTRDIFF_MAX
102 |  *       (u)intptr_t
103 |  *
104 |  *    9) The following have not been defined:
105 |  *
106 |  *       WINT_MIN
107 |  *       WINT_MAX
108 |  *
109 |  *   10) The criteria for defining (u)int_least(*)_t isn't clear,
110 |  *       except for systems which don't have a type that precisely
111 |  *       defined 8, 16, or 32 bit types (which this include file does
112 |  *       not support anyways). Default definitions have been given.
113 |  *
114 |  *   11) The criteria for defining (u)int_fast(*)_t isn't something I
115 |  *       would trust to any particular compiler vendor or the ANSI C
116 |  *       committee.  It is well known that "compatible systems" are
117 |  *       commonly created that have very different performance
118 |  *       characteristics from the systems they are compatible with,
119 |  *       especially those whose vendors make both the compiler and the
120 |  *       system.  Default definitions have been given, but its strongly
121 |  *       recommended that users never use these definitions for any
122 |  *       reason (they do *NOT* deliver any serious guarantee of
123 |  *       improved performance -- not in this file, nor any vendor's
124 |  *       stdint.h).
125 |  *
126 |  *   12) The following macros:
127 |  *
128 |  *       PRINTF_INTMAX_MODIFIER
129 |  *       PRINTF_INT64_MODIFIER
130 |  *       PRINTF_INT32_MODIFIER
131 |  *       PRINTF_INT16_MODIFIER
132 |  *       PRINTF_LEAST64_MODIFIER
133 |  *       PRINTF_LEAST32_MODIFIER
134 |  *       PRINTF_LEAST16_MODIFIER
135 |  *       PRINTF_INTPTR_MODIFIER
136 |  *
137 |  *       are strings which have been defined as the modifiers required
138 |  *       for the "d", "u" and "x" printf formats to correctly output
139 |  *       (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
140 |  *       (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
141 |  *       PRINTF_INTPTR_MODIFIER is not defined for some systems which
142 |  *       provide their own stdint.h.  PRINTF_INT64_MODIFIER is not
143 |  *       defined if INT64_MAX is not defined.  These are an extension
144 |  *       beyond what C99 specifies must be in stdint.h.
145 |  *
146 |  *       In addition, the following macros are defined:
147 |  *
148 |  *       PRINTF_INTMAX_HEX_WIDTH
149 |  *       PRINTF_INT64_HEX_WIDTH
150 |  *       PRINTF_INT32_HEX_WIDTH
151 |  *       PRINTF_INT16_HEX_WIDTH
152 |  *       PRINTF_INT8_HEX_WIDTH
153 |  *       PRINTF_INTMAX_DEC_WIDTH
154 |  *       PRINTF_INT64_DEC_WIDTH
155 |  *       PRINTF_INT32_DEC_WIDTH
156 |  *       PRINTF_INT16_DEC_WIDTH
157 |  *       PRINTF_INT8_DEC_WIDTH
158 |  *
159 |  *       Which specifies the maximum number of characters required to
160 |  *       print the number of that type in either hexadecimal or decimal.
161 |  *       These are an extension beyond what C99 specifies must be in
162 |  *       stdint.h.
163 |  *
164 |  *  Compilers tested (all with 0 warnings at their highest respective
165 |  *  settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
166 |  *  bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
167 |  *  .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
168 |  *
169 |  *  This file should be considered a work in progress.  Suggestions for
170 |  *  improvements, especially those which increase coverage are strongly
171 |  *  encouraged.
172 |  *
173 |  *  Acknowledgements
174 |  *
175 |  *  The following people have made significant contributions to the
176 |  *  development and testing of this file:
177 |  *
178 |  *  Chris Howie
179 |  *  John Steele Scott
180 |  *  Dave Thorup
181 |  *  John Dill
182 |  *  Florian Wobbe
183 |  *  Christopher Sean Morrison
184 |  *
185 |  */
186 | 
187 | #include <stddef.h>
188 | #include <limits.h>
189 | #include <signal.h>
190 | 
191 | /*
192 |  *  For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
193 |  *  do nothing else.  On the Mac OS X version of gcc this is _STDINT_H_.
194 |  */
195 | 
196 | #if ((defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (__GNUC__ > 3 || defined(_STDINT_H) || defined(_STDINT_H_) || defined (__UINT_FAST64_TYPE__)) )) && !defined (_PSTDINT_H_INCLUDED)
197 | #include <stdint.h>   /* take the types and limits from the system header */
198 | #define _PSTDINT_H_INCLUDED   /* also switches off the hand-rolled definitions further down */
199 | # if defined(__GNUC__) && (defined(__x86_64__) || defined(__ppc64__))   /* NOTE(review): other 64-bit GNU targets take the else branch - confirm */
200 | #  ifndef PRINTF_INT64_MODIFIER
201 | #   define PRINTF_INT64_MODIFIER "l"
202 | #  endif
203 | #  ifndef PRINTF_INT32_MODIFIER
204 | #   define PRINTF_INT32_MODIFIER ""
205 | #  endif
206 | # else
207 | #  ifndef PRINTF_INT64_MODIFIER
208 | #   define PRINTF_INT64_MODIFIER "ll"
209 | #  endif
210 | #  ifndef PRINTF_INT32_MODIFIER
211 | #   define PRINTF_INT32_MODIFIER "l"
212 | #  endif
213 | # endif
214 | # ifndef PRINTF_INT16_MODIFIER
215 | #  define PRINTF_INT16_MODIFIER "h"
216 | # endif
217 | # ifndef PRINTF_INTMAX_MODIFIER
218 | #  define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
219 | # endif
220 | # ifndef PRINTF_INT64_HEX_WIDTH   /* field widths are string literals, one per type size */
221 | #  define PRINTF_INT64_HEX_WIDTH "16"
222 | # endif
223 | # ifndef PRINTF_INT32_HEX_WIDTH
224 | #  define PRINTF_INT32_HEX_WIDTH "8"
225 | # endif
226 | # ifndef PRINTF_INT16_HEX_WIDTH
227 | #  define PRINTF_INT16_HEX_WIDTH "4"
228 | # endif
229 | # ifndef PRINTF_INT8_HEX_WIDTH
230 | #  define PRINTF_INT8_HEX_WIDTH "2"
231 | # endif
232 | # ifndef PRINTF_INT64_DEC_WIDTH
233 | #  define PRINTF_INT64_DEC_WIDTH "20"
234 | # endif
235 | # ifndef PRINTF_INT32_DEC_WIDTH
236 | #  define PRINTF_INT32_DEC_WIDTH "10"
237 | # endif
238 | # ifndef PRINTF_INT16_DEC_WIDTH
239 | #  define PRINTF_INT16_DEC_WIDTH "5"
240 | # endif
241 | # ifndef PRINTF_INT8_DEC_WIDTH
242 | #  define PRINTF_INT8_DEC_WIDTH "3"
243 | # endif
244 | # ifndef PRINTF_INTMAX_HEX_WIDTH   /* intmax is treated as the 64-bit type here */
245 | #  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
246 | # endif
247 | # ifndef PRINTF_INTMAX_DEC_WIDTH
248 | #  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
249 | # endif
250 | 
251 | /*
252 |  *  Something really weird is going on with Open Watcom.  Just pull some of
253 |  *  these duplicated definitions from Open Watcom's stdint.h file for now.
254 |  */
255 | 
256 | # if defined (__WATCOMC__) && __WATCOMC__ >= 1250
257 | #  if !defined (INT64_C)
258 | #   define INT64_C(x)   (x + (INT64_MAX - INT64_MAX))
259 | #  endif
260 | #  if !defined (UINT64_C)
261 | #   define UINT64_C(x)  (x + (UINT64_MAX - UINT64_MAX))
262 | #  endif
263 | #  if !defined (INT32_C)
264 | #   define INT32_C(x)   (x + (INT32_MAX - INT32_MAX))
265 | #  endif
266 | #  if !defined (UINT32_C)
267 | #   define UINT32_C(x)  (x + (UINT32_MAX - UINT32_MAX))
268 | #  endif
269 | #  if !defined (INT16_C)
270 | #   define INT16_C(x)   (x)
271 | #  endif
272 | #  if !defined (UINT16_C)
273 | #   define UINT16_C(x)  (x)
274 | #  endif
275 | #  if !defined (INT8_C)
276 | #   define INT8_C(x)   (x)
277 | #  endif
278 | #  if !defined (UINT8_C)
279 | #   define UINT8_C(x)  (x)
280 | #  endif
281 | #  if !defined (UINT64_MAX)
282 | #   define UINT64_MAX  18446744073709551615ULL
283 | #  endif
284 | #  if !defined (INT64_MAX)
285 | #   define INT64_MAX  9223372036854775807LL
286 | #  endif
287 | #  if !defined (UINT32_MAX)
288 | #   define UINT32_MAX  4294967295UL
289 | #  endif
290 | #  if !defined (INT32_MAX)
291 | #   define INT32_MAX  2147483647L
292 | #  endif
293 | #  if !defined (INTMAX_MAX)
294 | #   define INTMAX_MAX INT64_MAX
295 | #  endif
296 | #  if !defined (INTMAX_MIN)
297 | #   define INTMAX_MIN INT64_MIN   /* INT64_MIN itself is not defined in this branch */
298 | #  endif
299 | # endif
300 | #endif   /* system stdint.h branch */
301 | 
302 | #ifndef _PSTDINT_H_INCLUDED
303 | #define _PSTDINT_H_INCLUDED
304 | 
305 | #ifndef SIZE_MAX
306 | # define SIZE_MAX (~(size_t)0)   /* every bit of a size_t set */
307 | #endif
308 | 
309 | /*
310 |  *  Deduce the type assignments from limits.h under the assumption that
311 |  *  integer sizes in bits are powers of 2, and follow the ANSI
312 |  *  definitions.
313 |  */
314 | 
315 | #ifndef UINT8_MAX
316 | # define UINT8_MAX 0xff
317 | #endif
318 | #if !defined(uint8_t) && !defined(_UINT8_T)   /* skipped when either name is a defined macro */
319 | # if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S)   /* unsigned char is 8 bits wide */
320 |     typedef unsigned char uint8_t;
321 | #   define UINT8_C(v) ((uint8_t) (v))   /* argument parenthesised so an expression is cast as a whole */
322 | # else
323 | #   error "Platform not supported"
324 | # endif
325 | #endif
326 | 
327 | #ifndef INT8_MAX
328 | # define INT8_MAX 0x7f
329 | #endif
330 | #ifndef INT8_MIN
331 | # define INT8_MIN (-INT8_MAX - 1)   /* cast-free: usable in #if and independent of INT8_C */
332 | #endif
333 | #if !defined(int8_t) && !defined(_INT8_T)   /* skipped when either name is a defined macro */
334 | # if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S)   /* signed char is 8 bits wide */
335 |     typedef signed char int8_t;
336 | #   define INT8_C(v) ((int8_t) (v))   /* argument parenthesised so an expression is cast as a whole */
337 | # else
338 | #   error "Platform not supported"
339 | # endif
340 | #endif
341 | 
342 | #ifndef UINT16_MAX
343 | # define UINT16_MAX 0xffff
344 | #endif
345 | #if !defined(uint16_t) && !defined(_UINT16_T)   /* skipped when either name is a defined macro */
346 | #if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S)   /* unsigned int is 16 bits wide */
347 |   typedef unsigned int uint16_t;
348 | # ifndef PRINTF_INT16_MODIFIER
349 | #  define PRINTF_INT16_MODIFIER ""
350 | # endif
351 | # define UINT16_C(v) ((uint16_t) (v))
352 | #elif (USHRT_MAX == UINT16_MAX)   /* unsigned short is 16 bits wide */
353 |   typedef unsigned short uint16_t;
354 | # define UINT16_C(v) ((uint16_t) (v))
355 | # ifndef PRINTF_INT16_MODIFIER
356 | #  define PRINTF_INT16_MODIFIER "h"
357 | # endif
358 | #else
359 | #error "Platform not supported"
360 | #endif
361 | #endif   /* uint16_t */
362 | 
363 | #ifndef INT16_MAX
364 | # define INT16_MAX 0x7fff
365 | #endif
366 | #ifndef INT16_MIN
367 | # define INT16_MIN (-INT16_MAX - 1)   /* cast-free: usable in #if and independent of INT16_C */
368 | #endif
369 | #if !defined(int16_t) && !defined(_INT16_T)   /* skipped when either name is a defined macro */
370 | #if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S)   /* int is 16 bits wide */
371 |   typedef signed int int16_t;
372 | # define INT16_C(v) ((int16_t) (v))
373 | # ifndef PRINTF_INT16_MODIFIER
374 | #  define PRINTF_INT16_MODIFIER ""
375 | # endif
376 | #elif (SHRT_MAX == INT16_MAX)   /* short is 16 bits wide */
377 |   typedef signed short int16_t;
378 | # define INT16_C(v) ((int16_t) (v))
379 | # ifndef PRINTF_INT16_MODIFIER
380 | #  define PRINTF_INT16_MODIFIER "h"
381 | # endif
382 | #else
383 | #error "Platform not supported"
384 | #endif
385 | #endif   /* int16_t */
386 | 
387 | #ifndef UINT32_MAX
388 | # define UINT32_MAX (0xffffffffUL)
389 | #endif
390 | #if !defined(uint32_t) && !defined(_UINT32_T)   /* skipped when either name is a defined macro */
391 | #if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S)   /* unsigned long is 32 bits wide */
392 |   typedef unsigned long uint32_t;
393 | # define UINT32_C(v) v ## UL
394 | # ifndef PRINTF_INT32_MODIFIER
395 | #  define PRINTF_INT32_MODIFIER "l"
396 | # endif
397 | #elif (UINT_MAX == UINT32_MAX)   /* unsigned int is 32 bits wide */
398 |   typedef unsigned int uint32_t;
399 | # ifndef PRINTF_INT32_MODIFIER
400 | #  define PRINTF_INT32_MODIFIER ""
401 | # endif
402 | # define UINT32_C(v) v ## U
403 | #elif (USHRT_MAX == UINT32_MAX)   /* unsigned short is 32 bits wide */
404 |   typedef unsigned short uint32_t;
405 | # define UINT32_C(v) ((unsigned short) (v))
406 | # ifndef PRINTF_INT32_MODIFIER
407 | #  define PRINTF_INT32_MODIFIER ""
408 | # endif
409 | #else
410 | #error "Platform not supported"
411 | #endif
412 | #endif   /* uint32_t */
413 | 
414 | #ifndef INT32_MAX
415 | # define INT32_MAX (0x7fffffffL)
416 | #endif
417 | #ifndef INT32_MIN
418 | # define INT32_MIN (-INT32_MAX - 1)   /* 0x80000000 would be an unsigned, positive literal */
419 | #endif
420 | #if !defined(int32_t) && !defined(_INT32_T)   /* skipped when either name is a defined macro */
421 | #if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S)   /* long is 32 bits wide */
422 |   typedef signed long int32_t;
423 | # define INT32_C(v) v ## L
424 | # ifndef PRINTF_INT32_MODIFIER
425 | #  define PRINTF_INT32_MODIFIER "l"
426 | # endif
427 | #elif (INT_MAX == INT32_MAX)   /* int is 32 bits wide */
428 |   typedef signed int int32_t;
429 | # define INT32_C(v) v
430 | # ifndef PRINTF_INT32_MODIFIER
431 | #  define PRINTF_INT32_MODIFIER ""
432 | # endif
433 | #elif (SHRT_MAX == INT32_MAX)   /* short is 32 bits wide */
434 |   typedef signed short int32_t;
435 | # define INT32_C(v) ((short) (v))
436 | # ifndef PRINTF_INT32_MODIFIER
437 | #  define PRINTF_INT32_MODIFIER ""
438 | # endif
439 | #else
440 | #error "Platform not supported"
441 | #endif
442 | #endif   /* int32_t */
443 | 
444 | /*
445 |  *  The macro stdint_int64_defined is temporarily used to record
446 |  *  whether or not 64 integer support is available.  It must be
447 |  *  defined for any 64 integer extensions for new platforms that are
448 |  *  added.
449 |  */
450 | 
451 | #undef stdint_int64_defined
452 | #if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S)
453 | # if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S)   /* C99 or later: long long is used */
454 | #  define stdint_int64_defined
455 |    typedef long long int64_t;
456 |    typedef unsigned long long uint64_t;
457 | #  define UINT64_C(v) v ## ULL
458 | #  define  INT64_C(v) v ## LL
459 | #  ifndef PRINTF_INT64_MODIFIER
460 | #   define PRINTF_INT64_MODIFIER "ll"
461 | #  endif
462 | # endif
463 | #endif
464 | /* Not C99: try the compiler-specific 64-bit types instead. */
465 | #if !defined (stdint_int64_defined)
466 | # if defined(__GNUC__)
467 | #  define stdint_int64_defined
468 |    __extension__ typedef long long int64_t;
469 |    __extension__ typedef unsigned long long uint64_t;
470 | #  define UINT64_C(v) v ## ULL
471 | #  define  INT64_C(v) v ## LL
472 | #  ifndef PRINTF_INT64_MODIFIER
473 | #   define PRINTF_INT64_MODIFIER "ll"
474 | #  endif
475 | # elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S)
476 | #  define stdint_int64_defined
477 |    typedef long long int64_t;
478 |    typedef unsigned long long uint64_t;
479 | #  define UINT64_C(v) v ## ULL
480 | #  define  INT64_C(v) v ## LL
481 | #  ifndef PRINTF_INT64_MODIFIER
482 | #   define PRINTF_INT64_MODIFIER "ll"
483 | #  endif
484 | # elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC)
485 | #  define stdint_int64_defined
486 |    typedef __int64 int64_t;
487 |    typedef unsigned __int64 uint64_t;
488 | #  define UINT64_C(v) v ## UI64
489 | #  define  INT64_C(v) v ## I64
490 | #  ifndef PRINTF_INT64_MODIFIER
491 | #   define PRINTF_INT64_MODIFIER "I64"
492 | #  endif
493 | # endif   /* no branch matched: stdint_int64_defined stays undefined */
494 | #endif
495 | 
496 | #if !defined (LONG_LONG_MAX) && defined (INT64_C)
497 | # define LONG_LONG_MAX INT64_C (9223372036854775807)
498 | #endif
499 | #if !defined (ULONG_LONG_MAX) && defined (UINT64_C)   /* needs the 64-bit suffix macro */
500 | # define ULONG_LONG_MAX UINT64_C (18446744073709551615)
501 | #endif
502 | /* Limits of the 64-bit types, defined only where they are still missing. */
503 | #if !defined (INT64_MAX) && defined (INT64_C)
504 | # define INT64_MAX INT64_C (9223372036854775807)
505 | #endif
506 | #if !defined (INT64_MIN) && defined (INT64_C)
507 | # define INT64_MIN (-INT64_MAX - INT64_C (1))   /* 9223372036854775808 does not fit a signed 64-bit literal */
508 | #endif
509 | #if !defined (UINT64_MAX) && defined (INT64_C)
510 | # define UINT64_MAX UINT64_C (18446744073709551615)
511 | #endif
512 | 
513 | /*
514 |  *  Width of hexadecimal for number field.
515 |  */
516 | 
517 | #ifndef PRINTF_INT64_HEX_WIDTH   /* two hex digits per byte */
518 | # define PRINTF_INT64_HEX_WIDTH "16"
519 | #endif
520 | #ifndef PRINTF_INT32_HEX_WIDTH
521 | # define PRINTF_INT32_HEX_WIDTH "8"
522 | #endif
523 | #ifndef PRINTF_INT16_HEX_WIDTH
524 | # define PRINTF_INT16_HEX_WIDTH "4"
525 | #endif
526 | #ifndef PRINTF_INT8_HEX_WIDTH
527 | # define PRINTF_INT8_HEX_WIDTH "2"
528 | #endif
529 | /* Decimal widths. NOTE(review): "10", "5" and "3" leave no room for a minus sign - confirm. */
530 | #ifndef PRINTF_INT64_DEC_WIDTH
531 | # define PRINTF_INT64_DEC_WIDTH "20"
532 | #endif
533 | #ifndef PRINTF_INT32_DEC_WIDTH
534 | # define PRINTF_INT32_DEC_WIDTH "10"
535 | #endif
536 | #ifndef PRINTF_INT16_DEC_WIDTH
537 | # define PRINTF_INT16_DEC_WIDTH "5"
538 | #endif
539 | #ifndef PRINTF_INT8_DEC_WIDTH
540 | # define PRINTF_INT8_DEC_WIDTH "3"
541 | #endif
542 | 
543 | /*
544 |  *  Ok, let's not worry about 128 bit integers for now.  Moore's law says
545 |  *  we don't need to worry about that until about 2040 at which point
546 |  *  we'll have bigger things to worry about.
547 |  */
548 | #ifdef stdint_int64_defined   /* the widest type on offer is 64 bits */
549 |   typedef int64_t intmax_t;
550 |   typedef uint64_t uintmax_t;
551 | # define  INTMAX_MAX   INT64_MAX
552 | # define  INTMAX_MIN   INT64_MIN
553 | # define UINTMAX_MAX  UINT64_MAX
554 | # define UINTMAX_C(v) UINT64_C(v)
555 | # define  INTMAX_C(v)  INT64_C(v)
556 | # ifndef PRINTF_INTMAX_MODIFIER
557 | #   define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
558 | # endif
559 | # ifndef PRINTF_INTMAX_HEX_WIDTH
560 | #  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
561 | # endif
562 | # ifndef PRINTF_INTMAX_DEC_WIDTH
563 | #  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
564 | # endif
565 | #else   /* no 64-bit type was detected: fall back to 32 bits */
566 |   typedef int32_t intmax_t;
567 |   typedef uint32_t uintmax_t;
568 | # define  INTMAX_MAX   INT32_MAX
569 | # define  INTMAX_MIN   INT32_MIN   /* mirrors the 64-bit branch */
570 | # define UINTMAX_MAX  UINT32_MAX
571 | # define UINTMAX_C(v) UINT32_C(v)
572 | # define  INTMAX_C(v)  INT32_C(v)
573 | # ifndef PRINTF_INTMAX_MODIFIER
574 | #   define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
575 | # endif
576 | # ifndef PRINTF_INTMAX_HEX_WIDTH
577 | #  define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
578 | # endif
579 | # ifndef PRINTF_INTMAX_DEC_WIDTH
580 | #  define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
581 | # endif
582 | #endif
583 | 
584 | /*
585 |  *  Because this file currently only supports platforms which have
586 |  *  precise powers of 2 as bit sizes for the default integers, the
587 |  *  least definitions are all trivial.  Its possible that a future
588 |  *  version of this file could have different definitions.
589 |  */
590 | 
591 | #ifndef stdint_least_defined   /* each least type is simply the exact-width type */
592 |   typedef   int8_t   int_least8_t;
593 |   typedef  uint8_t  uint_least8_t;
594 |   typedef  int16_t  int_least16_t;
595 |   typedef uint16_t uint_least16_t;
596 |   typedef  int32_t  int_least32_t;
597 |   typedef uint32_t uint_least32_t;
598 | # define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
599 | # define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
600 | # define  UINT_LEAST8_MAX  UINT8_MAX
601 | # define   INT_LEAST8_MAX   INT8_MAX
602 | # define UINT_LEAST16_MAX UINT16_MAX
603 | # define  INT_LEAST16_MAX  INT16_MAX
604 | # define UINT_LEAST32_MAX UINT32_MAX
605 | # define  INT_LEAST32_MAX  INT32_MAX
606 | # define   INT_LEAST8_MIN   INT8_MIN
607 | # define  INT_LEAST16_MIN  INT16_MIN
608 | # define  INT_LEAST32_MIN  INT32_MIN
609 | # ifdef stdint_int64_defined   /* 64-bit variants only when a 64-bit type was found */
610 |     typedef  int64_t  int_least64_t;
611 |     typedef uint64_t uint_least64_t;
612 | #   define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
613 | #   define UINT_LEAST64_MAX UINT64_MAX
614 | #   define  INT_LEAST64_MAX  INT64_MAX
615 | #   define  INT_LEAST64_MIN  INT64_MIN
616 | # endif
617 | #endif
618 | #undef stdint_least_defined
619 | 
620 | /*
621 |  *  The ANSI C committee pretending to know or specify anything about
622 |  *  performance is the epitome of misguided arrogance.  The mandate of
623 |  *  this file is to *ONLY* ever support that absolute minimum
624 |  *  definition of the fast integer types, for compatibility purposes.
625 |  *  No extensions, and no attempt to suggest what may or may not be a
626 |  *  faster integer type will ever be made in this file.  Developers are
627 |  *  warned to stay away from these types when using this or any other
628 |  *  stdint.h.
629 |  */
630 | 
631 | typedef   int_least8_t   int_fast8_t;   /* each fast type aliases the matching least type */
632 | typedef  uint_least8_t  uint_fast8_t;
633 | typedef  int_least16_t  int_fast16_t;
634 | typedef uint_least16_t uint_fast16_t;
635 | typedef  int_least32_t  int_fast32_t;
636 | typedef uint_least32_t uint_fast32_t;
637 | #define  UINT_FAST8_MAX  UINT_LEAST8_MAX
638 | #define   INT_FAST8_MAX   INT_LEAST8_MAX
639 | #define UINT_FAST16_MAX UINT_LEAST16_MAX
640 | #define  INT_FAST16_MAX  INT_LEAST16_MAX
641 | #define UINT_FAST32_MAX UINT_LEAST32_MAX
642 | #define  INT_FAST32_MAX  INT_LEAST32_MAX
643 | #define   INT_FAST8_MIN   INT_LEAST8_MIN
644 | #define  INT_FAST16_MIN  INT_LEAST16_MIN
645 | #define  INT_FAST32_MIN  INT_LEAST32_MIN
646 | #ifdef stdint_int64_defined   /* 64-bit variants only when a 64-bit type was found */
647 |   typedef  int_least64_t  int_fast64_t;
648 |   typedef uint_least64_t uint_fast64_t;
649 | # define UINT_FAST64_MAX UINT_LEAST64_MAX
650 | # define  INT_FAST64_MAX  INT_LEAST64_MAX
651 | # define  INT_FAST64_MIN  INT_LEAST64_MIN
652 | #endif
653 | /* The detection flag is removed here so it does not stay defined for includers. */
654 | #undef stdint_int64_defined
655 | 
656 | /*
657 |  *  Whatever piecemeal, per compiler thing we can do about the wchar_t
658 |  *  type limits.
659 |  */
660 | 
661 | #if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__)
662 | # include <wchar.h>
663 | # ifndef WCHAR_MIN   /* fallbacks, used only if wchar.h did not define the limits */
664 | #  define WCHAR_MIN 0
665 | # endif
666 | # ifndef WCHAR_MAX
667 | #  define WCHAR_MAX ((wchar_t)-1)   /* NOTE(review): the maximum only if wchar_t is unsigned - confirm */
668 | # endif
669 | #endif
670 | 
671 | /*
672 |  *  Whatever piecemeal, per compiler/platform thing we can do about the
673 |  *  (u)intptr_t types and limits.
674 |  */
675 | 
676 | #if (defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)) || defined (_UINTPTR_T)
677 | # define STDINT_H_UINTPTR_T_DEFINED   /* the platform flags (u)intptr_t as already present */
678 | #endif
679 | 
680 | #ifndef STDINT_H_UINTPTR_T_DEFINED
681 | # if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64) || defined (__ppc64__)
682 | #  define stdint_intptr_bits 64
683 | # elif defined (__WATCOMC__) || defined (__TURBOC__)
684 | #  if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
685 | #    define stdint_intptr_bits 16
686 | #  else
687 | #    define stdint_intptr_bits 32
688 | #  endif
689 | # elif defined (__i386__) || defined (_WIN32) || defined (WIN32) || defined (__ppc64__)   /* NOTE(review): __ppc64__ is already caught by the 64-bit test above, so it has no effect here */
690 | #  define stdint_intptr_bits 32
691 | # elif defined (__INTEL_COMPILER)
692 | /* TODO -- what did Intel do about x86-64? */
693 | # else
694 | /* #error "This platform might not be supported yet" */
695 | # endif
696 | /* With a known pointer width, build every name by pasting the width into it. */
697 | # ifdef stdint_intptr_bits
698 | #  define stdint_intptr_glue3_i(a,b,c)  a##b##c
699 | #  define stdint_intptr_glue3(a,b,c)    stdint_intptr_glue3_i(a,b,c)   /* extra level so stdint_intptr_bits expands before pasting */
700 | #  ifndef PRINTF_INTPTR_MODIFIER
701 | #    define PRINTF_INTPTR_MODIFIER      stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
702 | #  endif
703 | #  ifndef PTRDIFF_MAX
704 | #    define PTRDIFF_MAX                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
705 | #  endif
706 | #  ifndef PTRDIFF_MIN
707 | #    define PTRDIFF_MIN                 stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
708 | #  endif
709 | #  ifndef UINTPTR_MAX
710 | #    define UINTPTR_MAX                 stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
711 | #  endif
712 | #  ifndef INTPTR_MAX
713 | #    define INTPTR_MAX                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
714 | #  endif
715 | #  ifndef INTPTR_MIN
716 | #    define INTPTR_MIN                  stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
717 | #  endif
718 | #  ifndef INTPTR_C
719 | #    define INTPTR_C(x)                 stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
720 | #  endif
721 | #  ifndef UINTPTR_C
722 | #    define UINTPTR_C(x)                stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
723 | #  endif
724 |   typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t;
725 |   typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t)  intptr_t;
726 | # else
727 | /* TODO -- This following is likely wrong for some platforms, and does
728 |    nothing for the definition of uintptr_t. */
729 |   typedef ptrdiff_t intptr_t;
730 | # endif
731 | # define STDINT_H_UINTPTR_T_DEFINED
732 | #endif
733 | 
734 | /*
735 |  *  Assumes sig_atomic_t is signed and we have a 2s complement machine.
736 |  */
737 | 
738 | #ifndef SIG_ATOMIC_MAX /* NOTE(review): the fallback shifts 1 into the sign bit of a signed type before subtracting 1; C99 6.5.7 leaves that shift undefined, so this relies on compiler behaviour -- confirm or rewrite */
739 | # define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1)
740 | #endif
741 | 
742 | #endif
743 | 
744 | #if defined (__TEST_PSTDINT_FOR_CORRECTNESS)
745 | 
746 | /*
747 |  *  Please compile with the maximum warning settings to make sure macros are not
748 |  *  defined more than once.
749 |  */
750 | 
751 | #include <stdlib.h>
752 | #include <stdio.h>
753 | #include <string.h>
754 | 
755 | #define glue3_aux(x,y,z) x ## y ## z /* raw three-way token paste */
756 | #define glue3(x,y,z) glue3_aux(x,y,z) /* extra level so the arguments are macro-expanded before pasting */
757 | 
758 | #define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,) = glue3(UINT,bits,_C) (0); /* declares uintN_t uN = UINTN_C(0); the trailing semicolon is part of the macro */
759 | #define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,) = glue3(INT,bits,_C) (0); /* declares intN_t iN = INTN_C(0); the trailing semicolon is part of the macro */
760 | 
761 | #define DECL(us,bits) glue3(DECL,us,) (bits) /* us is U or I and selects DECLU or DECLI */
762 | 
763 | #define TESTUMAX(bits) glue3(u,bits,) = ~glue3(u,bits,); if (glue3(UINT,bits,_MAX) != glue3(u,bits,)) printf ("Something wrong with UINT%d_MAX\n", bits) /* complements uN in place, then reports if UINTN_MAX differs from it */
764 | 
765 | int main () { /* pstdint self-test: prints one diagnostic line per detected mismatch and always returns EXIT_SUCCESS */
766 | 	DECL(I,8)
767 | 	DECL(U,8)
768 | 	DECL(I,16)
769 | 	DECL(U,16)
770 | 	DECL(I,32)
771 | 	DECL(U,32)
772 | #ifdef INT64_MAX /* 64-bit variables exist only when the header provided a 64-bit type */
773 | 	DECL(I,64)
774 | 	DECL(U,64)
775 | #endif
776 | 	intmax_t imax = INTMAX_C(0);
777 | 	uintmax_t umax = UINTMAX_C(0);
778 | 	char str0[256], str1[256];
779 | 
780 | 	sprintf (str0, "%d %x\n", 0, ~0); /* reference text: plain int zero followed by ~0 in hex */
781 | 
782 | 	sprintf (str1, "%d %x\n",  i8, ~0); /* each zero-initialised variable must format to exactly the reference text; the trailing ~0 would expose a misread argument size */
783 | 	if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1);
784 | 	sprintf (str1, "%u %x\n",  u8, ~0);
785 | 	if (0 != strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1);
786 | 	sprintf (str1, "%d %x\n",  i16, ~0);
787 | 	if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1);
788 | 	sprintf (str1, "%u %x\n",  u16, ~0);
789 | 	if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1);
790 | 	sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n",  i32, ~0); /* from here on the PRINTF_*_MODIFIER macros themselves are under test */
791 | 	if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1);
792 | 	sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n",  u32, ~0);
793 | 	if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1);
794 | #ifdef INT64_MAX
795 | 	sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n",  i64, ~0);
796 | 	if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1);
797 | #endif
798 | 	sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n",  imax, ~0);
799 | 	if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1);
800 | 	sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n",  umax, ~0);
801 | 	if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1);
802 | 
803 | 	TESTUMAX(8); /* UINTn_MAX must equal the complement of zero stored in a uintn_t */
804 | 	TESTUMAX(16);
805 | 	TESTUMAX(32);
806 | #ifdef INT64_MAX
807 | 	TESTUMAX(64);
808 | #endif
809 | 
810 | 	return EXIT_SUCCESS;
811 | }
812 | 
813 | #endif
814 | 


--------------------------------------------------------------------------------
/src/chat_trie.c:
--------------------------------------------------------------------------------
   1 | /* Generated by Cython 0.23.4 */
   2 | 
   3 | #define PY_SSIZE_T_CLEAN
   4 | #include "Python.h"
   5 | #ifndef Py_PYTHON_H
   6 |     #error Python headers needed to compile C extensions, please install development version of Python.
   7 | #elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03020000)
   8 |     #error Cython requires Python 2.6+ or Python 3.2+.
   9 | #else
  10 | #define CYTHON_ABI "0_23_4"
  11 | #include <stddef.h>
  12 | #ifndef offsetof
  13 | #define offsetof(type, member) ( (size_t) & ((type*)0) -> member )
  14 | #endif
  15 | #if !defined(WIN32) && !defined(MS_WINDOWS)
  16 |   #ifndef __stdcall
  17 |     #define __stdcall
  18 |   #endif
  19 |   #ifndef __cdecl
  20 |     #define __cdecl
  21 |   #endif
  22 |   #ifndef __fastcall
  23 |     #define __fastcall
  24 |   #endif
  25 | #endif
  26 | #ifndef DL_IMPORT
  27 |   #define DL_IMPORT(t) t
  28 | #endif
  29 | #ifndef DL_EXPORT
  30 |   #define DL_EXPORT(t) t
  31 | #endif
  32 | #ifndef PY_LONG_LONG
  33 |   #define PY_LONG_LONG LONG_LONG
  34 | #endif
  35 | #ifndef Py_HUGE_VAL
  36 |   #define Py_HUGE_VAL HUGE_VAL
  37 | #endif
  38 | #ifdef PYPY_VERSION
  39 | #define CYTHON_COMPILING_IN_PYPY 1
  40 | #define CYTHON_COMPILING_IN_CPYTHON 0
  41 | #else
  42 | #define CYTHON_COMPILING_IN_PYPY 0
  43 | #define CYTHON_COMPILING_IN_CPYTHON 1
  44 | #endif
  45 | #if !defined(CYTHON_USE_PYLONG_INTERNALS) && CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x02070000
  46 | #define CYTHON_USE_PYLONG_INTERNALS 1
  47 | #endif
  48 | #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag)
  49 | #define Py_OptimizeFlag 0
  50 | #endif
  51 | #define __PYX_BUILD_PY_SSIZE_T "n"
  52 | #define CYTHON_FORMAT_SSIZE_T "z"
  53 | #if PY_MAJOR_VERSION < 3
  54 |   #define __Pyx_BUILTIN_MODULE_NAME "__builtin__"
  55 |   #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
  56 |           PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
  57 |   #define __Pyx_DefaultClassType PyClass_Type
  58 | #else
  59 |   #define __Pyx_BUILTIN_MODULE_NAME "builtins"
  60 |   #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\
  61 |           PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)
  62 |   #define __Pyx_DefaultClassType PyType_Type
  63 | #endif
  64 | #ifndef Py_TPFLAGS_CHECKTYPES
  65 |   #define Py_TPFLAGS_CHECKTYPES 0
  66 | #endif
  67 | #ifndef Py_TPFLAGS_HAVE_INDEX
  68 |   #define Py_TPFLAGS_HAVE_INDEX 0
  69 | #endif
  70 | #ifndef Py_TPFLAGS_HAVE_NEWBUFFER
  71 |   #define Py_TPFLAGS_HAVE_NEWBUFFER 0
  72 | #endif
  73 | #ifndef Py_TPFLAGS_HAVE_FINALIZE
  74 |   #define Py_TPFLAGS_HAVE_FINALIZE 0
  75 | #endif
  76 | #if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND)
  77 |   #define CYTHON_PEP393_ENABLED 1
  78 |   #define __Pyx_PyUnicode_READY(op)       (likely(PyUnicode_IS_READY(op)) ?\
  79 |                                               0 : _PyUnicode_Ready((PyObject *)(op)))
  80 |   #define __Pyx_PyUnicode_GET_LENGTH(u)   PyUnicode_GET_LENGTH(u)
  81 |   #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i)
  82 |   #define __Pyx_PyUnicode_KIND(u)         PyUnicode_KIND(u)
  83 |   #define __Pyx_PyUnicode_DATA(u)         PyUnicode_DATA(u)
  84 |   #define __Pyx_PyUnicode_READ(k, d, i)   PyUnicode_READ(k, d, i)
  85 | #else
  86 |   #define CYTHON_PEP393_ENABLED 0
  87 |   #define __Pyx_PyUnicode_READY(op)       (0)
  88 |   #define __Pyx_PyUnicode_GET_LENGTH(u)   PyUnicode_GET_SIZE(u)
  89 |   #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i]))
  90 |   #define __Pyx_PyUnicode_KIND(u)         (sizeof(Py_UNICODE))
  91 |   #define __Pyx_PyUnicode_DATA(u)         ((void*)PyUnicode_AS_UNICODE(u))
  92 |   #define __Pyx_PyUnicode_READ(k, d, i)   ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i]))
  93 | #endif
  94 | #if CYTHON_COMPILING_IN_PYPY
  95 |   #define __Pyx_PyUnicode_Concat(a, b)      PyNumber_Add(a, b)
  96 |   #define __Pyx_PyUnicode_ConcatSafe(a, b)  PyNumber_Add(a, b)
  97 | #else
  98 |   #define __Pyx_PyUnicode_Concat(a, b)      PyUnicode_Concat(a, b)
  99 |   #define __Pyx_PyUnicode_ConcatSafe(a, b)  ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\
 100 |       PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b))
 101 | #endif
 102 | #if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains)
 103 |   #define PyUnicode_Contains(u, s)  PySequence_Contains(u, s)
 104 | #endif
 105 | #define __Pyx_PyString_FormatSafe(a, b)   ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b))
 106 | #define __Pyx_PyUnicode_FormatSafe(a, b)  ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : PyUnicode_Format(a, b))
 107 | #if PY_MAJOR_VERSION >= 3
 108 |   #define __Pyx_PyString_Format(a, b)  PyUnicode_Format(a, b)
 109 | #else
 110 |   #define __Pyx_PyString_Format(a, b)  PyString_Format(a, b)
 111 | #endif
 112 | #if PY_MAJOR_VERSION >= 3
 113 |   #define PyBaseString_Type            PyUnicode_Type
 114 |   #define PyStringObject               PyUnicodeObject
 115 |   #define PyString_Type                PyUnicode_Type
 116 |   #define PyString_Check               PyUnicode_Check
 117 |   #define PyString_CheckExact          PyUnicode_CheckExact
 118 | #endif
 119 | #if PY_MAJOR_VERSION >= 3
 120 |   #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj)
 121 |   #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj)
 122 | #else
 123 |   #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj))
 124 |   #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj))
 125 | #endif
 126 | #ifndef PySet_CheckExact
 127 |   #define PySet_CheckExact(obj)        (Py_TYPE(obj) == &PySet_Type)
 128 | #endif
 129 | #define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type)
 130 | #if PY_MAJOR_VERSION >= 3
 131 |   #define PyIntObject                  PyLongObject
 132 |   #define PyInt_Type                   PyLong_Type
 133 |   #define PyInt_Check(op)              PyLong_Check(op)
 134 |   #define PyInt_CheckExact(op)         PyLong_CheckExact(op)
 135 |   #define PyInt_FromString             PyLong_FromString
 136 |   #define PyInt_FromUnicode            PyLong_FromUnicode
 137 |   #define PyInt_FromLong               PyLong_FromLong
 138 |   #define PyInt_FromSize_t             PyLong_FromSize_t
 139 |   #define PyInt_FromSsize_t            PyLong_FromSsize_t
 140 |   #define PyInt_AsLong                 PyLong_AsLong
 141 |   #define PyInt_AS_LONG                PyLong_AS_LONG
 142 |   #define PyInt_AsSsize_t              PyLong_AsSsize_t
 143 |   #define PyInt_AsUnsignedLongMask     PyLong_AsUnsignedLongMask
 144 |   #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask
 145 |   #define PyNumber_Int                 PyNumber_Long
 146 | #endif
 147 | #if PY_MAJOR_VERSION >= 3
 148 |   #define PyBoolObject                 PyLongObject
 149 | #endif
 150 | #if PY_MAJOR_VERSION >= 3 && CYTHON_COMPILING_IN_PYPY
 151 |   #ifndef PyUnicode_InternFromString
 152 |     #define PyUnicode_InternFromString(s) PyUnicode_FromString(s)
 153 |   #endif
 154 | #endif
 155 | #if PY_VERSION_HEX < 0x030200A4
 156 |   typedef long Py_hash_t;
 157 |   #define __Pyx_PyInt_FromHash_t PyInt_FromLong
 158 |   #define __Pyx_PyInt_AsHash_t   PyInt_AsLong
 159 | #else
 160 |   #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t
 161 |   #define __Pyx_PyInt_AsHash_t   PyInt_AsSsize_t
 162 | #endif
 163 | #if PY_MAJOR_VERSION >= 3
 164 |   #define __Pyx_PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func))
 165 | #else
 166 |   #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass)
 167 | #endif
 168 | #if PY_VERSION_HEX >= 0x030500B1
 169 | #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods
 170 | #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async)
 171 | #elif CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3
 172 | typedef struct {
 173 |     unaryfunc am_await;
 174 |     unaryfunc am_aiter;
 175 |     unaryfunc am_anext;
 176 | } __Pyx_PyAsyncMethodsStruct;
 177 | #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved))
 178 | #else
 179 | #define __Pyx_PyType_AsAsync(obj) NULL
 180 | #endif
 181 | #ifndef CYTHON_RESTRICT
 182 |   #if defined(__GNUC__)
 183 |     #define CYTHON_RESTRICT __restrict__
 184 |   #elif defined(_MSC_VER) && _MSC_VER >= 1400
 185 |     #define CYTHON_RESTRICT __restrict
 186 |   #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
 187 |     #define CYTHON_RESTRICT restrict
 188 |   #else
 189 |     #define CYTHON_RESTRICT
 190 |   #endif
 191 | #endif
 192 | #define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None)
 193 | 
 194 | #ifndef CYTHON_INLINE
 195 |   #if defined(__GNUC__)
 196 |     #define CYTHON_INLINE __inline__
 197 |   #elif defined(_MSC_VER)
 198 |     #define CYTHON_INLINE __inline
 199 |   #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
 200 |     #define CYTHON_INLINE inline
 201 |   #else
 202 |     #define CYTHON_INLINE
 203 |   #endif
 204 | #endif
 205 | 
 206 | #if defined(WIN32) || defined(MS_WINDOWS)
 207 |   #define _USE_MATH_DEFINES
 208 | #endif
 209 | #include <math.h>
 210 | #ifdef NAN /* prefer the NAN macro from <math.h> when the C library supplies one */
 211 | #define __PYX_NAN() ((float) NAN)
 212 | #else
 213 | static CYTHON_INLINE float __PYX_NAN() { /* fallback: build a NaN without relying on the NAN macro */
 214 |   float value;
 215 |   memset(&value, 0xFF, sizeof(value)); /* sets every bit of the float; an all-ones pattern is a NaN under IEEE 754 (assumes that representation) */
 216 |   return value;
 217 | }
 218 | #endif
 219 | 
 220 | 
 221 | #if PY_MAJOR_VERSION >= 3
 222 |   #define __Pyx_PyNumber_Divide(x,y)         PyNumber_TrueDivide(x,y)
 223 |   #define __Pyx_PyNumber_InPlaceDivide(x,y)  PyNumber_InPlaceTrueDivide(x,y)
 224 | #else
 225 |   #define __Pyx_PyNumber_Divide(x,y)         PyNumber_Divide(x,y)
 226 |   #define __Pyx_PyNumber_InPlaceDivide(x,y)  PyNumber_InPlaceDivide(x,y)
 227 | #endif
 228 | 
 229 | #ifndef __PYX_EXTERN_C
 230 |   #ifdef __cplusplus
 231 |     #define __PYX_EXTERN_C extern "C"
 232 |   #else
 233 |     #define __PYX_EXTERN_C extern
 234 |   #endif
 235 | #endif
 236 | 
 237 | #define __PYX_HAVE__chat_trie
 238 | #define __PYX_HAVE_API__chat_trie
 239 | #include "../hat-trie/src/hat-trie.h"
 240 | #ifdef _OPENMP
 241 | #include <omp.h>
 242 | #endif /* _OPENMP */
 243 | 
 244 | #ifdef PYREX_WITHOUT_ASSERTIONS
 245 | #define CYTHON_WITHOUT_ASSERTIONS
 246 | #endif
 247 | 
 248 | #ifndef CYTHON_UNUSED
 249 | # if defined(__GNUC__)
 250 | #   if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
 251 | #     define CYTHON_UNUSED __attribute__ ((__unused__))
 252 | #   else
 253 | #     define CYTHON_UNUSED
 254 | #   endif
 255 | # elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER))
 256 | #   define CYTHON_UNUSED __attribute__ ((__unused__))
 257 | # else
 258 | #   define CYTHON_UNUSED
 259 | # endif
 260 | #endif
 261 | #ifndef CYTHON_NCP_UNUSED
 262 | # if CYTHON_COMPILING_IN_CPYTHON
 263 | #  define CYTHON_NCP_UNUSED
 264 | # else
 265 | #  define CYTHON_NCP_UNUSED CYTHON_UNUSED
 266 | # endif
 267 | #endif
 268 | typedef struct {PyObject **p; char *s; const Py_ssize_t n; const char* encoding;
 269 |                 const char is_unicode; const char is_str; const char intern; } __Pyx_StringTabEntry;
 270 | 
 271 | #define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0
 272 | #define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0
 273 | #define __PYX_DEFAULT_STRING_ENCODING ""
 274 | #define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString
 275 | #define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
 276 | #define __Pyx_uchar_cast(c) ((unsigned char)c)
 277 | #define __Pyx_long_cast(x) ((long)x)
 278 | #define __Pyx_fits_Py_ssize_t(v, type, is_signed)  (\
 279 |     (sizeof(type) < sizeof(Py_ssize_t))  ||\
 280 |     (sizeof(type) > sizeof(Py_ssize_t) &&\
 281 |           likely(v < (type)PY_SSIZE_T_MAX ||\
 282 |                  v == (type)PY_SSIZE_T_MAX)  &&\
 283 |           (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\
 284 |                                 v == (type)PY_SSIZE_T_MIN)))  ||\
 285 |     (sizeof(type) == sizeof(Py_ssize_t) &&\
 286 |           (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\
 287 |                                v == (type)PY_SSIZE_T_MAX)))  )
 288 | #if defined (__cplusplus) && __cplusplus >= 201103L
 289 |     #include <cstdlib>
 290 |     #define __Pyx_sst_abs(value) std::abs(value)
 291 | #elif SIZEOF_INT >= SIZEOF_SIZE_T
 292 |     #define __Pyx_sst_abs(value) abs(value)
 293 | #elif SIZEOF_LONG >= SIZEOF_SIZE_T
 294 |     #define __Pyx_sst_abs(value) labs(value)
 295 | #elif defined (_MSC_VER) && defined (_M_X64)
 296 |     #define __Pyx_sst_abs(value) _abs64(value)
 297 | #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L
 298 |     #define __Pyx_sst_abs(value) llabs(value)
 299 | #elif defined (__GNUC__)
 300 |     #define __Pyx_sst_abs(value) __builtin_llabs(value)
 301 | #else
 302 |     #define __Pyx_sst_abs(value) ((value<0) ? -value : value)
 303 | #endif
 304 | static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*);
 305 | static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length);
 306 | #define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s))
 307 | #define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l)
 308 | #define __Pyx_PyBytes_FromString        PyBytes_FromString
 309 | #define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize
 310 | static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*);
 311 | #if PY_MAJOR_VERSION < 3
 312 |     #define __Pyx_PyStr_FromString        __Pyx_PyBytes_FromString
 313 |     #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize
 314 | #else
 315 |     #define __Pyx_PyStr_FromString        __Pyx_PyUnicode_FromString
 316 |     #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize
 317 | #endif
 318 | #define __Pyx_PyObject_AsSString(s)    ((signed char*) __Pyx_PyObject_AsString(s))
 319 | #define __Pyx_PyObject_AsUString(s)    ((unsigned char*) __Pyx_PyObject_AsString(s))
 320 | #define __Pyx_PyObject_FromCString(s)  __Pyx_PyObject_FromString((const char*)s)
 321 | #define __Pyx_PyBytes_FromCString(s)   __Pyx_PyBytes_FromString((const char*)s)
 322 | #define __Pyx_PyByteArray_FromCString(s)   __Pyx_PyByteArray_FromString((const char*)s)
 323 | #define __Pyx_PyStr_FromCString(s)     __Pyx_PyStr_FromString((const char*)s)
 324 | #define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s)
 325 | #if PY_MAJOR_VERSION < 3
 326 | static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u)
 327 | { /* Returns the number of Py_UNICODE units in u that precede the terminating zero unit. */
 328 |     size_t length = 0;
 329 |     while (u[length] != 0) ++length;
 330 |     return length;
 331 | }
 332 | #else
 333 | #define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen
 334 | #endif
 335 | #define __Pyx_PyUnicode_FromUnicode(u)       PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u))
 336 | #define __Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode
 337 | #define __Pyx_PyUnicode_AsUnicode            PyUnicode_AsUnicode
 338 | #define __Pyx_NewRef(obj) (Py_INCREF(obj), obj)
 339 | #define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None)
 340 | #define __Pyx_PyBool_FromLong(b) ((b) ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False))
 341 | static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*);
 342 | static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x);
 343 | static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*);
 344 | static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t);
 345 | #if CYTHON_COMPILING_IN_CPYTHON
 346 | #define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x))
 347 | #else
 348 | #define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x)
 349 | #endif
 350 | #define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x))
 351 | #if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
 352 | static int __Pyx_sys_getdefaultencoding_not_ascii;
 353 | static int __Pyx_init_sys_getdefaultencoding_params(void) { /* Sets __Pyx_sys_getdefaultencoding_not_ascii from sys.getdefaultencoding(); for a non-"ascii" encoding it also checks that the 128 ASCII characters encode to themselves. Returns 0 on success, -1 on failure. */
 354 |     PyObject* sys;
 355 |     PyObject* default_encoding = NULL;
 356 |     PyObject* ascii_chars_u = NULL;
 357 |     PyObject* ascii_chars_b = NULL;
 358 |     const char* default_encoding_c;
 359 |     sys = PyImport_ImportModule("sys");
 360 |     if (!sys) goto bad;
 361 |     default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL);
 362 |     Py_DECREF(sys); /* the module reference is released right after the call, so the bad: path never has to drop it */
 363 |     if (!default_encoding) goto bad;
 364 |     default_encoding_c = PyBytes_AsString(default_encoding); /* points into default_encoding, which therefore stays referenced until the end */
 365 |     if (!default_encoding_c) goto bad;
 366 |     if (strcmp(default_encoding_c, "ascii") == 0) {
 367 |         __Pyx_sys_getdefaultencoding_not_ascii = 0;
 368 |     } else {
 369 |         char ascii_chars[128];
 370 |         int c;
 371 |         for (c = 0; c < 128; c++) { /* fill the buffer with byte values 0..127 */
 372 |             ascii_chars[c] = c;
 373 |         }
 374 |         __Pyx_sys_getdefaultencoding_not_ascii = 1;
 375 |         ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL);
 376 |         if (!ascii_chars_u) goto bad;
 377 |         ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); /* round-trip the ASCII range through the default encoding */
 378 |         if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { /* encoding failed, gave a non-bytes result, or changed at least one of the 128 bytes */
 379 |             PyErr_Format(
 380 |                 PyExc_ValueError,
 381 |                 "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.",
 382 |                 default_encoding_c);
 383 |             goto bad;
 384 |         }
 385 |         Py_DECREF(ascii_chars_u);
 386 |         Py_DECREF(ascii_chars_b);
 387 |     }
 388 |     Py_DECREF(default_encoding);
 389 |     return 0;
 390 | bad: /* shared failure exit: drop whichever references were acquired (NULL-safe) */
 391 |     Py_XDECREF(default_encoding);
 392 |     Py_XDECREF(ascii_chars_u);
 393 |     Py_XDECREF(ascii_chars_b);
 394 |     return -1;
 395 | }
 396 | #endif
 397 | #if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3
 398 | #define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL)
 399 | #else
 400 | #define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL)
 401 | #if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT
 402 | static char* __PYX_DEFAULT_STRING_ENCODING;
 403 | static int __Pyx_init_sys_getdefaultencoding_params(void) { /* Stores a heap copy of sys.getdefaultencoding() in __PYX_DEFAULT_STRING_ENCODING. Returns 0 on success, -1 with a Python exception set on failure. */
 404 |     PyObject* sys;
 405 |     PyObject* default_encoding = NULL;
 406 |     char* default_encoding_c;
 407 |     sys = PyImport_ImportModule("sys");
 408 |     if (!sys) goto bad;
 409 |     default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL);
 410 |     Py_DECREF(sys); /* the module reference is released right after the call, so the bad: path never has to drop it */
 411 |     if (!default_encoding) goto bad;
 412 |     default_encoding_c = PyBytes_AsString(default_encoding); /* points into default_encoding, so the text is copied before that object is released */
 413 |     if (!default_encoding_c) goto bad;
 414 |     __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); /* +1 leaves room for the terminating NUL that strcpy writes */
 415 |     if (!__PYX_DEFAULT_STRING_ENCODING) { PyErr_NoMemory(); goto bad; } /* malloc sets no Python exception itself, so raise MemoryError before failing */
 416 |     strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c);
 417 |     Py_DECREF(default_encoding);
 418 |     return 0;
 419 | bad: /* shared failure exit; Py_XDECREF tolerates a NULL default_encoding */
 420 |     Py_XDECREF(default_encoding);
 421 |     return -1;
 422 | }
 423 | #endif
 424 | #endif
 425 | 
 426 | 
 427 | /* Test for GCC > 2.95 */
 428 | #if defined(__GNUC__)     && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)))
 429 |   #define likely(x)   __builtin_expect(!!(x), 1)
 430 |   #define unlikely(x) __builtin_expect(!!(x), 0)
 431 | #else /* !__GNUC__ or GCC < 2.95 */
 432 |   #define likely(x)   (x)
 433 |   #define unlikely(x) (x)
 434 | #endif /* __GNUC__ */
 435 | 
 436 | static PyObject *__pyx_m;
 437 | static PyObject *__pyx_d;
 438 | static PyObject *__pyx_b;
 439 | static PyObject *__pyx_empty_tuple;
 440 | static PyObject *__pyx_empty_bytes;
 441 | static int __pyx_lineno;
 442 | static int __pyx_clineno = 0;
 443 | static const char * __pyx_cfilenm= __FILE__;
 444 | static const char *__pyx_filename;
 445 | 
 446 | 
 447 | static const char *__pyx_f[] = {
 448 |   "src/chat_trie.pxd",
 449 | };
 450 | 
 451 | /*--- Type declarations ---*/
 452 | struct __pyx_t_9chat_trie_hattrie_t_;
 453 | 
 454 | /* "chat_trie.pxd":36
 455 |  *     value_t*        hattrie_iter_val       (hattrie_iter_t*)
 456 |  * 
 457 |  * cdef struct hattrie_t_:             # <<<<<<<<<<<<<<
 458 |  *     void* root
 459 |  *     size_t m      # number of stored keys
 460 |  */
 461 | struct __pyx_t_9chat_trie_hattrie_t_ {
 462 |   void *root;
 463 |   size_t m;
 464 | };
 465 | 
 466 | /* --- Runtime support code (head) --- */
 467 | #ifndef CYTHON_REFNANNY
 468 |   #define CYTHON_REFNANNY 0
 469 | #endif
 470 | #if CYTHON_REFNANNY
 471 |   typedef struct {
 472 |     void (*INCREF)(void*, PyObject*, int);
 473 |     void (*DECREF)(void*, PyObject*, int);
 474 |     void (*GOTREF)(void*, PyObject*, int);
 475 |     void (*GIVEREF)(void*, PyObject*, int);
 476 |     void* (*SetupContext)(const char*, int, const char*);
 477 |     void (*FinishContext)(void**);
 478 |   } __Pyx_RefNannyAPIStruct;
 479 |   static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL;
 480 |   static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname);
 481 |   #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL;
 482 | #ifdef WITH_THREAD
 483 |   #define __Pyx_RefNannySetupContext(name, acquire_gil)\
 484 |           if (acquire_gil) {\
 485 |               PyGILState_STATE __pyx_gilstate_save = PyGILState_Ensure();\
 486 |               __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
 487 |               PyGILState_Release(__pyx_gilstate_save);\
 488 |           } else {\
 489 |               __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\
 490 |           }
 491 | #else
 492 |   #define __Pyx_RefNannySetupContext(name, acquire_gil)\
 493 |           __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__)
 494 | #endif
 495 |   #define __Pyx_RefNannyFinishContext()\
 496 |           __Pyx_RefNanny->FinishContext(&__pyx_refnanny)
 497 |   #define __Pyx_INCREF(r)  __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
 498 |   #define __Pyx_DECREF(r)  __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
 499 |   #define __Pyx_GOTREF(r)  __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
 500 |   #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__)
 501 |   #define __Pyx_XINCREF(r)  do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0)
 502 |   #define __Pyx_XDECREF(r)  do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0)
 503 |   #define __Pyx_XGOTREF(r)  do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0)
 504 |   #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0)
 505 | #else
 506 |   #define __Pyx_RefNannyDeclarations
 507 |   #define __Pyx_RefNannySetupContext(name, acquire_gil)
 508 |   #define __Pyx_RefNannyFinishContext()
 509 |   #define __Pyx_INCREF(r) Py_INCREF(r)
 510 |   #define __Pyx_DECREF(r) Py_DECREF(r)
 511 |   #define __Pyx_GOTREF(r)
 512 |   #define __Pyx_GIVEREF(r)
 513 |   #define __Pyx_XINCREF(r) Py_XINCREF(r)
 514 |   #define __Pyx_XDECREF(r) Py_XDECREF(r)
 515 |   #define __Pyx_XGOTREF(r)
 516 |   #define __Pyx_XGIVEREF(r)
 517 | #endif
 518 | #define __Pyx_XDECREF_SET(r, v) do {\
 519 |         PyObject *tmp = (PyObject *) r;\
 520 |         r = v; __Pyx_XDECREF(tmp);\
 521 |     } while (0)
 522 | #define __Pyx_DECREF_SET(r, v) do {\
 523 |         PyObject *tmp = (PyObject *) r;\
 524 |         r = v; __Pyx_DECREF(tmp);\
 525 |     } while (0)
 526 | #define __Pyx_CLEAR(r)    do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0)
 527 | #define __Pyx_XCLEAR(r)   do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0)
 528 | 
 529 | typedef struct {
 530 |     int code_line;
 531 |     PyCodeObject* code_object;
 532 | } __Pyx_CodeObjectCacheEntry;
 533 | struct __Pyx_CodeObjectCache {
 534 |     int count;
 535 |     int max_count;
 536 |     __Pyx_CodeObjectCacheEntry* entries;
 537 | };
 538 | static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL};
 539 | static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line);
 540 | static PyCodeObject *__pyx_find_code_object(int code_line);
 541 | static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object);
 542 | 
 543 | static void __Pyx_AddTraceback(const char *funcname, int c_line,
 544 |                                int py_line, const char *filename);
 545 | 
 546 | static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value);
 547 | 
 548 | static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *);
 549 | 
 550 | static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *);
 551 | 
 552 | static int __Pyx_check_binary_version(void);
 553 | 
 554 | static int __Pyx_InitStrings(__Pyx_StringTabEntry *t);
 555 | 
 556 | 
 557 | /* Module declarations from 'chat_trie' */
 558 | #define __Pyx_MODULE_NAME "chat_trie"
 559 | int __pyx_module_is_main_chat_trie = 0;
 560 | 
 561 | /* Implementation of 'chat_trie' */
 562 | static char __pyx_k_main[] = "__main__";
 563 | static char __pyx_k_test[] = "__test__";
 564 | static PyObject *__pyx_n_s_main;
 565 | static PyObject *__pyx_n_s_test;
 566 | 
 567 | static PyMethodDef __pyx_methods[] = {
 568 |   {0, 0, 0, 0}
 569 | };
 570 | 
 571 | #if PY_MAJOR_VERSION >= 3
 572 | static struct PyModuleDef __pyx_moduledef = {
 573 |   #if PY_VERSION_HEX < 0x03020000
 574 |     { PyObject_HEAD_INIT(NULL) NULL, 0, NULL },
 575 |   #else
 576 |     PyModuleDef_HEAD_INIT,
 577 |   #endif
 578 |     "chat_trie",
 579 |     0, /* m_doc */
 580 |     -1, /* m_size */
 581 |     __pyx_methods /* m_methods */,
 582 |     NULL, /* m_reload */
 583 |     NULL, /* m_traverse */
 584 |     NULL, /* m_clear */
 585 |     NULL /* m_free */
 586 | };
 587 | #endif
 588 | 
 589 | static __Pyx_StringTabEntry __pyx_string_tab[] = {
 590 |   {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1},
 591 |   {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1},
 592 |   {0, 0, 0, 0, 0, 0, 0}
 593 | };
 594 | static int __Pyx_InitCachedBuiltins(void) { /* nothing is looked up or cached here; always returns 0 */
 595 |   return 0;
 596 | }
 597 | 
 598 | static int __Pyx_InitCachedConstants(void) { /* creates no constants: it only opens and closes the refnanny context (both macros expand to nothing unless CYTHON_REFNANNY is set) and always returns 0 */
 599 |   __Pyx_RefNannyDeclarations
 600 |   __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0);
 601 |   __Pyx_RefNannyFinishContext();
 602 |   return 0;
 603 | }
 604 | 
 605 | static int __Pyx_InitGlobals(void) { /* passes __pyx_string_tab to __Pyx_InitStrings; returns 0 on success, -1 on failure */
 606 |   if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; /* on failure, record the source file and line in the file-level position variables first */
 607 |   return 0;
 608 |   __pyx_L1_error:;
 609 |   return -1;
 610 | }
 611 | 
/*
 * Module initialisation entry point for the "chat_trie" extension module
 * (named initchat_trie on Python 2, PyInit_chat_trie on Python 3).
 *
 * Steps, in order: optional refnanny import, binary version check, creation
 * of the shared empty tuple / empty bytes objects, module object creation,
 * capture of the module dict (__pyx_d) and the builtins module (__pyx_b),
 * string-table initialisation, optional "__name__" override, registration
 * in the interpreter's module dict (Python 3 only), and finally storing an
 * empty dict in the module namespace under the name held by
 * __pyx_n_s_test.
 *
 * Every failing step records the error position and jumps to
 * __pyx_L1_error.  On Python 3 the function returns the module object
 * (__pyx_m), which the error path resets to 0 once a module had been
 * created; on Python 2 it returns nothing.
 */
#if PY_MAJOR_VERSION < 3
PyMODINIT_FUNC initchat_trie(void); /*proto*/
PyMODINIT_FUNC initchat_trie(void)
#else
PyMODINIT_FUNC PyInit_chat_trie(void); /*proto*/
PyMODINIT_FUNC PyInit_chat_trie(void)
#endif
{
  PyObject *__pyx_t_1 = NULL;
  /* Local error-position variables filled in right before each
   * "goto __pyx_L1_error" and consumed by __Pyx_AddTraceback() below. */
  int __pyx_lineno = 0;
  const char *__pyx_filename = NULL;
  int __pyx_clineno = 0;
  __Pyx_RefNannyDeclarations
  #if CYTHON_REFNANNY
  /* Try the top-level "refnanny" module first, then the one shipped inside
   * the Cython package; without either, abort the process. */
  __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny");
  if (!__Pyx_RefNanny) {
      PyErr_Clear();
      __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny");
      if (!__Pyx_RefNanny)
          Py_FatalError("failed to import 'refnanny' module");
  }
  #endif
  __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_chat_trie(void)", 0);
  if (__Pyx_check_binary_version() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  /* Shared immutable helpers; also used when synthesising traceback code
   * objects in __Pyx_CreateCodeObjectForTraceback(). */
  __pyx_empty_tuple = PyTuple_New(0); if (unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  /* Optional runtime type initialisers, compiled in only when the
   * corresponding feature macro is defined. */
  #ifdef __Pyx_CyFunction_USED
  if (__pyx_CyFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  #endif
  #ifdef __Pyx_FusedFunction_USED
  if (__pyx_FusedFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  #endif
  #ifdef __Pyx_Coroutine_USED
  if (__pyx_Coroutine_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  #endif
  #ifdef __Pyx_Generator_USED
  if (__pyx_Generator_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  #endif
  #ifdef __Pyx_StopAsyncIteration_USED
  if (__pyx_StopAsyncIteration_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  #endif
  /*--- Library function declarations ---*/
  /*--- Threads initialization code ---*/
  #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS
  #ifdef WITH_THREAD /* Python build with threading support? */
  PyEval_InitThreads();
  #endif
  #endif
  /*--- Module creation code ---*/
  #if PY_MAJOR_VERSION < 3
  /* The Python 2 branch takes an extra reference on the module returned by
   * Py_InitModule4(), so the error path's Py_DECREF(__pyx_m) is balanced. */
  __pyx_m = Py_InitModule4("chat_trie", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m);
  #else
  __pyx_m = PyModule_Create(&__pyx_moduledef);
  #endif
  if (unlikely(!__pyx_m)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  /* __pyx_d: the module's namespace dict, kept in a global with its own
   * reference (also used as the globals of synthetic traceback frames). */
  __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  Py_INCREF(__pyx_d);
  __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  #if CYTHON_COMPILING_IN_PYPY
  Py_INCREF(__pyx_b);
  #endif
  if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;};
  /*--- Initialize various global constants etc. ---*/
  if (__Pyx_InitGlobals() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)
  if (__Pyx_init_sys_getdefaultencoding_params() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  #endif
  /* When the module-is-main flag is set, overwrite the module's __name__
   * with the string object held in __pyx_n_s_main. */
  if (__pyx_module_is_main_chat_trie) {
    if (PyObject_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  }
  #if PY_MAJOR_VERSION >= 3
  {
    /* Register the module in the interpreter's module dict under
     * "chat_trie" unless an entry with that name already exists. */
    PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    if (!PyDict_GetItemString(modules, "chat_trie")) {
      if (unlikely(PyDict_SetItemString(modules, "chat_trie", __pyx_m) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
    }
  }
  #endif
  /*--- Builtin init code ---*/
  if (__Pyx_InitCachedBuiltins() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  /*--- Constants init code ---*/
  if (__Pyx_InitCachedConstants() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  /*--- Global init code ---*/
  /*--- Variable export code ---*/
  /*--- Function export code ---*/
  /*--- Type init code ---*/
  /*--- Type import code ---*/
  /*--- Variable import code ---*/
  /*--- Function import code ---*/
  /*--- Execution code ---*/
  #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED)
  if (__Pyx_patch_abc() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  #endif

  /* "chat_trie.pxd":1
 * cdef extern from "../hat-trie/src/hat-trie.h":             # <<<<<<<<<<<<<<
 * 
 *     ctypedef int value_t
 */
  /* The only module-level "execution" step: bind an empty dict to the name
   * held by __pyx_n_s_test in the module namespace. */
  __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_GOTREF(__pyx_t_1);
  if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}
  __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0;

  /*--- Wrapped vars code ---*/

  goto __pyx_L0;
  __pyx_L1_error:;
  /* Error exit: drop the temporary, attach a traceback entry when the
   * module dict is available, and release the half-initialised module.
   * If no module was created and no exception is pending, raise a generic
   * ImportError so the caller never sees a silent failure. */
  __Pyx_XDECREF(__pyx_t_1);
  if (__pyx_m) {
    if (__pyx_d) {
      __Pyx_AddTraceback("init chat_trie", __pyx_clineno, __pyx_lineno, __pyx_filename);
    }
    Py_DECREF(__pyx_m); __pyx_m = 0;
  } else if (!PyErr_Occurred()) {
    PyErr_SetString(PyExc_ImportError, "init chat_trie");
  }
  __pyx_L0:;
  __Pyx_RefNannyFinishContext();
  #if PY_MAJOR_VERSION < 3
  return;
  #else
  return __pyx_m;
  #endif
}
 737 | 
 738 | /* --- Runtime support code --- */
 739 | #if CYTHON_REFNANNY
 740 | static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) {
 741 |     PyObject *m = NULL, *p = NULL;
 742 |     void *r = NULL;
 743 |     m = PyImport_ImportModule((char *)modname);
 744 |     if (!m) goto end;
 745 |     p = PyObject_GetAttrString(m, (char *)"RefNannyAPI");
 746 |     if (!p) goto end;
 747 |     r = PyLong_AsVoidPtr(p);
 748 | end:
 749 |     Py_XDECREF(p);
 750 |     Py_XDECREF(m);
 751 |     return (__Pyx_RefNannyAPIStruct *)r;
 752 | }
 753 | #endif
 754 | 
 755 | static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) {
 756 |     int start = 0, mid = 0, end = count - 1;
 757 |     if (end >= 0 && code_line > entries[end].code_line) {
 758 |         return count;
 759 |     }
 760 |     while (start < end) {
 761 |         mid = start + (end - start) / 2;
 762 |         if (code_line < entries[mid].code_line) {
 763 |             end = mid;
 764 |         } else if (code_line > entries[mid].code_line) {
 765 |              start = mid + 1;
 766 |         } else {
 767 |             return mid;
 768 |         }
 769 |     }
 770 |     if (code_line <= entries[mid].code_line) {
 771 |         return mid;
 772 |     } else {
 773 |         return mid + 1;
 774 |     }
 775 | }
 776 | static PyCodeObject *__pyx_find_code_object(int code_line) {
 777 |     PyCodeObject* code_object;
 778 |     int pos;
 779 |     if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) {
 780 |         return NULL;
 781 |     }
 782 |     pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
 783 |     if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) {
 784 |         return NULL;
 785 |     }
 786 |     code_object = __pyx_code_cache.entries[pos].code_object;
 787 |     Py_INCREF(code_object);
 788 |     return code_object;
 789 | }
 790 | static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) {
 791 |     int pos, i;
 792 |     __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries;
 793 |     if (unlikely(!code_line)) {
 794 |         return;
 795 |     }
 796 |     if (unlikely(!entries)) {
 797 |         entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry));
 798 |         if (likely(entries)) {
 799 |             __pyx_code_cache.entries = entries;
 800 |             __pyx_code_cache.max_count = 64;
 801 |             __pyx_code_cache.count = 1;
 802 |             entries[0].code_line = code_line;
 803 |             entries[0].code_object = code_object;
 804 |             Py_INCREF(code_object);
 805 |         }
 806 |         return;
 807 |     }
 808 |     pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line);
 809 |     if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) {
 810 |         PyCodeObject* tmp = entries[pos].code_object;
 811 |         entries[pos].code_object = code_object;
 812 |         Py_DECREF(tmp);
 813 |         return;
 814 |     }
 815 |     if (__pyx_code_cache.count == __pyx_code_cache.max_count) {
 816 |         int new_max = __pyx_code_cache.max_count + 64;
 817 |         entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Realloc(
 818 |             __pyx_code_cache.entries, (size_t)new_max*sizeof(__Pyx_CodeObjectCacheEntry));
 819 |         if (unlikely(!entries)) {
 820 |             return;
 821 |         }
 822 |         __pyx_code_cache.entries = entries;
 823 |         __pyx_code_cache.max_count = new_max;
 824 |     }
 825 |     for (i=__pyx_code_cache.count; i>pos; i--) {
 826 |         entries[i] = entries[i-1];
 827 |     }
 828 |     entries[pos].code_line = code_line;
 829 |     entries[pos].code_object = code_object;
 830 |     __pyx_code_cache.count++;
 831 |     Py_INCREF(code_object);
 832 | }
 833 | 
 834 | #include "compile.h"
 835 | #include "frameobject.h"
 836 | #include "traceback.h"
 837 | static PyCodeObject* __Pyx_CreateCodeObjectForTraceback(
 838 |             const char *funcname, int c_line,
 839 |             int py_line, const char *filename) {
 840 |     PyCodeObject *py_code = 0;
 841 |     PyObject *py_srcfile = 0;
 842 |     PyObject *py_funcname = 0;
 843 |     #if PY_MAJOR_VERSION < 3
 844 |     py_srcfile = PyString_FromString(filename);
 845 |     #else
 846 |     py_srcfile = PyUnicode_FromString(filename);
 847 |     #endif
 848 |     if (!py_srcfile) goto bad;
 849 |     if (c_line) {
 850 |         #if PY_MAJOR_VERSION < 3
 851 |         py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
 852 |         #else
 853 |         py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line);
 854 |         #endif
 855 |     }
 856 |     else {
 857 |         #if PY_MAJOR_VERSION < 3
 858 |         py_funcname = PyString_FromString(funcname);
 859 |         #else
 860 |         py_funcname = PyUnicode_FromString(funcname);
 861 |         #endif
 862 |     }
 863 |     if (!py_funcname) goto bad;
 864 |     py_code = __Pyx_PyCode_New(
 865 |         0,
 866 |         0,
 867 |         0,
 868 |         0,
 869 |         0,
 870 |         __pyx_empty_bytes, /*PyObject *code,*/
 871 |         __pyx_empty_tuple, /*PyObject *consts,*/
 872 |         __pyx_empty_tuple, /*PyObject *names,*/
 873 |         __pyx_empty_tuple, /*PyObject *varnames,*/
 874 |         __pyx_empty_tuple, /*PyObject *freevars,*/
 875 |         __pyx_empty_tuple, /*PyObject *cellvars,*/
 876 |         py_srcfile,   /*PyObject *filename,*/
 877 |         py_funcname,  /*PyObject *name,*/
 878 |         py_line,
 879 |         __pyx_empty_bytes  /*PyObject *lnotab*/
 880 |     );
 881 |     Py_DECREF(py_srcfile);
 882 |     Py_DECREF(py_funcname);
 883 |     return py_code;
 884 | bad:
 885 |     Py_XDECREF(py_srcfile);
 886 |     Py_XDECREF(py_funcname);
 887 |     return NULL;
 888 | }
 889 | static void __Pyx_AddTraceback(const char *funcname, int c_line,
 890 |                                int py_line, const char *filename) {
 891 |     PyCodeObject *py_code = 0;
 892 |     PyFrameObject *py_frame = 0;
 893 |     py_code = __pyx_find_code_object(c_line ? c_line : py_line);
 894 |     if (!py_code) {
 895 |         py_code = __Pyx_CreateCodeObjectForTraceback(
 896 |             funcname, c_line, py_line, filename);
 897 |         if (!py_code) goto bad;
 898 |         __pyx_insert_code_object(c_line ? c_line : py_line, py_code);
 899 |     }
 900 |     py_frame = PyFrame_New(
 901 |         PyThreadState_GET(), /*PyThreadState *tstate,*/
 902 |         py_code,             /*PyCodeObject *code,*/
 903 |         __pyx_d,      /*PyObject *globals,*/
 904 |         0                    /*PyObject *locals*/
 905 |     );
 906 |     if (!py_frame) goto bad;
 907 |     py_frame->f_lineno = py_line;
 908 |     PyTraceBack_Here(py_frame);
 909 | bad:
 910 |     Py_XDECREF(py_code);
 911 |     Py_XDECREF(py_frame);
 912 | }
 913 | 
/* Convert a C `long` to a Python integer object.
 * The body is a generic conversion template specialised for `long`: the
 * sizeof() comparisons are compile-time constants that pick the narrowest
 * suitable CPython constructor.  `is_unsigned` is derived from whether
 * (long)-1 compares greater than 0.  If no sizeof() branch returns, the
 * value is converted from its raw bytes with _PyLong_FromByteArray(). */
static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) {
    const long neg_one = (long) -1, const_zero = (long) 0;
    const int is_unsigned = neg_one > const_zero;
    if (is_unsigned) {
        if (sizeof(long) < sizeof(long)) {
            return PyInt_FromLong((long) value);
        } else if (sizeof(long) <= sizeof(unsigned long)) {
            return PyLong_FromUnsignedLong((unsigned long) value);
        } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) {
            return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value);
        }
    } else {
        if (sizeof(long) <= sizeof(long)) {
            return PyInt_FromLong((long) value);
        } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) {
            return PyLong_FromLongLong((PY_LONG_LONG) value);
        }
    }
    {
        /* Detect host byte order at run time: the first byte of the int 1
         * is 1 on a little-endian machine. */
        int one = 1; int little = (int)*(unsigned char *)&one;
        unsigned char *bytes = (unsigned char *)&value;
        return _PyLong_FromByteArray(bytes, sizeof(long),
                                     little, !is_unsigned);
    }
}
 939 | 
/* Range-checked "return" helpers for the PyObject -> C integer converters.
 *
 * __PYX__VERIFY_RETURN_INT evaluates `func_value` as `func_type` and returns
 * it cast to `target_type`.  When `target_type` is narrower than `func_type`
 * and the value does not survive the round trip through `target_type`, it
 * jumps to an overflow label instead.  With `exc` non-zero, a value of -1
 * combined with a pending Python exception is treated as an error from the
 * conversion call and -1 is returned as is.
 *
 * The expansion relies on the enclosing function providing a local
 * `is_unsigned` and the labels `raise_overflow` / `raise_neg_overflow`.
 *
 * __PYX_VERIFY_RETURN_INT     -- `exc` = 0 (value cannot signal an error)
 * __PYX_VERIFY_RETURN_INT_EXC -- `exc` = 1 (value comes from a fallible call)
 */
#define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\
    __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0)
#define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\
    __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1)
#define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\
    {\
        func_type value = func_value;\
        if (sizeof(target_type) < sizeof(func_type)) {\
            if (unlikely(value != (func_type) (target_type) value)) {\
                func_type zero = 0;\
                if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\
                    return (target_type) -1;\
                if (is_unsigned && unlikely(value < zero))\
                    goto raise_neg_overflow;\
                else\
                    goto raise_overflow;\
            }\
        }\
        return (target_type) value;\
    }
 960 | 
 961 | #if CYTHON_USE_PYLONG_INTERNALS
 962 |   #include "longintrepr.h"
 963 | #endif
 964 | 
/*
 * Convert a Python object to a C `long`.
 *
 * Returns the converted value.  On failure returns (long)-1 with a Python
 * exception set: the overflow labels at the end raise OverflowError, and
 * other failures leave whatever exception the failing call set.
 *
 * The body is a generic conversion template specialised for `long`, so
 * several conditions are compile-time constants.  `is_unsigned` is derived
 * from whether (long)-1 compares greater than 0.
 *
 * Strategy, in order:
 *   1. Python 2 `int` objects: read the value directly.
 *   2. `PyLong` objects: with CYTHON_USE_PYLONG_INTERNALS, assemble values
 *      of up to four internal digits by hand; otherwise (or for larger
 *      values) use the PyLong_As* API; as a last resort copy the raw bytes
 *      out with _PyLong_AsByteArray().
 *   3. Anything else: coerce with __Pyx_PyNumber_Int() and recurse.
 */
static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) {
    const long neg_one = (long) -1, const_zero = (long) 0;
    const int is_unsigned = neg_one > const_zero;
#if PY_MAJOR_VERSION < 3
    if (likely(PyInt_Check(x))) {
        if (sizeof(long) < sizeof(long)) {
            __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x))
        } else {
            long val = PyInt_AS_LONG(x);
            if (is_unsigned && unlikely(val < 0)) {
                goto raise_neg_overflow;
            }
            return (long) val;
        }
    } else
#endif
    if (likely(PyLong_Check(x))) {
        if (is_unsigned) {
            /* Unsigned-target branch of the template. */
#if CYTHON_USE_PYLONG_INTERNALS
            /* Fast path: Py_SIZE() is the digit count; each digit carries
             * PyLong_SHIFT bits, least significant digit first. */
            const digit* digits = ((PyLongObject*)x)->ob_digit;
            switch (Py_SIZE(x)) {
                case  0: return (long) 0;
                case  1: __PYX_VERIFY_RETURN_INT(long, digit, digits[0])
                case 2:
                    if (8 * sizeof(long) > 1 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) >= 2 * PyLong_SHIFT) {
                            return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]));
                        }
                    }
                    break;
                case 3:
                    if (8 * sizeof(long) > 2 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) >= 3 * PyLong_SHIFT) {
                            return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]));
                        }
                    }
                    break;
                case 4:
                    if (8 * sizeof(long) > 3 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) >= 4 * PyLong_SHIFT) {
                            return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]));
                        }
                    }
                    break;
            }
#endif
            /* Reject negative inputs before the unsigned API conversions. */
#if CYTHON_COMPILING_IN_CPYTHON
            if (unlikely(Py_SIZE(x) < 0)) {
                goto raise_neg_overflow;
            }
#else
            {
                int result = PyObject_RichCompareBool(x, Py_False, Py_LT);
                if (unlikely(result < 0))
                    return (long) -1;
                if (unlikely(result == 1))
                    goto raise_neg_overflow;
            }
#endif
            if (sizeof(long) <= sizeof(unsigned long)) {
                __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x))
            } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) {
                __PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x))
            }
        } else {
            /* Signed-target branch of the template. */
#if CYTHON_USE_PYLONG_INTERNALS
            /* Fast path as above; a negative Py_SIZE() denotes a negative
             * number whose magnitude is stored in |Py_SIZE()| digits. */
            const digit* digits = ((PyLongObject*)x)->ob_digit;
            switch (Py_SIZE(x)) {
                case  0: return (long) 0;
                case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, -(sdigit) digits[0])
                case  1: __PYX_VERIFY_RETURN_INT(long,  digit, +digits[0])
                case -2:
                    if (8 * sizeof(long) - 1 > 1 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                            return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
                        }
                    }
                    break;
                case 2:
                    if (8 * sizeof(long) > 1 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                            return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
                        }
                    }
                    break;
                case -3:
                    if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                            return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
                        }
                    }
                    break;
                case 3:
                    if (8 * sizeof(long) > 2 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                            return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
                        }
                    }
                    break;
                case -4:
                    if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
                            return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
                        }
                    }
                    break;
                case 4:
                    if (8 * sizeof(long) > 3 * PyLong_SHIFT) {
                        if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
                            __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
                        } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) {
                            return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])));
                        }
                    }
                    break;
            }
#endif
            /* General path through the public API; the _EXC macro variant
             * propagates an error reported by the PyLong_As* call. */
            if (sizeof(long) <= sizeof(long)) {
                __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x))
            } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) {
                __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x))
            }
        }
        {
            /* Last resort when none of the branches above returned: copy
             * the integer's bytes straight into `val`. */
#if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
            PyErr_SetString(PyExc_RuntimeError,
                            "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
#else
            long val;
            PyObject *v = __Pyx_PyNumber_Int(x);
 #if PY_MAJOR_VERSION < 3
            if (likely(v) && !PyLong_Check(v)) {
                PyObject *tmp = v;
                v = PyNumber_Long(tmp);
                Py_DECREF(tmp);
            }
 #endif
            if (likely(v)) {
                /* Run-time byte-order probe: first byte of the int 1. */
                int one = 1; int is_little = (int)*(unsigned char *)&one;
                unsigned char *bytes = (unsigned char *)&val;
                int ret = _PyLong_AsByteArray((PyLongObject *)v,
                                              bytes, sizeof(val),
                                              is_little, !is_unsigned);
                Py_DECREF(v);
                if (likely(!ret))
                    return val;
            }
#endif
            return (long) -1;
        }
    } else {
        /* Not an integer object: coerce it to one, then convert that. */
        long val;
        PyObject *tmp = __Pyx_PyNumber_Int(x);
        if (!tmp) return (long) -1;
        val = __Pyx_PyInt_As_long(tmp);
        Py_DECREF(tmp);
        return val;
    }
raise_overflow:
    PyErr_SetString(PyExc_OverflowError,
        "value too large to convert to long");
    return (long) -1;
raise_neg_overflow:
    PyErr_SetString(PyExc_OverflowError,
        "can't convert negative value to long");
    return (long) -1;
}
1148 | 
1149 | static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) {
1150 |     const int neg_one = (int) -1, const_zero = (int) 0;
1151 |     const int is_unsigned = neg_one > const_zero;
1152 | #if PY_MAJOR_VERSION < 3
1153 |     if (likely(PyInt_Check(x))) {
1154 |         if (sizeof(int) < sizeof(long)) {
1155 |             __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x))
1156 |         } else {
1157 |             long val = PyInt_AS_LONG(x);
1158 |             if (is_unsigned && unlikely(val < 0)) {
1159 |                 goto raise_neg_overflow;
1160 |             }
1161 |             return (int) val;
1162 |         }
1163 |     } else
1164 | #endif
1165 |     if (likely(PyLong_Check(x))) {
1166 |         if (is_unsigned) {
1167 | #if CYTHON_USE_PYLONG_INTERNALS
1168 |             const digit* digits = ((PyLongObject*)x)->ob_digit;
1169 |             switch (Py_SIZE(x)) {
1170 |                 case  0: return (int) 0;
1171 |                 case  1: __PYX_VERIFY_RETURN_INT(int, digit, digits[0])
1172 |                 case 2:
1173 |                     if (8 * sizeof(int) > 1 * PyLong_SHIFT) {
1174 |                         if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
1175 |                             __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
1176 |                         } else if (8 * sizeof(int) >= 2 * PyLong_SHIFT) {
1177 |                             return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]));
1178 |                         }
1179 |                     }
1180 |                     break;
1181 |                 case 3:
1182 |                     if (8 * sizeof(int) > 2 * PyLong_SHIFT) {
1183 |                         if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
1184 |                             __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
1185 |                         } else if (8 * sizeof(int) >= 3 * PyLong_SHIFT) {
1186 |                             return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]));
1187 |                         }
1188 |                     }
1189 |                     break;
1190 |                 case 4:
1191 |                     if (8 * sizeof(int) > 3 * PyLong_SHIFT) {
1192 |                         if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
1193 |                             __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
1194 |                         } else if (8 * sizeof(int) >= 4 * PyLong_SHIFT) {
1195 |                             return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]));
1196 |                         }
1197 |                     }
1198 |                     break;
1199 |             }
1200 | #endif
1201 | #if CYTHON_COMPILING_IN_CPYTHON
1202 |             if (unlikely(Py_SIZE(x) < 0)) {
1203 |                 goto raise_neg_overflow;
1204 |             }
1205 | #else
1206 |             {
1207 |                 int result = PyObject_RichCompareBool(x, Py_False, Py_LT);
1208 |                 if (unlikely(result < 0))
1209 |                     return (int) -1;
1210 |                 if (unlikely(result == 1))
1211 |                     goto raise_neg_overflow;
1212 |             }
1213 | #endif
1214 |             if (sizeof(int) <= sizeof(unsigned long)) {
1215 |                 __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, PyLong_AsUnsignedLong(x))
1216 |             } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) {
1217 |                 __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x))
1218 |             }
1219 |         } else {
1220 | #if CYTHON_USE_PYLONG_INTERNALS
1221 |             const digit* digits = ((PyLongObject*)x)->ob_digit;
1222 |             switch (Py_SIZE(x)) {
1223 |                 case  0: return (int) 0;
1224 |                 case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, -(sdigit) digits[0])
1225 |                 case  1: __PYX_VERIFY_RETURN_INT(int,  digit, +digits[0])
1226 |                 case -2:
1227 |                     if (8 * sizeof(int) - 1 > 1 * PyLong_SHIFT) {
1228 |                         if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
1229 |                             __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
1230 |                         } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) {
1231 |                             return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
1232 |                         }
1233 |                     }
1234 |                     break;
1235 |                 case 2:
1236 |                     if (8 * sizeof(int) > 1 * PyLong_SHIFT) {
1237 |                         if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) {
1238 |                             __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
1239 |                         } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) {
1240 |                             return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
1241 |                         }
1242 |                     }
1243 |                     break;
1244 |                 case -3:
1245 |                     if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) {
1246 |                         if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
1247 |                             __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
1248 |                         } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) {
1249 |                             return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
1250 |                         }
1251 |                     }
1252 |                     break;
1253 |                 case 3:
1254 |                     if (8 * sizeof(int) > 2 * PyLong_SHIFT) {
1255 |                         if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) {
1256 |                             __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
1257 |                         } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) {
1258 |                             return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
1259 |                         }
1260 |                     }
1261 |                     break;
1262 |                 case -4:
1263 |                     if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) {
1264 |                         if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
1265 |                             __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
1266 |                         } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) {
1267 |                             return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
1268 |                         }
1269 |                     }
1270 |                     break;
1271 |                 case 4:
1272 |                     if (8 * sizeof(int) > 3 * PyLong_SHIFT) {
1273 |                         if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) {
1274 |                             __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0])))
1275 |                         } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) {
1276 |                             return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])));
1277 |                         }
1278 |                     }
1279 |                     break;
1280 |             }
1281 | #endif
1282 |             if (sizeof(int) <= sizeof(long)) {
1283 |                 __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x))
1284 |             } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) {
1285 |                 __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x))
1286 |             }
1287 |         }
1288 |         {
1289 | #if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray)
1290 |             PyErr_SetString(PyExc_RuntimeError,
1291 |                             "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers");
1292 | #else
1293 |             int val;
1294 |             PyObject *v = __Pyx_PyNumber_Int(x);
1295 |  #if PY_MAJOR_VERSION < 3
1296 |             if (likely(v) && !PyLong_Check(v)) {
1297 |                 PyObject *tmp = v;
1298 |                 v = PyNumber_Long(tmp);
1299 |                 Py_DECREF(tmp);
1300 |             }
1301 |  #endif
1302 |             if (likely(v)) {
1303 |                 int one = 1; int is_little = (int)*(unsigned char *)&one;
1304 |                 unsigned char *bytes = (unsigned char *)&val;
1305 |                 int ret = _PyLong_AsByteArray((PyLongObject *)v,
1306 |                                               bytes, sizeof(val),
1307 |                                               is_little, !is_unsigned);
1308 |                 Py_DECREF(v);
1309 |                 if (likely(!ret))
1310 |                     return val;
1311 |             }
1312 | #endif
1313 |             return (int) -1;
1314 |         }
1315 |     } else {
1316 |         int val;
1317 |         PyObject *tmp = __Pyx_PyNumber_Int(x);
1318 |         if (!tmp) return (int) -1;
1319 |         val = __Pyx_PyInt_As_int(tmp);
1320 |         Py_DECREF(tmp);
1321 |         return val;
1322 |     }
1323 | raise_overflow:
1324 |     PyErr_SetString(PyExc_OverflowError,
1325 |         "value too large to convert to int");
1326 |     return (int) -1;
1327 | raise_neg_overflow:
1328 |     PyErr_SetString(PyExc_OverflowError,
1329 |         "can't convert negative value to int");
1330 |     return (int) -1;
1331 | }
1332 | 
1333 | static int __Pyx_check_binary_version(void) {
1334 |     char ctversion[4], rtversion[4];
1335 |     PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION);
1336 |     PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion());
1337 |     if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) {
1338 |         char message[200];
1339 |         PyOS_snprintf(message, sizeof(message),
1340 |                       "compiletime version %s of module '%.100s' "
1341 |                       "does not match runtime version %s",
1342 |                       ctversion, __Pyx_MODULE_NAME, rtversion);
1343 |         return PyErr_WarnEx(NULL, message, 1);
1344 |     }
1345 |     return 0;
1346 | }
1347 | 
1348 | static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) {
1349 |     while (t->p) {
1350 |         #if PY_MAJOR_VERSION < 3
1351 |         if (t->is_unicode) {
1352 |             *t->p = PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL);
1353 |         } else if (t->intern) {
1354 |             *t->p = PyString_InternFromString(t->s);
1355 |         } else {
1356 |             *t->p = PyString_FromStringAndSize(t->s, t->n - 1);
1357 |         }
1358 |         #else
1359 |         if (t->is_unicode | t->is_str) {
1360 |             if (t->intern) {
1361 |                 *t->p = PyUnicode_InternFromString(t->s);
1362 |             } else if (t->encoding) {
1363 |                 *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL);
1364 |             } else {
1365 |                 *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1);
1366 |             }
1367 |         } else {
1368 |             *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1);
1369 |         }
1370 |         #endif
1371 |         if (!*t->p)
1372 |             return -1;
1373 |         ++t;
1374 |     }
1375 |     return 0;
1376 | }
1377 | 
1378 | static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) {
1379 |     return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str));
1380 | }
1381 | static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) {
1382 |     Py_ssize_t ignore;
1383 |     return __Pyx_PyObject_AsStringAndSize(o, &ignore);
1384 | }
/*
 * Return a pointer to the character data of `o` and store its size in
 * `*length`.
 *
 * Depending on the build configuration, up to three kinds of objects are
 * handled, tried in this order:
 *   1. unicode objects (only on CPython builds whose default string
 *      encoding is ASCII or the system default),
 *   2. bytearray objects (skipped on PyPy unless the PyByteArray macros
 *      are available),
 *   3. everything else goes through PyBytes_AsStringAndSize().
 *
 * Returns NULL on failure.  The returned pointer is not a new allocation
 * made here; NOTE(review): it presumably refers to storage owned by `o`
 * (or by its cached encoded form) -- confirm lifetime with the CPython docs.
 */
static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) {
#if CYTHON_COMPILING_IN_CPYTHON && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT)
    if (
#if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
            /* On Python 2 with ASCII default, only take the unicode path when this flag is set. */
            __Pyx_sys_getdefaultencoding_not_ascii &&
#endif
            PyUnicode_Check(o)) {
#if PY_VERSION_HEX < 0x03030000
        /* Before Python 3.3: use the default-encoded bytes form of the unicode object. */
        char* defenc_c;
        PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL);
        if (!defenc) return NULL;
        defenc_c = PyBytes_AS_STRING(defenc);
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
        {
            /* Reject any byte outside the 7-bit ASCII range. */
            char* end = defenc_c + PyBytes_GET_SIZE(defenc);
            char* c;
            for (c = defenc_c; c < end; c++) {
                if ((unsigned char) (*c) >= 128) {
                    /* Result is discarded: called so that the ASCII encoder
                       sets the error for the NULL return below. */
                    PyUnicode_AsASCIIString(o);
                    return NULL;
                }
            }
        }
#endif
        *length = PyBytes_GET_SIZE(defenc);
        return defenc_c;
#else
        /* Python 3.3+: the unicode object must be ready before it is inspected. */
        if (__Pyx_PyUnicode_READY(o) == -1) return NULL;
#if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII
        if (PyUnicode_IS_ASCII(o)) {
            /* Length is reported in characters via PyUnicode_GET_LENGTH. */
            *length = PyUnicode_GET_LENGTH(o);
            return PyUnicode_AsUTF8(o);
        } else {
            /* Result is discarded: called so that the ASCII encoder
               sets the error for the NULL return below. */
            PyUnicode_AsASCIIString(o);
            return NULL;
        }
#else
        return PyUnicode_AsUTF8AndSize(o, length);
#endif
#endif
    } else
#endif
#if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE))
    if (PyByteArray_Check(o)) {
        *length = PyByteArray_GET_SIZE(o);
        return PyByteArray_AS_STRING(o);
    } else
#endif
    {
        /* Fallback: let PyBytes_AsStringAndSize() fill in both outputs. */
        char* result;
        int r = PyBytes_AsStringAndSize(o, &result, length);
        if (unlikely(r < 0)) {
            return NULL;
        } else {
            return result;
        }
    }
}
1443 | static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) {
1444 |    int is_true = x == Py_True;
1445 |    if (is_true | (x == Py_False) | (x == Py_None)) return is_true;
1446 |    else return PyObject_IsTrue(x);
1447 | }
1448 | static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) {
1449 |   PyNumberMethods *m;
1450 |   const char *name = NULL;
1451 |   PyObject *res = NULL;
1452 | #if PY_MAJOR_VERSION < 3
1453 |   if (PyInt_Check(x) || PyLong_Check(x))
1454 | #else
1455 |   if (PyLong_Check(x))
1456 | #endif
1457 |     return __Pyx_NewRef(x);
1458 |   m = Py_TYPE(x)->tp_as_number;
1459 | #if PY_MAJOR_VERSION < 3
1460 |   if (m && m->nb_int) {
1461 |     name = "int";
1462 |     res = PyNumber_Int(x);
1463 |   }
1464 |   else if (m && m->nb_long) {
1465 |     name = "long";
1466 |     res = PyNumber_Long(x);
1467 |   }
1468 | #else
1469 |   if (m && m->nb_int) {
1470 |     name = "int";
1471 |     res = PyNumber_Long(x);
1472 |   }
1473 | #endif
1474 |   if (res) {
1475 | #if PY_MAJOR_VERSION < 3
1476 |     if (!PyInt_Check(res) && !PyLong_Check(res)) {
1477 | #else
1478 |     if (!PyLong_Check(res)) {
1479 | #endif
1480 |       PyErr_Format(PyExc_TypeError,
1481 |                    "__%.4s__ returned non-%.4s (type %.200s)",
1482 |                    name, name, Py_TYPE(res)->tp_name);
1483 |       Py_DECREF(res);
1484 |       return NULL;
1485 |     }
1486 |   }
1487 |   else if (!PyErr_Occurred()) {
1488 |     PyErr_SetString(PyExc_TypeError,
1489 |                     "an integer is required");
1490 |   }
1491 |   return res;
1492 | }
1493 | static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) {
1494 |   Py_ssize_t ival;
1495 |   PyObject *x;
1496 | #if PY_MAJOR_VERSION < 3
1497 |   if (likely(PyInt_CheckExact(b))) {
1498 |     if (sizeof(Py_ssize_t) >= sizeof(long))
1499 |         return PyInt_AS_LONG(b);
1500 |     else
1501 |         return PyInt_AsSsize_t(x);
1502 |   }
1503 | #endif
1504 |   if (likely(PyLong_CheckExact(b))) {
1505 |     #if CYTHON_USE_PYLONG_INTERNALS
1506 |     const digit* digits = ((PyLongObject*)b)->ob_digit;
1507 |     const Py_ssize_t size = Py_SIZE(b);
1508 |     if (likely(__Pyx_sst_abs(size) <= 1)) {
1509 |         ival = likely(size) ? digits[0] : 0;
1510 |         if (size == -1) ival = -ival;
1511 |         return ival;
1512 |     } else {
1513 |       switch (size) {
1514 |          case 2:
1515 |            if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) {
1516 |              return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
1517 |            }
1518 |            break;
1519 |          case -2:
1520 |            if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) {
1521 |              return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
1522 |            }
1523 |            break;
1524 |          case 3:
1525 |            if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) {
1526 |              return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
1527 |            }
1528 |            break;
1529 |          case -3:
1530 |            if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) {
1531 |              return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
1532 |            }
1533 |            break;
1534 |          case 4:
1535 |            if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) {
1536 |              return (Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
1537 |            }
1538 |            break;
1539 |          case -4:
1540 |            if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) {
1541 |              return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0]));
1542 |            }
1543 |            break;
1544 |       }
1545 |     }
1546 |     #endif
1547 |     return PyLong_AsSsize_t(b);
1548 |   }
1549 |   x = PyNumber_Index(b);
1550 |   if (!x) return -1;
1551 |   ival = PyInt_AsSsize_t(x);
1552 |   Py_DECREF(x);
1553 |   return ival;
1554 | }
1555 | static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) {
1556 |     return PyInt_FromSize_t(ival);
1557 | }
1558 | 
1559 | 
1560 | #endif /* Py_PYTHON_H */
1561 | 


--------------------------------------------------------------------------------