├── update_c.sh ├── bench ├── __init__.py ├── words100k.txt.zip └── speed.py ├── tests ├── __init__.py ├── test_inttrie.py ├── test_floattrie.py ├── test_base_trie.py └── test_trie.py ├── hat-trie ├── TODO ├── .travis.yml ├── Makefile.am ├── src │ ├── murmurhash3.h │ ├── Makefile.am │ ├── common.h │ ├── misc.h │ ├── misc.c │ ├── murmurhash3.c │ ├── hat-trie.h │ ├── ahtable.h │ ├── ahtable.c │ ├── hat-trie.c │ └── pstdint.h ├── .gitignore ├── hat-trie-0.1.pc.in ├── test │ ├── Makefile.am │ ├── str_map.h │ ├── bench_sorted_iter.c │ ├── check_ahtable.c │ ├── str_map.c │ └── check_hattrie.c ├── configure.ac ├── COPYING └── README.md ├── .hgtags ├── tox.ini ├── .travis.yml ├── .gitignore ├── MANIFEST.in ├── bench.ini ├── .hgignore ├── CHANGES.rst ├── LICENSE ├── src ├── chat_trie.pxd ├── hat_trie.pyx └── chat_trie.c ├── setup.py └── README.rst /update_c.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | cython src/hat_trie.pyx src/chat_trie.pxd -a -------------------------------------------------------------------------------- /bench/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import -------------------------------------------------------------------------------- /bench/words100k.txt.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pytries/hat-trie/HEAD/bench/words100k.txt.zip -------------------------------------------------------------------------------- /hat-trie/TODO: -------------------------------------------------------------------------------- 1 | 2 | todo: 3 | * Deletion in 
ahtable. 4 | * Deletion in hattrie. 5 | 6 | 7 | -------------------------------------------------------------------------------- /.hgtags: -------------------------------------------------------------------------------- 1 | 4b4c246aae46e501bb55e24495eb30de41ff0b8d 0.1 2 | 1871f420d10a49dae6f35677de5bb28b6b2cc71a 0.2 3 | -------------------------------------------------------------------------------- /hat-trie/.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | compiler: 3 | - clang 4 | - gcc 5 | before_script: autoreconf -i 6 | script: ./configure && make && make check 7 | -------------------------------------------------------------------------------- /hat-trie/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | SUBDIRS = src test 3 | 4 | EXTRA_DIST = README.md COPYING 5 | 6 | pkgconfigdir = $(libdir)/pkgconfig 7 | pkgconfig_DATA = hat-trie-0.1.pc 8 | 9 | ACLOCAL_AMFLAGS=-I m4 10 | 11 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27,py33,py34,py35 3 | 4 | [testenv] 5 | deps = 6 | pytest 7 | # psutil 8 | commands= 9 | pip install -I . 
10 | py.test [] 11 | # python bench/speed.py 12 | -------------------------------------------------------------------------------- /hat-trie/src/murmurhash3.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef MURMURHASH3_H 3 | #define MURMURHASH3_H 4 | 5 | #include 6 | 7 | #include "pstdint.h" 8 | 9 | uint32_t hash(const char* data, size_t len); 10 | 11 | #endif 12 | 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | python: 3 | - "3.5" 4 | - "3.4" 5 | - "3.3" 6 | - "2.7" 7 | # - "pypy" 8 | 9 | install: 10 | - pip install cython 11 | - python setup.py install 12 | 13 | script: 14 | - ./update_c.sh 15 | - py.test 16 | -------------------------------------------------------------------------------- /hat-trie/.gitignore: -------------------------------------------------------------------------------- 1 | *.la 2 | *.lo 3 | *.o 4 | *~ 5 | .DS_Store 6 | .deps 7 | .libs 8 | Makefile 9 | Makefile.in 10 | aclocal.m4 11 | autom4te.cache 12 | config.* 13 | configure 14 | depcomp 15 | hat-trie-*.pc 16 | hat-trie-*.tar.gz 17 | install-sh 18 | libtool 19 | ltmain.sh 20 | m4 21 | missing 22 | -------------------------------------------------------------------------------- /hat-trie/hat-trie-0.1.pc.in: -------------------------------------------------------------------------------- 1 | 2 | prefix=@prefix@ 3 | exec_prefix=@exec_prefix@ 4 | libdir=@libdir@ 5 | includedir=@includedir@ 6 | 7 | Name: @PACKAGE_NAME@ 8 | Description: An efficient trie implementation. 
9 | Version: @PACKAGE_VERSION@ 10 | Cflags: -I${includedir} 11 | Libs: -L${libdir} 12 | 13 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | MANIFEST 2 | src/*.html 3 | 4 | *.py[cod] 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Packages 10 | *.egg 11 | *.egg-info 12 | dist 13 | build 14 | sdist 15 | __pycache__ 16 | 17 | # Installer logs 18 | pip-log.txt 19 | 20 | # Unit test / coverage reports 21 | .coverage 22 | .tox 23 | .cache 24 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include CHANGES.rst 3 | include LICENSE 4 | include tox.ini 5 | include update_c.sh 6 | 7 | recursive-include hat-trie/src *.h *.c 8 | include hat-trie/src/config.h.in 9 | include hat-trie/configure 10 | include hat-trie/configure.ac 11 | 12 | recursive-include src *.pyx *.pxd *.c 13 | -------------------------------------------------------------------------------- /bench.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py26,py27,py32,py33 3 | 4 | [testenv] 5 | deps = 6 | cython 7 | pytest 8 | # psutil 9 | commands= 10 | python bench/speed.py 11 | 12 | [testenv:pypy] 13 | deps = 14 | git+https://github.com/cython/cython.git@8102e17127206b51d7a419a3e9673ad795672a7d#egg=cython 15 | pytest 16 | -------------------------------------------------------------------------------- /hat-trie/src/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | lib_LTLIBRARIES = libhat-trie.la 3 | 4 | libhat_trie_la_SOURCES = common.h \ 5 | ahtable.h ahtable.c \ 6 | hat-trie.h hat-trie.c \ 7 | misc.h misc.c \ 8 | murmurhash3.h murmurhash3.c 9 | 10 | pkginclude_HEADERS = hat-trie.h ahtable.h common.h pstdint.h 11 | 12 | 
-------------------------------------------------------------------------------- /hat-trie/src/common.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of hat-trie. 3 | * 4 | * Copyright (c) 2011 by Daniel C. Jones 5 | * 6 | * 7 | * Common typedefs, etc. 8 | * 9 | */ 10 | 11 | 12 | #ifndef HATTRIE_COMMON_H 13 | #define HATTRIE_COMMON_H 14 | 15 | #include "pstdint.h" 16 | 17 | // an unsigned int that is guaranteed to be the same size as a pointer 18 | typedef uintptr_t value_t; 19 | 20 | #endif 21 | 22 | 23 | -------------------------------------------------------------------------------- /hat-trie/src/misc.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of hat-trie. 3 | * 4 | * Copyright (c) 2011 by Daniel C. Jones 5 | * 6 | * misc : 7 | * miscellaneous functions. 8 | * 9 | */ 10 | 11 | #ifndef LINESET_MISC_H 12 | #define LINESET_MISC_H 13 | 14 | #include 15 | 16 | void* malloc_or_die(size_t); 17 | void* realloc_or_die(void*, size_t); 18 | FILE* fopen_or_die(const char*, const char*); 19 | 20 | #endif 21 | 22 | 23 | -------------------------------------------------------------------------------- /.hgignore: -------------------------------------------------------------------------------- 1 | ^build 2 | ^MANIFEST$ 3 | ^dist 4 | \.so$ 5 | \.o$ 6 | \.lo$ 7 | \.la$ 8 | 9 | ^stuff/ 10 | \.rej$ 11 | \.pyc$ 12 | ^.tox 13 | \.orig$ 14 | \.prof$ 15 | \.coverage$ 16 | \.git 17 | 18 | Makefile 19 | Makefile.in 20 | 21 | hat-trie/aclocal.m4 22 | hat-trie/autom4te 23 | hat-trie/config 24 | hat-trie/depcomp 25 | hat-trie/hat-trie-0.1.pc 26 | hat-trie/install-sh 27 | hat-trie/libtool 28 | hat-trie/ltmain.sh 29 | hat-trie/missing 30 | 31 | \.deps 32 | \.libs 33 | hat-trie/stamp-h1 34 | hat-trie/test/check_ahtable$ 35 | hat-trie/test/check_hattrie$ 36 | src/.*\.html$ 37 | src/.*\.c$ 
-------------------------------------------------------------------------------- /hat-trie/test/Makefile.am: -------------------------------------------------------------------------------- 1 | 2 | TESTS = check_ahtable check_hattrie 3 | check_PROGRAMS = check_ahtable check_hattrie bench_sorted_iter 4 | 5 | check_ahtable_SOURCES = check_ahtable.c str_map.c 6 | check_ahtable_LDADD = $(top_builddir)/src/libhat-trie.la 7 | check_ahtable_CPPFLAGS = -I$(top_builddir)/src 8 | 9 | check_hattrie_SOURCES = check_hattrie.c str_map.c 10 | check_hattrie_LDADD = $(top_builddir)/src/libhat-trie.la 11 | check_hattrie_CPPFLAGS = -I$(top_builddir)/src 12 | 13 | bench_sorted_iter_SOURCES = bench_sorted_iter.c 14 | bench_sorted_iter_LDADD = $(top_builddir)/src/libhat-trie.la 15 | bench_sorted_iter_CPPFLAGS = -I$(top_builddir)/src 16 | -------------------------------------------------------------------------------- /hat-trie/configure.ac: -------------------------------------------------------------------------------- 1 | 2 | AC_INIT([hat-trie], [0.1.0], [dcjones@cs.washington.edu]) 3 | AM_INIT_AUTOMAKE([foreign]) 4 | m4_ifdef([AM_SILENT_RULES],[AM_SILENT_RULES([yes])]) 5 | AC_CONFIG_MACRO_DIR([m4]) 6 | 7 | base_CFLAGS="-std=c99 -Wall -Wextra -pedantic" 8 | opt_CFLAGS="${base_CFLAGS} -O3" 9 | dbg_CFLAGS="${base_CFLAGS} -g -O0" 10 | 11 | AC_ARG_ENABLE([debugging], 12 | [AS_HELP_STRING([--enable-debugging], 13 | [enable debugging info (default is no)])], 14 | [], [enable_debugging=no]) 15 | 16 | AS_IF([test "x$enable_debugging" = xyes], 17 | [CFLAGS="$dbg_CFLAGS"], 18 | [CFLAGS="$opt_CFLAGS"]) 19 | 20 | 21 | AC_PROG_CC 22 | AC_PROG_CPP 23 | AC_PROG_INSTALL 24 | AC_PROG_LN_S 25 | AC_PROG_MAKE_SET 26 | AC_DISABLE_SHARED 27 | AC_PROG_LIBTOOL 28 | 29 | AC_C_BIGENDIAN([AC_MSG_ERROR([Big-endian systems are not currently supported.])]) 30 | AC_HEADER_STDBOOL 31 | 32 | AC_CONFIG_FILES([hat-trie-0.1.pc Makefile src/Makefile test/Makefile]) 33 | AC_OUTPUT 34 | 35 | 
-------------------------------------------------------------------------------- /hat-trie/src/misc.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of hat-trie. 3 | * 4 | * Copyright (c) 2011 by Daniel C. Jones 5 | * 6 | */ 7 | 8 | #include "misc.h" 9 | #include 10 | 11 | 12 | void* malloc_or_die(size_t n) 13 | { 14 | void* p = malloc(n); 15 | if (p == NULL && n != 0) { 16 | fprintf(stderr, "Cannot allocate %zu bytes.\n", n); 17 | exit(EXIT_FAILURE); 18 | } 19 | return p; 20 | } 21 | 22 | 23 | void* realloc_or_die(void* ptr, size_t n) 24 | { 25 | void* p = realloc(ptr, n); 26 | if (p == NULL && n != 0) { 27 | fprintf(stderr, "Cannot allocate %zu bytes.\n", n); 28 | exit(EXIT_FAILURE); 29 | } 30 | return p; 31 | } 32 | 33 | 34 | FILE* fopen_or_die(const char* path, const char* mode) 35 | { 36 | FILE* f = fopen(path, mode); 37 | if (f == NULL) { 38 | fprintf(stderr, "Cannot open file %s with mode %s.\n", path, mode); 39 | exit(EXIT_FAILURE); 40 | } 41 | return f; 42 | } 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /CHANGES.rst: -------------------------------------------------------------------------------- 1 | 0.3 (2016-02-08) 2 | ---------------- 3 | 4 | * hat-trie C library is updated to the latest version (thanks Michael Phan-Ba); 5 | * FloatTrie (thanks Michael Phan-Ba); 6 | * Python 2.6 and Python 3.2 support is dropped. hat-trie 0.3 likely still works 7 | in 2.6 and 3.2, but this is no longer checked by unit tests, and 8 | future compatibility is not guaranteed; 9 | * setup.py is switched to setuptools. 
10 | 11 | 12 | 0.2 (2014-08-22) 13 | ---------------- 14 | 15 | * Installation is simplified: Cython is no longer required; 16 | * ``get`` method for tries (thanks Brandon Forehand); 17 | * ``iterkeys`` method is fixed (thanks Brandon Forehand); 18 | * ``hat_trie.Trie`` can store any Python object as a value (thanks Brandon Forehand); 19 | * segfault is fixed for large int values (thanks Brandon Forehand); 20 | * hat-trie C library is updated to the latest version to fix some issues 21 | with 64bit builds and RHEL (thanks Brandon Forehand and Michael Heilman); 22 | 23 | 0.1 (2014-03-27) 24 | ---------------- 25 | 26 | Initial release. 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Mikhail Korobov, 2012-2014 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is furnished 8 | to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 14 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR 15 | A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 16 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 17 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 18 | OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | -------------------------------------------------------------------------------- /hat-trie/COPYING: -------------------------------------------------------------------------------- 1 | Copyright (C) 2011 by Daniel C. Jones 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | 20 | -------------------------------------------------------------------------------- /hat-trie/test/str_map.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011 by Daniel C. Jones 3 | * 4 | * hash : 5 | * A quick and simple hash table mapping strings to things. 
6 | * 7 | */ 8 | 9 | 10 | #ifndef ISOLATOR_STR_MAP_H 11 | #define ISOLATOR_STR_MAP_H 12 | 13 | #if defined(__cplusplus) 14 | extern "C" { 15 | #endif 16 | 17 | #include 18 | #include 19 | 20 | #include "common.h" 21 | 22 | 23 | typedef struct str_map_pair_ 24 | { 25 | char* key; 26 | size_t keylen; 27 | value_t value; 28 | 29 | struct str_map_pair_* next; 30 | } str_map_pair; 31 | 32 | 33 | typedef struct 34 | { 35 | str_map_pair** A; /* table proper */ 36 | size_t n; /* table size */ 37 | size_t m; /* hashed items */ 38 | size_t max_m; /* max hashed items before rehash */ 39 | } str_map; 40 | 41 | 42 | 43 | str_map* str_map_create(void); 44 | void str_map_destroy(str_map*); 45 | void str_map_set(str_map*, const char* key, size_t keylen, value_t value); 46 | value_t str_map_get(const str_map*, const char* key, size_t keylen); 47 | void str_map_del(str_map* T, const char* key, size_t keylen); 48 | 49 | #if defined(__cplusplus) 50 | } 51 | #endif 52 | 53 | #endif 54 | 55 | -------------------------------------------------------------------------------- /hat-trie/README.md: -------------------------------------------------------------------------------- 1 | 2 | Hat-Trie 3 | ======== 4 | 5 | [![Build Status](https://travis-ci.org/dcjones/hat-trie.svg)](https://travis-ci.org/dcjones/hat-trie) 6 | 7 | This is an ANSI C99 implementation of the HAT-trie data structure of Askitis and 8 | Sinha, an extremely efficient (space and time) modern variant of tries. 9 | 10 | The version implemented here maps arrays of bytes to words (i.e., unsigned 11 | longs), which can be used to store counts, pointers, etc, or not used at all if 12 | you simply want to maintain a set of unique strings. 13 | 14 | For details see, 15 | 16 | 1. Askitis, N., & Sinha, R. (2007). HAT-trie: a cache-conscious trie-based data 17 | structure for strings. Proceedings of the thirtieth Australasian conference on 18 | Computer science-Volume 62 (pp. 97–105). Australian Computer Society, Inc. 19 | 20 | 2. 
Askitis, N., & Zobel, J. (2005). Cache-conscious collision resolution in 21 | string hash tables. String Processing and Information Retrieval (pp. 22 | 91–102). Springer. 23 | 24 | 25 | Installation 26 | ------------ 27 | 28 | git clone git@github.com:dcjones/hat-trie.git 29 | cd hat-trie 30 | autoreconf -i 31 | ./configure 32 | make install 33 | 34 | To use the library, include `hat-trie.h` and link using `-lhat-trie`. 35 | 36 | 37 | Tests 38 | ----- 39 | 40 | Build and run the tests: 41 | 42 | make check 43 | 44 | Other Language Bindings 45 | ----------------------- 46 | * Ruby - https://github.com/luikore/triez 47 | * Python - https://github.com/kmike/hat-trie 48 | -------------------------------------------------------------------------------- /hat-trie/src/murmurhash3.c: -------------------------------------------------------------------------------- 1 | /* This is MurmurHash3. The original C++ code was placed in the public domain 2 | * by its author, Austin Appleby. */ 3 | 4 | #include "murmurhash3.h" 5 | 6 | static inline uint32_t fmix(uint32_t h) 7 | { 8 | h ^= h >> 16; 9 | h *= 0x85ebca6b; 10 | h ^= h >> 13; 11 | h *= 0xc2b2ae35; 12 | h ^= h >> 16; 13 | 14 | return h; 15 | } 16 | 17 | 18 | static inline uint32_t rotl32(uint32_t x, int8_t r) 19 | { 20 | return (x << r) | (x >> (32 - r)); 21 | } 22 | 23 | 24 | uint32_t hash(const char* data, size_t len_) 25 | { 26 | const int len = (int) len_; 27 | const int nblocks = len / 4; 28 | 29 | uint32_t h1 = 0xc062fb4a; 30 | 31 | uint32_t c1 = 0xcc9e2d51; 32 | uint32_t c2 = 0x1b873593; 33 | 34 | //---------- 35 | // body 36 | 37 | const uint32_t * blocks = (const uint32_t*) (data + nblocks * 4); 38 | 39 | int i; 40 | for(i = -nblocks; i; i++) 41 | { 42 | uint32_t k1 = blocks[i]; 43 | 44 | k1 *= c1; 45 | k1 = rotl32(k1, 15); 46 | k1 *= c2; 47 | 48 | h1 ^= k1; 49 | h1 = rotl32(h1, 13); 50 | h1 = h1*5+0xe6546b64; 51 | } 52 | 53 | //---------- 54 | // tail 55 | 56 | const uint8_t * tail = (const uint8_t*)(data + 
nblocks*4); 57 | 58 | uint32_t k1 = 0; 59 | 60 | switch(len & 3) 61 | { 62 | case 3: k1 ^= tail[2] << 16; 63 | case 2: k1 ^= tail[1] << 8; 64 | case 1: k1 ^= tail[0]; 65 | k1 *= c1; k1 = rotl32(k1,15); k1 *= c2; h1 ^= k1; 66 | } 67 | 68 | //---------- 69 | // finalization 70 | 71 | h1 ^= len; 72 | 73 | h1 = fmix(h1); 74 | 75 | return h1; 76 | } 77 | 78 | -------------------------------------------------------------------------------- /src/chat_trie.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "../hat-trie/src/hat-trie.h": 2 | 3 | ctypedef int value_t 4 | ctypedef int size_t 5 | 6 | ctypedef struct hattrie_t: 7 | pass 8 | 9 | hattrie_t* hattrie_create () # Create an empty hat-trie. 10 | void hattrie_free (hattrie_t*) # Free all memory used by a trie. 11 | hattrie_t* hattrie_dup (hattrie_t*) # Duplicate an existing trie. 12 | void hattrie_clear (hattrie_t*) # Remove all entries. 13 | size_t hattrie_size (const hattrie_t*) # Number of stored keys. 14 | 15 | 16 | # Find the given key in the trie, inserting it if it does not exist, and 17 | # returning a pointer to its value. 18 | # This pointer is not guaranteed to be valid after additional calls to 19 | # hattrie_get, hattrie_del, hattrie_clear, or other functions that 20 | # modify the trie. 21 | value_t* hattrie_get (hattrie_t*, char* key, size_t len) 22 | 23 | # Find a given key in the table, returning a NULL pointer if it does not exist. 
24 | value_t* hattrie_tryget (hattrie_t*, char* key, size_t len) 25 | 26 | ctypedef struct hattrie_iter_t: 27 | pass 28 | 29 | hattrie_iter_t* hattrie_iter_begin (hattrie_t*, bint sorted) 30 | void hattrie_iter_next (hattrie_iter_t*) 31 | bint hattrie_iter_finished (hattrie_iter_t*) 32 | void hattrie_iter_free (hattrie_iter_t*) 33 | char* hattrie_iter_key (hattrie_iter_t*, size_t* len) 34 | value_t* hattrie_iter_val (hattrie_iter_t*) 35 | 36 | cdef struct hattrie_t_: 37 | void* root 38 | size_t m # number of stored keys 39 | -------------------------------------------------------------------------------- /tests/test_inttrie.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, unicode_literals 3 | import string 4 | import random 5 | 6 | import pytest 7 | import hat_trie 8 | 9 | def test_getitem_set(): 10 | trie = hat_trie.IntTrie() 11 | trie['foo'] = 5 12 | trie['bar'] = 10 13 | assert trie['foo'] == 5 14 | assert trie['bar'] == 10 15 | 16 | with pytest.raises(KeyError): 17 | trie['f'] 18 | 19 | with pytest.raises(KeyError): 20 | trie['foob'] 21 | 22 | with pytest.raises(KeyError): 23 | trie['x'] 24 | 25 | non_ascii_key = 'вася' 26 | trie[non_ascii_key] = 20 27 | assert trie[non_ascii_key] == 20 28 | 29 | def test_get(): 30 | trie = hat_trie.IntTrie() 31 | 32 | assert trie.get('foo') == -1 33 | assert trie.get('bar') == -1 34 | assert trie.get('foo', 5) == 5 35 | 36 | trie['foo'] = 5 37 | trie['bar'] = 10 38 | 39 | assert trie.get('foo') == 5 40 | assert trie.get('bar') == 10 41 | 42 | def test_contains(): 43 | trie = hat_trie.IntTrie() 44 | assert 'foo' not in trie 45 | trie['foo'] = 5 46 | assert 'foo' in trie 47 | assert 'f' not in trie 48 | 49 | 50 | def test_get_set_fuzzy(): 51 | russian = 'абвгдеёжзиклмнопрстуфхцчъыьэюя' 52 | alphabet = russian.upper() + string.ascii_lowercase 53 | words = list(set([ 54 | "".join([random.choice(alphabet) for x in 
range(random.randint(2,10))]) 55 | for y in range(20000) 56 | ])) 57 | 58 | trie = hat_trie.IntTrie() 59 | 60 | enumerated_words = list(enumerate(words)) 61 | 62 | for index, word in enumerated_words: 63 | trie[word] = index 64 | 65 | random.shuffle(enumerated_words) 66 | for index, word in enumerated_words: 67 | assert word in trie, word 68 | assert trie[word] == index, (word, index) 69 | 70 | assert sorted(trie.keys()) == sorted(words) 71 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import os 3 | from setuptools import setup 4 | from distutils.extension import Extension 5 | 6 | HATTRIE_DIR = 'hat-trie/src' 7 | HATTRIE_FILE_NAMES = ['ahtable.c', 'hat-trie.c', 'misc.c', 'murmurhash3.c'] 8 | HATTRIE_FILES = [os.path.join(HATTRIE_DIR, name) for name in HATTRIE_FILE_NAMES] 9 | 10 | with open('README.rst') as file_readme: 11 | readme = file_readme.read() 12 | 13 | with open('CHANGES.rst') as file_changes: 14 | changes = file_changes.read() 15 | 16 | setup( 17 | name="hat-trie", 18 | version="0.3", 19 | description="HAT-Trie for Python", 20 | long_description = readme + "\n\n" + changes, 21 | author='Mikhail Korobov', 22 | author_email='kmike84@gmail.com', 23 | url='https://github.com/kmike/hat-trie/', 24 | 25 | ext_modules = [ 26 | Extension( 27 | "hat_trie", 28 | ['src/hat_trie.c', 'src/chat_trie.c'] + HATTRIE_FILES, 29 | include_dirs=['hat-trie/src'], 30 | extra_compile_args=["-Wno-error=declaration-after-statement"], 31 | ) 32 | ], 33 | 34 | classifiers=[ 35 | 'Development Status :: 3 - Alpha', 36 | 'Intended Audience :: Developers', 37 | 'Intended Audience :: Science/Research', 38 | 'License :: OSI Approved :: MIT License', 39 | 'Programming Language :: Cython', 40 | 'Programming Language :: Python', 41 | 'Programming Language :: Python :: 2', 42 | 'Programming Language :: Python :: 2.7', 43 | 'Programming 
Language :: Python :: 3', 44 | 'Programming Language :: Python :: 3.3', 45 | 'Programming Language :: Python :: 3.4', 46 | 'Programming Language :: Python :: 3.5', 47 | 'Programming Language :: Python :: Implementation :: CPython', 48 | 'Topic :: Software Development :: Libraries :: Python Modules', 49 | 'Topic :: Scientific/Engineering :: Information Analysis', 50 | 'Topic :: Text Processing :: Linguistic', 51 | ], 52 | ) 53 | -------------------------------------------------------------------------------- /hat-trie/test/bench_sorted_iter.c: -------------------------------------------------------------------------------- 1 | 2 | /* A quick test of the degree to which ordered iteration is slower than unordered. */ 3 | 4 | #include "../src/hat-trie.h" 5 | #include 6 | #include 7 | 8 | 9 | /* Simple random string generation. */ 10 | void randstr(char* x, size_t len) 11 | { 12 | x[len] = '\0'; 13 | while (len > 0) { 14 | x[--len] = '\x20' + (rand() % ('\x7e' - '\x20' + 1)); 15 | } 16 | } 17 | 18 | int main() 19 | { 20 | hattrie_t* T = hattrie_create(); 21 | const size_t n = 1000000; // how many strings 22 | const size_t m_low = 50; // minimum length of each string 23 | const size_t m_high = 500; // maximum length of each string 24 | char x[501]; 25 | 26 | size_t i, m; 27 | for (i = 0; i < n; ++i) { 28 | m = m_low + rand() % (m_high - m_low); 29 | randstr(x, m); 30 | *hattrie_get(T, x, m) = 1; 31 | } 32 | 33 | hattrie_iter_t* it; 34 | clock_t t0, t; 35 | const size_t repetitions = 100; 36 | size_t r; 37 | 38 | /* iterate in unsorted order */ 39 | fprintf(stderr, "iterating out of order ... "); 40 | t0 = clock(); 41 | for (r = 0; r < repetitions; ++r) { 42 | it = hattrie_iter_begin(T, false); 43 | while (!hattrie_iter_finished(it)) { 44 | hattrie_iter_next(it); 45 | } 46 | hattrie_iter_free(it); 47 | } 48 | t = clock(); 49 | fprintf(stderr, "finished. 
(%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC); 50 | 51 | 52 | /* iterate in sorted order */ 53 | fprintf(stderr, "iterating in order ... "); 54 | t0 = clock(); 55 | for (r = 0; r < repetitions; ++r) { 56 | it = hattrie_iter_begin(T, true); 57 | while (!hattrie_iter_finished(it)) { 58 | hattrie_iter_next(it); 59 | } 60 | hattrie_iter_free(it); 61 | } 62 | t = clock(); 63 | fprintf(stderr, "finished. (%0.2f seconds)\n", (double) (t - t0) / (double) CLOCKS_PER_SEC); 64 | 65 | 66 | hattrie_free(T); 67 | 68 | return 0; 69 | } 70 | -------------------------------------------------------------------------------- /tests/test_floattrie.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, unicode_literals 3 | import string 4 | import random 5 | 6 | import pytest 7 | import hat_trie 8 | 9 | try: 10 | from math import isnan 11 | except: 12 | def nan(i): i != i 13 | 14 | def test_getitem_set(): 15 | trie = hat_trie.FloatTrie() 16 | trie['foo'] = 5.1 17 | trie['bar'] = 10.1 18 | assert abs(trie['foo'] - 5.1) < .001 19 | assert abs(trie['bar'] - 10.1) < .001 20 | 21 | with pytest.raises(KeyError): 22 | trie['f'] 23 | 24 | with pytest.raises(KeyError): 25 | trie['foob'] 26 | 27 | with pytest.raises(KeyError): 28 | trie['x'] 29 | 30 | non_ascii_key = 'вася' 31 | trie[non_ascii_key] = 20.1 32 | assert abs(trie[non_ascii_key] - 20.1) < .001 33 | 34 | def test_get(): 35 | trie = hat_trie.FloatTrie() 36 | 37 | assert isnan(trie.get('foo')) 38 | assert isnan(trie.get('bar')) 39 | assert abs(trie.get('foo', 5.0) - 5.0) < .001 40 | 41 | trie['foo'] = 5.5 42 | trie['bar'] = 10.1 43 | 44 | assert abs(trie.get('foo') - 5.5) < .001 45 | assert abs(trie.get('bar') - 10.1) < .001 46 | 47 | def test_contains(): 48 | trie = hat_trie.FloatTrie() 49 | assert 'foo' not in trie 50 | trie['foo'] = 5.1 51 | assert 'foo' in trie 52 | assert 'f' not in trie 53 | 54 | 55 | def 
test_get_set_fuzzy(): 56 | russian = 'абвгдеёжзиклмнопрстуфхцчъыьэюя' 57 | alphabet = russian.upper() + string.ascii_lowercase 58 | words = list(set([ 59 | "".join([random.choice(alphabet) for x in range(random.randint(2,10))]) 60 | for y in range(20000) 61 | ])) 62 | 63 | trie = hat_trie.FloatTrie() 64 | 65 | enumerated_words = list(enumerate(words)) 66 | 67 | for index, word in enumerated_words: 68 | trie[word] = index 69 | 70 | random.shuffle(enumerated_words) 71 | for index, word in enumerated_words: 72 | assert word in trie, word 73 | assert trie[word] == index, (word, index) 74 | 75 | assert sorted(trie.keys()) == sorted(words) 76 | -------------------------------------------------------------------------------- /tests/test_base_trie.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, unicode_literals 3 | import string 4 | import random 5 | 6 | import pytest 7 | import hat_trie 8 | 9 | def test_getitem_set(): 10 | trie = hat_trie.BaseTrie() 11 | trie[b'foo'] = 5 12 | trie[b'bar'] = 10 13 | assert trie[b'foo'] == 5 14 | assert trie[b'bar'] == 10 15 | 16 | with pytest.raises(KeyError): 17 | trie[b'f'] 18 | 19 | with pytest.raises(KeyError): 20 | trie[b'foob'] 21 | 22 | with pytest.raises(KeyError): 23 | trie[b'x'] 24 | 25 | non_ascii_key = 'вася'.encode('cp1251') 26 | trie[non_ascii_key] = 20 27 | assert trie[non_ascii_key] == 20 28 | 29 | def test_get(): 30 | trie = hat_trie.BaseTrie() 31 | 32 | assert trie.get(b'foo') == -1 33 | assert trie.get(b'bar') == -1 34 | assert trie.get(b'foo', 5) == 5 35 | 36 | trie[b'foo'] = 5 37 | trie[b'bar'] = 10 38 | 39 | assert trie.get(b'foo') == 5 40 | assert trie.get(b'bar') == 10 41 | 42 | def test_contains(): 43 | trie = hat_trie.BaseTrie() 44 | assert b'foo' not in trie 45 | trie[b'foo'] = 5 46 | assert b'foo' in trie 47 | assert b'f' not in trie 48 | 49 | def test_len(): 50 | trie = hat_trie.BaseTrie() 51 | assert 
len(trie) == 0 52 | trie[b'foo'] = 1 53 | assert len(trie) == 1 54 | trie[b'bar'] = 1 55 | assert len(trie) == 2 56 | trie[b'f'] = 1 57 | assert len(trie) == 3 58 | 59 | def test_setdefault(): 60 | trie = hat_trie.BaseTrie() 61 | 62 | with pytest.raises(KeyError): 63 | trie[b'foo'] 64 | 65 | trie.setdefault(b'foo', 1) 66 | assert trie[b'foo'] == 1 67 | trie.setdefault(b'foo', 5) 68 | assert trie[b'foo'] == 1 69 | 70 | 71 | 72 | @pytest.mark.parametrize(("encoding",), [['cp1251'], ['utf8']]) 73 | def test_get_set_fuzzy(encoding): 74 | russian = 'абвгдеёжзиклмнопрстуфхцчъыьэюя' 75 | alphabet = string.ascii_lowercase + russian.upper() 76 | words = list(set([ 77 | "".join([random.choice(alphabet) for x in range(random.randint(2,10))]) 78 | for y in range(20000) 79 | ])) 80 | 81 | words = [w.encode(encoding) for w in words] 82 | 83 | trie = hat_trie.BaseTrie() 84 | 85 | enumerated_words = list(enumerate(words)) 86 | 87 | for index, word in enumerated_words: 88 | trie[word] = index 89 | 90 | random.shuffle(enumerated_words) 91 | for index, word in enumerated_words: 92 | assert word in trie, word 93 | assert trie[word] == index, (word, index) 94 | 95 | assert sorted(trie.keys()) == sorted(words) 96 | -------------------------------------------------------------------------------- /hat-trie/src/hat-trie.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of hat-trie 3 | * 4 | * Copyright (c) 2011 by Daniel C. Jones 5 | * 6 | * 7 | * This is an implementation of the HAT-trie data structure described in, 8 | * 9 | * Askitis, N., & Sinha, R. (2007). HAT-trie: a cache-conscious trie-based data 10 | * structure for strings. Proceedings of the thirtieth Australasian conference on 11 | * Computer science-Volume 62 (pp. 97–105). Australian Computer Society, Inc. 12 | * 13 | * The HAT-trie is in essence a hybrid data structure, combining tries and hash 14 | * tables in a clever way to try to get the best of both worlds. 
15 | * 16 | */ 17 | 18 | #ifndef HATTRIE_HATTRIE_H 19 | #define HATTRIE_HATTRIE_H 20 | 21 | #ifdef __cplusplus 22 | extern "C" { 23 | #endif 24 | 25 | #include "common.h" 26 | #include 27 | #include 28 | 29 | typedef struct hattrie_t_ hattrie_t; 30 | 31 | hattrie_t* hattrie_create (void); // Create an empty hat-trie. 32 | void hattrie_free (hattrie_t*); // Free all memory used by a trie. 33 | hattrie_t* hattrie_dup (const hattrie_t*); // Duplicate an existing trie. 34 | void hattrie_clear (hattrie_t*); // Remove all entries. 35 | size_t hattrie_size (const hattrie_t*); // Number of stored keys. 36 | size_t hattrie_sizeof (const hattrie_t*); // Memory used in structure in bytes. 37 | 38 | 39 | /** Find the given key in the trie, inserting it if it does not exist, and 40 | * returning a pointer to its value. 41 | * 42 | * This pointer is not guaranteed to be valid after additional calls to 43 | * hattrie_get, hattrie_del, hattrie_clear, or other functions that modify the 44 | * trie. 45 | */ 46 | value_t* hattrie_get (hattrie_t*, const char* key, size_t len); 47 | 48 | 49 | /** Find a given key in the trie, returning a NULL pointer if it does not 50 | * exist. */ 51 | value_t* hattrie_tryget (hattrie_t*, const char* key, size_t len); 52 | 53 | /** Delete a given key from trie. Returns 0 if successful or -1 if not found. 54 | */ 55 | int hattrie_del(hattrie_t* T, const char* key, size_t len); 56 | 57 | typedef struct hattrie_iter_t_ hattrie_iter_t; 58 | 59 | hattrie_iter_t* hattrie_iter_begin (const hattrie_t*, bool sorted); 60 | void hattrie_iter_next (hattrie_iter_t*); 61 | bool hattrie_iter_finished (hattrie_iter_t*); 62 | void hattrie_iter_free (hattrie_iter_t*); 63 | const char* hattrie_iter_key (hattrie_iter_t*, size_t* len); 64 | value_t* hattrie_iter_val (hattrie_iter_t*); 65 | 66 | /* Return true if two iterators are equal. 
*/ 67 | bool hattrie_iter_equal (const hattrie_iter_t* a, 68 | const hattrie_iter_t* b); 69 | 70 | #ifdef __cplusplus 71 | } 72 | #endif 73 | 74 | #endif 75 | -------------------------------------------------------------------------------- /tests/test_trie.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import, unicode_literals 3 | import string 4 | import random 5 | 6 | import pytest 7 | import hat_trie 8 | 9 | def test_getitem_set(): 10 | trie = hat_trie.Trie() 11 | trie['foo'] = 5 12 | trie['bar'] = 'asdf' 13 | trie['baz'] = (10, 'quuz') 14 | assert trie['foo'] == 5 15 | assert trie['bar'] == 'asdf' 16 | assert trie['baz'] == (10, 'quuz') 17 | 18 | with pytest.raises(KeyError): 19 | trie['f'] 20 | 21 | with pytest.raises(KeyError): 22 | trie['foob'] 23 | 24 | with pytest.raises(KeyError): 25 | trie['x'] 26 | 27 | non_ascii_key = 'вася' 28 | trie[non_ascii_key] = 20 29 | assert trie[non_ascii_key] == 20 30 | 31 | def test_get(): 32 | trie = hat_trie.Trie() 33 | 34 | assert trie.get('foo') is None 35 | assert trie.get('bar') is None 36 | assert trie.get('foo', 5) == 5 37 | 38 | trie['foo'] = 5 39 | trie['bar'] = 10 40 | 41 | assert trie.get('foo') == 5 42 | assert trie.get('bar') == 10 43 | 44 | def test_contains(): 45 | trie = hat_trie.Trie() 46 | assert 'foo' not in trie 47 | trie['foo'] = 5 48 | assert 'foo' in trie 49 | assert 'f' not in trie 50 | 51 | def test_iterkeys(): 52 | trie = hat_trie.Trie() 53 | 54 | non_ascii_key = 'вася' 55 | trie[non_ascii_key] = 20 56 | 57 | assert next(trie.iterkeys()) == non_ascii_key 58 | 59 | def test_get_set_fuzzy(): 60 | russian = 'абвгдеёжзиклмнопрстуфхцчъыьэюя' 61 | alphabet = russian.upper() + string.ascii_lowercase 62 | words = list(set([ 63 | "".join([random.choice(alphabet) for x in range(random.randint(2,10))]) 64 | for y in range(20000) 65 | ])) 66 | 67 | trie = hat_trie.Trie() 68 | 69 | enumerated_words = 
list(enumerate(words)) 70 | 71 | for index, word in enumerated_words: 72 | trie[word] = index 73 | 74 | random.shuffle(enumerated_words) 75 | for index, word in enumerated_words: 76 | assert word in trie, word 77 | assert trie[word] == index, (word, index) 78 | 79 | assert sorted(trie.keys()) == sorted(words) 80 | 81 | def test_leak(): 82 | import sys 83 | 84 | values = list(string.ascii_lowercase) 85 | # Using "list(map())" to avoid the list comprehension variable 86 | # which increases the reference count. 87 | counts = list(map(sys.getrefcount, values)) 88 | 89 | trie = hat_trie.Trie() 90 | 91 | for v in values: 92 | trie['foo'] = v 93 | # Python's for loop variables leak scope into the function body 94 | del v 95 | 96 | count = sys.getrefcount(trie['foo']) 97 | for i in range(10): 98 | current_count = sys.getrefcount(trie['foo']) 99 | assert current_count == count 100 | 101 | count0 = sys.getrefcount(values[0]) 102 | count_last = sys.getrefcount(values[-1]) 103 | assert count0 == counts[0] 104 | assert count_last == counts[-1] + 1 105 | 106 | del trie 107 | 108 | after = list(map(sys.getrefcount, values)) 109 | assert after == counts 110 | -------------------------------------------------------------------------------- /hat-trie/src/ahtable.h: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of hat-trie. 3 | * 4 | * Copyright (c) 2011 by Daniel C. Jones 5 | * 6 | * 7 | * This is an implementation of the 'cache-conscious' hash tables described in, 8 | * 9 | * Askitis, N., & Zobel, J. (2005). Cache-conscious collision resolution in 10 | * string hash tables. String Processing and Information Retrieval (pp. 11 | * 91–102). Springer. 
12 | * 13 | * http://naskitis.com/naskitis-spire05.pdf 14 | * 15 | * Briefly, the idea behind an Array Hash Table is, as opposed to separate 16 | * chaining with linked lists, to store keys contiguously in one big array, 17 | * thereby improving the caching behavior, and reducing space requirements. 18 | * 19 | * ahtable keeps a fixed number (array) of slots, each of which contains a 20 | * variable number of key/value pairs. Each key is preceded by its length-- 21 | * one byte for lengths < 128 bytes, and TWO bytes for longer keys. The least 22 | * significant bit of the first byte indicates, if set, that the size is two 23 | * bytes. The slot number where a key/value pair goes is determined by finding 24 | * the murmurhashed integer value of its key, modulo the number of slots. 25 | * The number of slots expands in a stepwise fashion when the number of 26 | * key/value pairs reaches an arbitrarily large number. 27 | * 28 | * +-------+-------+-------+-------+-------+-------+ 29 | * | 0 | 1 | 2 | 3 | ... | N | 30 | * +-------+-------+-------+-------+-------+-------+ 31 | * | | | | | 32 | * v | | v v 33 | * NULL | | 4html[VALUE] etc. 
34 | * | v 35 | * | 5space[VALUE]4jury[VALUE] 36 | * v 37 | * 6justice[VALUE]3car[VALUE]4star[VALUE] 38 | * 39 | */ 40 | 41 | #ifndef HATTRIE_AHTABLE_H 42 | #define HATTRIE_AHTABLE_H 43 | 44 | #ifdef __cplusplus 45 | extern "C" { 46 | #endif 47 | 48 | #include 49 | #include 50 | #include "pstdint.h" 51 | #include "common.h" 52 | 53 | typedef unsigned char* slot_t; 54 | 55 | typedef struct ahtable_t_ 56 | { 57 | /* these fields are reserved for hattrie to fiddle with */ 58 | uint8_t flag; 59 | unsigned char c0; 60 | unsigned char c1; 61 | 62 | size_t n; // number of slots 63 | size_t m; // number of key/value pairs stored 64 | size_t max_m; // number of stored keys before we resize 65 | 66 | size_t* slot_sizes; 67 | slot_t* slots; 68 | } ahtable_t; 69 | 70 | extern const double ahtable_max_load_factor; 71 | extern const size_t ahtable_initial_size; 72 | 73 | ahtable_t* ahtable_create (void); // Create an empty hash table. 74 | ahtable_t* ahtable_create_n (size_t n); // Create an empty hash table, with 75 | // n slots reserved. 76 | 77 | void ahtable_free (ahtable_t*); // Free all memory used by a table. 78 | void ahtable_clear (ahtable_t*); // Remove all entries. 79 | size_t ahtable_size (const ahtable_t*); // Number of stored keys. 80 | size_t ahtable_sizeof (const ahtable_t*); // Memory used by the table in bytes. 81 | 82 | 83 | /** Find the given key in the table, inserting it if it does not exist, and 84 | * returning a pointer to it's value. 85 | * 86 | * This pointer is not guaranteed to be valid after additional calls to 87 | * ahtable_get, ahtable_del, ahtable_clear, or other functions that modify the 88 | * table. 89 | */ 90 | value_t* ahtable_get (ahtable_t*, const char* key, size_t len); 91 | 92 | 93 | /* Find a given key in the table, return a NULL pointer if it does not exist. 
*/ 94 | value_t* ahtable_tryget (ahtable_t*, const char* key, size_t len); 95 | 96 | 97 | int ahtable_del(ahtable_t*, const char* key, size_t len); 98 | 99 | 100 | typedef struct ahtable_iter_t_ ahtable_iter_t; 101 | 102 | ahtable_iter_t* ahtable_iter_begin (const ahtable_t*, bool sorted); 103 | void ahtable_iter_next (ahtable_iter_t*); 104 | bool ahtable_iter_finished (ahtable_iter_t*); 105 | void ahtable_iter_free (ahtable_iter_t*); 106 | const char* ahtable_iter_key (ahtable_iter_t*, size_t* len); 107 | value_t* ahtable_iter_val (ahtable_iter_t*); 108 | 109 | 110 | #ifdef __cplusplus 111 | } 112 | #endif 113 | 114 | #endif 115 | 116 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | hat-trie 2 | ======== 3 | 4 | HAT-Trie structure for Python (2.x and 3.x). 5 | 6 | This package is a Python wrapper for the `hat-trie`_ C library. 7 | 8 | .. image:: https://travis-ci.org/kmike/hat-trie.svg?branch=master 9 | :target: https://travis-ci.org/kmike/hat-trie 10 | 11 | .. _hat-trie: https://github.com/dcjones/hat-trie 12 | 13 | Installation 14 | ============ 15 | 16 | :: 17 | 18 | pip install hat-trie 19 | 20 | Usage 21 | ===== 22 | 23 | Create a new trie:: 24 | 25 | >>> from hat_trie import Trie 26 | >>> trie = Trie() 27 | 28 | The ``trie`` variable is a dict-like object that supports unicode 29 | keys and can have any Python object as a value. For keys that share prefixes 30 | it usually uses less memory than a Python dict. 31 | 32 | There is also ``hat_trie.IntTrie`` which only supports positive 33 | integers as values. It can be more efficient when you don't need 34 | arbitrary objects as values. 
For example, if you need to store float 35 | values then storing them in an array (either numpy or stdlib's ``array.array``) 36 | and using IntTrie values as indices could be more memory efficient 37 | than storing Python float objects directly in ``hat_trie.Trie``. 38 | 39 | Another way to store float values is to use hat_trie.FloatTrie(). 40 | In this case precision is limited to float32. 41 | 42 | Currently implemented methods are: 43 | 44 | * __getitem__() 45 | * __setitem__() 46 | * __contains__() 47 | * __len__() 48 | * get() 49 | * setdefault() 50 | * keys() 51 | * iterkeys() 52 | 53 | Other methods are not implemented - contributions are welcome! 54 | 55 | 56 | Performance 57 | =========== 58 | 59 | Performance is measured for ``hat_trie.Trie`` against Python's dict with 60 | 100k unique unicode words (English and Russian) as keys and '1' numbers 61 | as values. 62 | 63 | Benchmark results for Python 3.3 (intel i5 1.8GHz, 64 | "1.000M ops/sec" == "1 000 000 operations per second"):: 65 | 66 | dict __getitem__ (hits) 6.874M ops/sec 67 | trie __getitem__ (hits) 3.754M ops/sec 68 | dict __contains__ (hits) 7.035M ops/sec 69 | trie __contains__ (hits) 3.772M ops/sec 70 | dict __contains__ (misses) 5.356M ops/sec 71 | trie __contains__ (misses) 3.364M ops/sec 72 | dict __len__ 785958.286 ops/sec 73 | trie __len__ 574164.704 ops/sec 74 | dict __setitem__ (updates) 6.830M ops/sec 75 | trie __setitem__ (updates) 3.472M ops/sec 76 | dict __setitem__ (inserts) 6.774M ops/sec 77 | trie __setitem__ (inserts) 2.460M ops/sec 78 | dict setdefault (updates) 3.522M ops/sec 79 | trie setdefault (updates) 2.680M ops/sec 80 | dict setdefault (inserts) 4.062M ops/sec 81 | trie setdefault (inserts) 1.866M ops/sec 82 | dict keys() 189.564 ops/sec 83 | trie keys() 16.067 ops/sec 84 | 85 | 86 | HAT-Trie is about 1.5x faster than `datrie`_ on all supported operations; 87 | it also supports fast inserts unlike datrie. On the other hand, 88 | datrie has more features (e.g. 
better iteration support and richer API); 89 | datrie is also more memory efficient. 90 | 91 | If you need a memory efficient data structure and don't need inserts 92 | then marisa-trie_ or DAWG_ should work better. 93 | 94 | .. _datrie: https://github.com/kmike/datrie 95 | .. _marisa-trie: https://github.com/kmike/marisa-trie 96 | .. _DAWG: https://github.com/kmike/DAWG 97 | 98 | Contributing 99 | ============ 100 | 101 | Development happens at github: 102 | 103 | * https://github.com/kmike/hat-trie 104 | 105 | Feel free to submit ideas, bugs, pull requests or regular patches. 106 | 107 | Please don't commit changes to generated C files; I will rebuild them myself. 108 | 109 | Running tests and benchmarks 110 | ---------------------------- 111 | 112 | Make sure `tox`_ is installed and run 113 | 114 | :: 115 | 116 | $ ./update_c.sh 117 | $ tox 118 | 119 | from the source checkout. You will need Cython_ to do that. 120 | 121 | Tests should pass under python 2.7 and 3.3+. 122 | 123 | :: 124 | 125 | $ tox -c bench.ini 126 | 127 | runs benchmarks. 128 | 129 | .. _Cython: http://cython.org 130 | .. _tox: http://tox.testrun.org 131 | 132 | Authors & Contributors 133 | ---------------------- 134 | 135 | * Mikhail Korobov 136 | * Brandon Forehand 137 | * https://github.com/yflau 138 | * Michael Heilman 139 | * Michael Phan-Ba @mikepb 140 | 141 | This module wraps `hat-trie`_ C library by Daniel Jones & contributors. 142 | 143 | License 144 | ======= 145 | 146 | Licensed under MIT License. 147 | -------------------------------------------------------------------------------- /hat-trie/test/check_ahtable.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include "str_map.h" 7 | #include "../src/ahtable.h" 8 | 9 | /* Simple random string generation. 
*/ 10 | void randstr(char* x, size_t len) 11 | { 12 | x[len] = '\0'; 13 | while (len > 0) { 14 | x[--len] = '\x20' + (rand() % ('\x7e' - '\x20' + 1)); 15 | } 16 | } 17 | 18 | 19 | const size_t n = 100000; // how many unique strings 20 | const size_t m_low = 50; // minimum length of each string 21 | const size_t m_high = 500; // maximum length of each string 22 | const size_t k = 200000; // number of insertions 23 | char** xs; 24 | 25 | ahtable_t* T; 26 | str_map* M; 27 | 28 | 29 | void setup() 30 | { 31 | fprintf(stderr, "generating %zu keys ... ", n); 32 | xs = malloc(n * sizeof(char*)); 33 | size_t i; 34 | size_t m; 35 | for (i = 0; i < n; ++i) { 36 | m = m_low + rand() % (m_high - m_low); 37 | xs[i] = malloc(m + 1); 38 | randstr(xs[i], m); 39 | } 40 | 41 | T = ahtable_create(); 42 | M = str_map_create(); 43 | fprintf(stderr, "done.\n"); 44 | } 45 | 46 | 47 | void teardown() 48 | { 49 | ahtable_free(T); 50 | str_map_destroy(M); 51 | 52 | size_t i; 53 | for (i = 0; i < n; ++i) { 54 | free(xs[i]); 55 | } 56 | free(xs); 57 | } 58 | 59 | 60 | void test_ahtable_insert() 61 | { 62 | fprintf(stderr, "inserting %zu keys ... 
\n", k); 63 | 64 | size_t i, j; 65 | value_t* u; 66 | value_t v; 67 | 68 | for (j = 0; j < k; ++j) { 69 | i = rand() % n; 70 | 71 | 72 | v = 1 + str_map_get(M, xs[i], strlen(xs[i])); 73 | str_map_set(M, xs[i], strlen(xs[i]), v); 74 | 75 | 76 | u = ahtable_get(T, xs[i], strlen(xs[i])); 77 | *u += 1; 78 | 79 | 80 | if (*u != v) { 81 | fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n", 82 | *u, v); 83 | } 84 | } 85 | 86 | fprintf(stderr, "sizeof: %zu\n", ahtable_sizeof(T)); 87 | 88 | /* delete some keys */ 89 | for (j = 0; i < k/100; ++j) { 90 | i = rand() % n; 91 | ahtable_del(T, xs[i], strlen(xs[i])); 92 | str_map_del(M, xs[i], strlen(xs[i])); 93 | u = ahtable_tryget(T, xs[i], strlen(xs[i])); 94 | if (u) { 95 | fprintf(stderr, "[error] deleted node found in ahtable\n"); 96 | } 97 | } 98 | 99 | fprintf(stderr, "done.\n"); 100 | } 101 | 102 | 103 | void test_ahtable_iteration() 104 | { 105 | fprintf(stderr, "iterating through %zu keys ... \n", k); 106 | 107 | ahtable_iter_t* i = ahtable_iter_begin(T, false); 108 | 109 | size_t count = 0; 110 | value_t* u; 111 | value_t v; 112 | 113 | size_t len; 114 | const char* key; 115 | 116 | while (!ahtable_iter_finished(i)) { 117 | ++count; 118 | 119 | key = ahtable_iter_key(i, &len); 120 | u = ahtable_iter_val(i); 121 | v = str_map_get(M, key, len); 122 | 123 | if (*u != v) { 124 | if (v == 0) { 125 | fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v); 126 | } 127 | else { 128 | fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v); 129 | } 130 | } 131 | 132 | // this way we will see an error if the same key is iterated through 133 | // twice 134 | str_map_set(M, key, len, 0); 135 | 136 | ahtable_iter_next(i); 137 | } 138 | 139 | if (count != M->m) { 140 | fprintf(stderr, "[error] iterated through %zu element, expected %zu\n", 141 | count, M->m); 142 | } 143 | 144 | ahtable_iter_free(i); 145 | 146 | fprintf(stderr, "done.\n"); 147 | } 148 | 149 | 150 | int cmpkey(const 
char* a, size_t ka, const char* b, size_t kb) 151 | { 152 | int c = memcmp(a, b, ka < kb ? ka : kb); 153 | return c == 0 ? (int) ka - (int) kb : c; 154 | } 155 | 156 | 157 | void test_ahtable_sorted_iteration() 158 | { 159 | fprintf(stderr, "iterating in order through %zu keys ... \n", k); 160 | 161 | ahtable_iter_t* i = ahtable_iter_begin(T, true); 162 | 163 | size_t count = 0; 164 | value_t* u; 165 | value_t v; 166 | 167 | char* prev_key = malloc(m_high + 1); 168 | size_t prev_len = 0; 169 | 170 | const char *key = NULL; 171 | size_t len = 0; 172 | 173 | while (!ahtable_iter_finished(i)) { 174 | memcpy(prev_key, key, len); 175 | prev_len = len; 176 | ++count; 177 | 178 | key = ahtable_iter_key(i, &len); 179 | if (prev_key != NULL && cmpkey(prev_key, prev_len, key, len) > 0) { 180 | fprintf(stderr, "[error] iteration is not correctly ordered.\n"); 181 | } 182 | 183 | u = ahtable_iter_val(i); 184 | v = str_map_get(M, key, len); 185 | 186 | if (*u != v) { 187 | if (v == 0) { 188 | fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v); 189 | } 190 | else { 191 | fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v); 192 | } 193 | } 194 | 195 | // this way we will see an error if the same key is iterated through 196 | // twice 197 | str_map_set(M, key, len, 0); 198 | 199 | ahtable_iter_next(i); 200 | } 201 | 202 | ahtable_iter_free(i); 203 | free(prev_key); 204 | 205 | fprintf(stderr, "done.\n"); 206 | } 207 | 208 | 209 | int main() 210 | { 211 | setup(); 212 | test_ahtable_insert(); 213 | test_ahtable_iteration(); 214 | teardown(); 215 | 216 | setup(); 217 | test_ahtable_insert(); 218 | test_ahtable_sorted_iteration(); 219 | teardown(); 220 | 221 | return 0; 222 | } 223 | -------------------------------------------------------------------------------- /hat-trie/test/str_map.c: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * This file is part of fastq-tools. 
4 | * 5 | * Copyright (c) 2011 by Daniel C. Jones 6 | * 7 | */ 8 | 9 | 10 | #include "str_map.h" 11 | #include "misc.h" 12 | #include 13 | #include 14 | #include 15 | 16 | 17 | static const size_t INITIAL_TABLE_SIZE = 16; 18 | static const double MAX_LOAD = 0.77; 19 | 20 | 21 | /* 22 | * Paul Hsieh's SuperFastHash 23 | * http://www.azillionmonkeys.com/qed/hash.html 24 | */ 25 | 26 | 27 | #undef get16bits 28 | #if (defined(__GNUC__) && defined(__i386__)) || defined(__WATCOMC__) \ 29 | || defined(_MSC_VER) || defined (__BORLANDC__) || defined (__TURBOC__) 30 | #define get16bits(d) (*((const uint16_t *) (d))) 31 | #endif 32 | 33 | #if !defined (get16bits) 34 | #define get16bits(d) ((((uint32_t)(((const uint8_t *)(d))[1])) << 8)\ 35 | +(uint32_t)(((const uint8_t *)(d))[0]) ) 36 | #endif 37 | 38 | static uint32_t hash(const char * data, size_t len) { 39 | uint32_t hash = len, tmp; 40 | int rem; 41 | 42 | if (len <= 0 || data == NULL) return 0; 43 | 44 | rem = len & 3; 45 | len >>= 2; 46 | 47 | /* Main loop */ 48 | for (;len > 0; len--) { 49 | hash += get16bits (data); 50 | tmp = (get16bits (data+2) << 11) ^ hash; 51 | hash = (hash << 16) ^ tmp; 52 | data += 2*sizeof (uint16_t); 53 | hash += hash >> 11; 54 | } 55 | 56 | /* Handle end cases */ 57 | switch (rem) { 58 | case 3: hash += get16bits (data); 59 | hash ^= hash << 16; 60 | hash ^= data[sizeof (uint16_t)] << 18; 61 | hash += hash >> 11; 62 | break; 63 | case 2: hash += get16bits (data); 64 | hash ^= hash << 11; 65 | hash += hash >> 17; 66 | break; 67 | case 1: hash += *data; 68 | hash ^= hash << 10; 69 | hash += hash >> 1; 70 | } 71 | 72 | /* Force "avalanching" of final 127 bits */ 73 | hash ^= hash << 3; 74 | hash += hash >> 5; 75 | hash ^= hash << 4; 76 | hash += hash >> 17; 77 | hash ^= hash << 25; 78 | hash += hash >> 6; 79 | 80 | return hash; 81 | } 82 | 83 | 84 | 85 | static void rehash(str_map* T, size_t new_n); 86 | static void clear(str_map*); 87 | 88 | 89 | 90 | str_map* str_map_create() 91 | { 92 | 
str_map* T = malloc_or_die(sizeof(str_map)); 93 | T->A = malloc_or_die(INITIAL_TABLE_SIZE * sizeof(str_map_pair*)); 94 | memset(T->A, 0, INITIAL_TABLE_SIZE * sizeof(str_map_pair*)); 95 | T->n = INITIAL_TABLE_SIZE; 96 | T->m = 0; 97 | T->max_m = T->n * MAX_LOAD; 98 | 99 | return T; 100 | } 101 | 102 | 103 | void str_map_destroy(str_map* T) 104 | { 105 | if (T != NULL) { 106 | clear(T); 107 | free(T->A); 108 | free(T); 109 | } 110 | } 111 | 112 | 113 | 114 | void clear(str_map* T) 115 | { 116 | str_map_pair* u; 117 | size_t i; 118 | for (i = 0; i < T->n; i++) { 119 | while (T->A[i]) { 120 | u = T->A[i]->next; 121 | free(T->A[i]->key); 122 | free(T->A[i]); 123 | T->A[i] = u; 124 | } 125 | } 126 | 127 | T->m = 0; 128 | } 129 | 130 | 131 | static void insert_without_copy(str_map* T, str_map_pair* V) 132 | { 133 | uint32_t h = hash(V->key, V->keylen) % T->n; 134 | V->next = T->A[h]; 135 | T->A[h] = V; 136 | T->m++; 137 | } 138 | 139 | 140 | 141 | static void rehash(str_map* T, size_t new_n) 142 | { 143 | str_map U; 144 | U.n = new_n; 145 | U.m = 0; 146 | U.max_m = U.n * MAX_LOAD; 147 | U.A = malloc_or_die(U.n * sizeof(str_map_pair*)); 148 | memset(U.A, 0, U.n * sizeof(str_map_pair*)); 149 | 150 | str_map_pair *j, *k; 151 | size_t i; 152 | for (i = 0; i < T->n; i++) { 153 | j = T->A[i]; 154 | while (j) { 155 | k = j->next; 156 | insert_without_copy(&U, j); 157 | j = k; 158 | } 159 | T->A[i] = NULL; 160 | } 161 | 162 | free(T->A); 163 | T->A = U.A; 164 | T->n = U.n; 165 | T->max_m = U.max_m; 166 | } 167 | 168 | 169 | void str_map_set(str_map* T, const char* key, size_t keylen, value_t value) 170 | { 171 | if (T->m >= T->max_m) rehash(T, T->n * 2); 172 | 173 | uint32_t h = hash(key, keylen) % T->n; 174 | 175 | str_map_pair* u = T->A[h]; 176 | 177 | while (u) { 178 | if (u->keylen == keylen && memcmp(u->key, key, keylen) == 0) { 179 | u->value = value; 180 | return; 181 | } 182 | 183 | u = u->next; 184 | } 185 | 186 | u = malloc_or_die(sizeof(str_map_pair)); 187 | u->key = 
malloc_or_die(keylen); 188 | memcpy(u->key, key, keylen); 189 | u->keylen = keylen; 190 | u->value = value; 191 | 192 | u->next = T->A[h]; 193 | T->A[h] = u; 194 | 195 | T->m++; 196 | } 197 | 198 | 199 | value_t str_map_get(const str_map* T, const char* key, size_t keylen) 200 | { 201 | uint32_t h = hash(key, keylen) % T->n; 202 | 203 | str_map_pair* u = T->A[h]; 204 | 205 | while (u) { 206 | if (u->keylen == keylen && memcmp(u->key, key, keylen) == 0) { 207 | return u->value; 208 | } 209 | 210 | u = u->next; 211 | } 212 | 213 | return 0; 214 | } 215 | 216 | void str_map_del(str_map* T, const char* key, size_t keylen) 217 | { 218 | uint32_t h = hash(key, keylen) % T->n; 219 | 220 | str_map_pair* u = T->A[h]; 221 | str_map_pair* p = NULL; 222 | while (u) { 223 | 224 | if (u->keylen == keylen && memcmp(u->key, key, keylen) == 0) { 225 | if (p) { 226 | p->next = u->next; 227 | } else { 228 | T->A[h] = u->next; 229 | } 230 | free(u->key); 231 | free(u); 232 | --T->m; 233 | return; 234 | } 235 | 236 | p = u; 237 | u = u->next; 238 | } 239 | 240 | } 241 | 242 | -------------------------------------------------------------------------------- /hat-trie/test/check_hattrie.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include "str_map.h" 7 | #include "../src/hat-trie.h" 8 | 9 | /* Simple random string generation. 
*/ 10 | void randstr(char* x, size_t len) 11 | { 12 | x[len] = '\0'; 13 | while (len > 0) { 14 | x[--len] = '\x20' + (rand() % ('\x7e' - '\x20' + 1)); 15 | } 16 | } 17 | 18 | const size_t n = 100000; // how many unique strings 19 | const size_t m_low = 50; // minimum length of each string 20 | const size_t m_high = 500; // maximum length of each string 21 | const size_t k = 200000; // number of insertions 22 | const size_t d = 50000; 23 | 24 | char** xs; 25 | char** ds; 26 | 27 | hattrie_t* T; 28 | str_map* M; 29 | 30 | 31 | void setup() 32 | { 33 | fprintf(stderr, "generating %zu keys ... ", n); 34 | xs = malloc(n * sizeof(char*)); 35 | ds = malloc(d * sizeof(char*)); 36 | size_t i; 37 | size_t m; 38 | for (i = 0; i < n; ++i) { 39 | m = m_low + rand() % (m_high - m_low); 40 | xs[i] = malloc(m + 1); 41 | randstr(xs[i], m); 42 | } 43 | for (i = 0; i < d; ++i) { 44 | m = rand()%n; 45 | ds[i] = xs[m]; 46 | } 47 | 48 | T = hattrie_create(); 49 | M = str_map_create(); 50 | fprintf(stderr, "done.\n"); 51 | } 52 | 53 | 54 | void teardown() 55 | { 56 | hattrie_free(T); 57 | str_map_destroy(M); 58 | 59 | size_t i; 60 | for (i = 0; i < n; ++i) { 61 | free(xs[i]); 62 | } 63 | free(xs); 64 | free(ds); 65 | } 66 | 67 | 68 | void test_hattrie_insert() 69 | { 70 | fprintf(stderr, "inserting %zu keys ... \n", k); 71 | 72 | size_t i, j; 73 | value_t* u; 74 | value_t v; 75 | 76 | for (j = 0; j < k; ++j) { 77 | i = rand() % n; 78 | 79 | 80 | v = 1 + str_map_get(M, xs[i], strlen(xs[i])); 81 | str_map_set(M, xs[i], strlen(xs[i]), v); 82 | 83 | 84 | u = hattrie_get(T, xs[i], strlen(xs[i])); 85 | *u += 1; 86 | 87 | 88 | if (*u != v) { 89 | fprintf(stderr, "[error] tally mismatch (reported: %lu, correct: %lu)\n", 90 | *u, v); 91 | } 92 | } 93 | 94 | fprintf(stderr, "sizeof: %zu\n", hattrie_sizeof(T)); 95 | 96 | fprintf(stderr, "deleting %zu keys ... 
\n", d); 97 | for (j = 0; j < d; ++j) { 98 | str_map_del(M, ds[j], strlen(ds[j])); 99 | hattrie_del(T, ds[j], strlen(ds[j])); 100 | u = hattrie_tryget(T, ds[j], strlen(ds[j])); 101 | if (u) { 102 | fprintf(stderr, "[error] item %zu still found in trie after delete\n", 103 | j); 104 | } 105 | } 106 | 107 | fprintf(stderr, "done.\n"); 108 | } 109 | 110 | 111 | 112 | void test_hattrie_iteration() 113 | { 114 | fprintf(stderr, "iterating through %zu keys ... \n", k); 115 | 116 | hattrie_iter_t* i = hattrie_iter_begin(T, false); 117 | 118 | size_t count = 0; 119 | value_t* u; 120 | value_t v; 121 | 122 | size_t len; 123 | const char* key; 124 | 125 | while (!hattrie_iter_finished(i)) { 126 | ++count; 127 | 128 | key = hattrie_iter_key(i, &len); 129 | u = hattrie_iter_val(i); 130 | 131 | v = str_map_get(M, key, len); 132 | 133 | if (*u != v) { 134 | if (v == 0) { 135 | fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v); 136 | } 137 | else { 138 | fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v); 139 | } 140 | } 141 | 142 | // this way we will see an error if the same key is iterated through 143 | // twice 144 | str_map_set(M, key, len, 0); 145 | 146 | hattrie_iter_next(i); 147 | } 148 | 149 | if (count != M->m) { 150 | fprintf(stderr, "[error] iterated through %zu element, expected %zu\n", 151 | count, M->m); 152 | } 153 | 154 | hattrie_iter_free(i); 155 | 156 | fprintf(stderr, "done.\n"); 157 | } 158 | 159 | 160 | int cmpkey(const char* a, size_t ka, const char* b, size_t kb) 161 | { 162 | int c = memcmp(a, b, ka < kb ? ka : kb); 163 | return c == 0 ? (int) ka - (int) kb : c; 164 | } 165 | 166 | 167 | void test_hattrie_sorted_iteration() 168 | { 169 | fprintf(stderr, "iterating in order through %zu keys ... 
\n", k); 170 | 171 | hattrie_iter_t* i = hattrie_iter_begin(T, true); 172 | 173 | size_t count = 0; 174 | value_t* u; 175 | value_t v; 176 | 177 | char* key_copy = malloc(m_high + 1); 178 | char* prev_key = malloc(m_high + 1); 179 | memset(prev_key, 0, m_high + 1); 180 | size_t prev_len = 0; 181 | 182 | const char *key = NULL; 183 | size_t len = 0; 184 | 185 | while (!hattrie_iter_finished(i)) { 186 | memcpy(prev_key, key_copy, len); 187 | prev_key[len] = '\0'; 188 | prev_len = len; 189 | ++count; 190 | 191 | key = hattrie_iter_key(i, &len); 192 | 193 | /* memory for key may be changed on iter, copy it */ 194 | strncpy(key_copy, key, len); 195 | 196 | if (prev_key != NULL && cmpkey(prev_key, prev_len, key, len) > 0) { 197 | fprintf(stderr, "[error] iteration is not correctly ordered.\n"); 198 | } 199 | 200 | u = hattrie_iter_val(i); 201 | v = str_map_get(M, key, len); 202 | 203 | if (*u != v) { 204 | if (v == 0) { 205 | fprintf(stderr, "[error] incorrect iteration (%lu, %lu)\n", *u, v); 206 | } 207 | else { 208 | fprintf(stderr, "[error] incorrect iteration tally (%lu, %lu)\n", *u, v); 209 | } 210 | } 211 | 212 | // this way we will see an error if the same key is iterated through 213 | // twice 214 | str_map_set(M, key, len, 0); 215 | 216 | hattrie_iter_next(i); 217 | } 218 | 219 | if (count != M->m) { 220 | fprintf(stderr, "[error] iterated through %zu element, expected %zu\n", 221 | count, M->m); 222 | } 223 | 224 | hattrie_iter_free(i); 225 | free(prev_key); 226 | free(key_copy); 227 | 228 | fprintf(stderr, "done.\n"); 229 | } 230 | 231 | 232 | void test_trie_non_ascii() 233 | { 234 | fprintf(stderr, "checking non-ascii... 
\n"); 235 | 236 | value_t* u; 237 | hattrie_t* T = hattrie_create(); 238 | char* txt = "\x81\x70"; 239 | 240 | u = hattrie_get(T, txt, strlen(txt)); 241 | *u = 10; 242 | 243 | u = hattrie_tryget(T, txt, strlen(txt)); 244 | if (*u != 10){ 245 | fprintf(stderr, "can't store non-ascii strings\n"); 246 | } 247 | hattrie_free(T); 248 | 249 | fprintf(stderr, "done.\n"); 250 | } 251 | 252 | 253 | 254 | 255 | int main() 256 | { 257 | test_trie_non_ascii(); 258 | 259 | setup(); 260 | test_hattrie_insert(); 261 | test_hattrie_iteration(); 262 | teardown(); 263 | 264 | setup(); 265 | test_hattrie_insert(); 266 | test_hattrie_sorted_iteration(); 267 | teardown(); 268 | 269 | return 0; 270 | } 271 | -------------------------------------------------------------------------------- /src/hat_trie.pyx: -------------------------------------------------------------------------------- 1 | # cython: profile=True 2 | 3 | from libc.math cimport NAN 4 | from chat_trie cimport * 5 | 6 | cimport cpython 7 | 8 | cdef class BaseTrie: 9 | """ 10 | Base HAT-Trie wrapper. 
11 | """ 12 | 13 | cdef hattrie_t* _trie 14 | 15 | def __cinit__(self): 16 | self._trie = hattrie_create() 17 | 18 | def __dealloc__(self): 19 | if self._trie: 20 | hattrie_free(self._trie) 21 | 22 | 23 | def __getitem__(self, bytes key): 24 | return self._getitem(key) 25 | 26 | def __setitem__(self, bytes key, int value): 27 | self._setitem(key, value) 28 | 29 | def __contains__(self, bytes key): 30 | return self._contains(key) 31 | 32 | def __len__(self): 33 | return hattrie_size(self._trie) 34 | 35 | def get(self, bytes key, value=-1): 36 | try: 37 | return self._getitem(key) 38 | except KeyError: 39 | return value 40 | 41 | def setdefault(self, bytes key, int value): 42 | return self._setdefault(key, value) 43 | 44 | def keys(self): 45 | return list(self.iterkeys()) 46 | 47 | def iterkeys(self): 48 | cdef: 49 | hattrie_iter_t* it = hattrie_iter_begin(self._trie, 0) 50 | char* c_key 51 | size_t val 52 | size_t length 53 | bytes py_str 54 | 55 | try: 56 | while not hattrie_iter_finished(it): 57 | c_key = hattrie_iter_key(it, &length) 58 | py_str = c_key[:length] 59 | yield py_str 60 | hattrie_iter_next(it) 61 | 62 | finally: 63 | hattrie_iter_free(it) 64 | 65 | 66 | cdef value_t _getitem(self, char* key) except -1: 67 | cdef value_t* value_ptr = hattrie_tryget(self._trie, key, len(key)) 68 | if value_ptr == NULL: 69 | raise KeyError(key) 70 | return value_ptr[0] 71 | 72 | cdef void _setitem(self, char* key, value_t value): 73 | hattrie_get(self._trie, key, len(key))[0] = value 74 | 75 | cdef value_t _setdefault(self, char* key, value_t value): 76 | cdef value_t* value_ptr = hattrie_tryget(self._trie, key, len(key)) 77 | if value_ptr == NULL: 78 | self._setitem(key, value) 79 | return value 80 | return value_ptr[0] 81 | 82 | cdef bint _contains(self, char* key): 83 | cdef value_t* value_ptr = hattrie_tryget(self._trie, key, len(key)) 84 | return value_ptr != NULL 85 | 86 | 87 | cdef class IntTrie(BaseTrie): 88 | """ 89 | HAT-Trie with unicode support that stores 
cdef class IntTrie(BaseTrie):
    """
    HAT-Trie with unicode support that stores int as value.

    Every public method encodes the unicode key to UTF-8 and delegates to
    the ``bytes``-level helpers inherited from ``BaseTrie``.
    """

    # XXX: Internal encoding is hardcoded as UTF8. This is the fastest
    # encoding that can handle all unicode symbols and doesn't have
    # zero bytes.

    # This may seem sub-optimal because it is multibyte encoding;
    # single-byte language-specific encoding (such as cp1251)
    # seems to be faster. But this is not the case because:

    # 1) the bottleneck of this wrapper is string encoding, not trie traversal;
    # 2) python's unicode encoding utilities are optimized for utf8;
    # 3) users will have to select language-specific encoding for the trie;
    # 4) non-hardcoded encoding causes extra overhead and prevents cython
    #    optimizations.

    # That's why hardcoded utf8 is up to 9 times faster than configurable cp1251.

    # XXX: char-walking utilities may become tricky with multibyte
    # internal encoding.

    # NOTE: each method keeps the encoded key in a named ``bytes`` local.
    # The helpers take ``char*``, and Cython will not take a ``char*`` from
    # a temporary Python object, so the local must stay.

    def __getitem__(self, unicode key):
        # Raises KeyError (from _getitem) when the key is absent.
        cdef bytes bkey = key.encode('utf8')
        return self._getitem(bkey)

    def __contains__(self, unicode key):
        cdef bytes bkey = key.encode('utf8')
        return self._contains(bkey)

    def __setitem__(self, unicode key, int value):
        cdef bytes bkey = key.encode('utf8')
        self._setitem(bkey, value)

    def get(self, unicode key, value=-1):
        # Returns ``value`` instead of raising when the key is absent.
        cdef bytes bkey = key.encode('utf8')
        try:
            return self._getitem(bkey)
        except KeyError:
            return value

    def setdefault(self, unicode key, int value):
        cdef bytes bkey = key.encode('utf8')
        return self._setdefault(bkey, value)

    def iterkeys(self):
        # Decode the raw bytes keys produced by the base iterator.
        for key in BaseTrie.iterkeys(self):
            yield key.decode('utf8')
cdef class FloatTrie(BaseTrie):
    """
    HAT-Trie with unicode support that stores float as value.

    A C ``float`` is kept in the leading bytes of each ``value_t`` slot;
    the remaining bytes of the slot are always zero.
    """

    # XXX: uintptr_t is interpreted as a float32. This should work on all
    # systems with 32-bit or larger pointers, e.g. the majority of modern
    # computers. This will likely not work on embedded 8- and 16-bit
    # systems.

    def __getitem__(self, unicode key):
        cdef bytes bkey = key.encode('utf8')
        return self._fromvalue(self._getitem(bkey))

    def __contains__(self, unicode key):
        cdef bytes bkey = key.encode('utf8')
        return self._contains(bkey)

    def __setitem__(self, unicode key, float value):
        cdef bytes bkey = key.encode('utf8')
        self._setitem(bkey, self._tovalue(value))

    def get(self, unicode key, value=float('nan')):
        """Return the float for ``key``, or ``value`` if the key is absent."""
        cdef bytes bkey = key.encode('utf8')
        try:
            return self._fromvalue(self._getitem(bkey))
        except KeyError:
            return value

    def setdefault(self, unicode key, float value):
        cdef bytes bkey = key.encode('utf8')
        return self._fromvalue(self._setdefault(bkey, self._tovalue(value)))

    def iterkeys(self):
        for key in BaseTrie.iterkeys(self):
            yield key.decode('utf8')

    cdef float _fromvalue(self, value_t value):
        # Read back the float stored in the leading bytes of the slot.
        return (<float*> &value)[0]

    cdef value_t _tovalue(self, float value):
        # Write the float INTO a zeroed value_t.  Reinterpreting ``&value``
        # as ``value_t*`` (the previous approach) reads sizeof(value_t)
        # bytes from a 4-byte float: an out-of-bounds read that also stores
        # whatever happens to follow the float on the stack.
        cdef value_t result = 0
        (<float*> &result)[0] = value
        return result
cdef class Trie(BaseTrie):
    """
    HAT-Trie with unicode support and arbitrary values.

    Each slot holds a ``PyObject*`` on which the trie owns one reference.
    """

    # XXX: Internal encoding is hardcoded as UTF8. See note in IntTrie
    # for more details.

    def __dealloc__(self):
        # Same guard as BaseTrie.__dealloc__.
        if self._trie == NULL:
            return

        cdef hattrie_iter_t* it = hattrie_iter_begin(self._trie, 0)
        cdef cpython.PyObject *o

        try:
            # Drop the reference the trie holds on every stored object.
            while not hattrie_iter_finished(it):
                o = <cpython.PyObject *> hattrie_iter_val(it)[0]
                cpython.Py_XDECREF(o)
                hattrie_iter_next(it)

        finally:
            hattrie_iter_free(it)

    def __getitem__(self, unicode key):
        cdef bytes bkey = key.encode('utf8')
        return self._fromvalue(self._getitem(bkey))

    def __contains__(self, unicode key):
        cdef bytes bkey = key.encode('utf8')
        return self._contains(bkey)

    def __setitem__(self, unicode key, value):
        cdef bytes bkey = key.encode('utf8')
        self._setitem(bkey, self._tovalue(value))

    def get(self, unicode key, value=None):
        """Return the object for ``key``, or ``value`` if the key is absent."""
        cdef bytes bkey = key.encode('utf8')
        try:
            return self._fromvalue(self._getitem(bkey))
        except KeyError:
            return value

    def setdefault(self, unicode key, value):
        """
        Return the object stored under ``key``; if the key is absent, store
        ``value`` and return it.
        """
        cdef bytes bkey = key.encode('utf8')
        if self._contains(bkey):
            # Convert the slot back to an object; returning the raw
            # value_t would hand the caller a pointer as an integer.
            return self._fromvalue(self._getitem(bkey))
        # Take a reference only when the value is actually stored, so an
        # existing key no longer leaks one reference per call.
        self._setitem(bkey, self._tovalue(value))
        return value

    def iterkeys(self):
        for key in BaseTrie.iterkeys(self):
            yield key.decode('utf8')

    cdef void _setitem(self, char* key, value_t value):
        # ``value`` must already carry the reference the trie will own
        # (see _tovalue).  Release the object being replaced, if any.
        cdef cpython.PyObject *o
        cdef value_t* value_ptr = hattrie_tryget(self._trie, key, len(key))
        if value_ptr != NULL:
            o = <cpython.PyObject *> value_ptr[0]
            cpython.Py_XDECREF(o)
        hattrie_get(self._trie, key, len(key))[0] = value

    cdef object _fromvalue(self, value_t value):
        # The cast to ``object`` yields a new owned reference for the
        # caller; the trie keeps its own.
        cdef cpython.PyObject *o
        o = <cpython.PyObject *> value
        return <object> o

    cdef value_t _tovalue(self, object obj):
        # INCREF on behalf of the trie, which will own this reference.
        cdef cpython.PyObject *o
        o = <cpython.PyObject *> obj
        cpython.Py_XINCREF(o)
        return <value_t> o
def words100k():
    """Return the lines of the first member of ``words100k.txt.zip``.

    The archive is looked up next to this file and its content is decoded
    as UTF-8.
    """
    zip_name = os.path.join(
        os.path.abspath(os.path.dirname(__file__)),
        'words100k.txt.zip'
    )
    # Context manager: the archive handle is closed once the text is read.
    with zipfile.ZipFile(zip_name) as zf:
        txt = zf.read(zf.namelist()[0]).decode('utf8')
    return txt.splitlines()


def random_words(num):
    """Return ``num`` random strings, 1-15 characters long, drawn from
    Russian and ASCII letters."""
    russian = 'абвгдеёжзиклмнопрстуфхцчъыьэюя'
    alphabet = russian + string.ascii_letters
    return [
        "".join([random.choice(alphabet) for x in range(random.randint(1, 15))])
        for y in range(num)
    ]


def truncated_words(words):
    """Return each word cut down to its first three characters."""
    return [word[:3] for word in words]


def prefixes1k(words, prefix_len):
    """Return up to 1000 prefixes of length ``prefix_len``.

    Only words at least ``prefix_len`` long are considered; they are
    sampled at a regular stride so the prefixes are spread over the list.
    """
    words = [w for w in words if len(w) >= prefix_len]
    # The stride must be at least 1: with fewer than 1000 candidates the
    # quotient is 0 and a zero slice step raises ValueError.
    every_nth = max(1, len(words) // 1000)
    _words = [w[:prefix_len] for w in words[::every_nth]]
    return _words[:1000]


def format_result(key, value, text_width):
    """Print one result row with ``key`` left-justified to ``text_width``."""
    key = key.ljust(text_width)
    print(" %s %s" % (key, value))


def bench(name, timer, descr='M ops/sec', op_count=0.1, repeats=3, runs=5,
          text_width=28):
    """Time ``timer`` and print its best throughput.

    ``timer.timeit(repeats)`` is called ``runs`` times; the fastest run is
    converted to ``op_count * repeats / time`` and printed with ``descr``.
    If the timed statement raises AttributeError or TypeError, the row
    reads "not supported" instead.
    """
    try:
        times = [timer.timeit(repeats) for _ in range(runs)]

        def op_time(time):
            return op_count * repeats / time

        val = "%0.3f%s" % (op_time(min(times)), descr)
        format_result(name, val, text_width)
    except (AttributeError, TypeError):
        format_result(name, "not supported", text_width)
def create_trie():
    """Build a ``hat_trie.Trie`` mapping every benchmark word to 1."""
    trie = hat_trie.Trie()
    for word in words100k():
        trie[word] = 1
    return trie

def benchmark():
    """Run each timed statement against a dict and against a trie and
    print one result row per container."""
    print('\n====== Benchmarks (100k unique unicode words) =======\n')

    # (label, statement, unit, op_count, repeats)
    tests = [
        ('__getitem__ (hits)', "for word in words: data[word]", 'M ops/sec', 0.1, 3),
        ('__contains__ (hits)', "for word in words: word in data", 'M ops/sec', 0.1, 3),
        ('__contains__ (misses)', "for word in NON_WORDS100k: word in data", 'M ops/sec', 0.1, 3),
        ('__len__', 'len(data)', ' ops/sec', 1, 3),
        ('__setitem__ (updates)', 'for word in words: data[word]=1', 'M ops/sec', 0.1, 3),
        ('__setitem__ (inserts)', 'for word in NON_WORDS_10k: data[word]=1', 'M ops/sec', 0.01, 3),
        ('setdefault (updates)', 'for word in words: data.setdefault(word, 1)', 'M ops/sec', 0.1, 3),
        ('setdefault (inserts)', 'for word in NON_WORDS_10k: data.setdefault(word, 1)', 'M ops/sec', 0.01, 3),
#        ('items()', 'list(data.items())', ' ops/sec', 1, 1),
        ('keys()', 'list(data.keys())', ' ops/sec', 1, 1),
#        ('values()', 'list(data.values())', ' ops/sec', 1, 1),
    ]

    # Setup code executed by timeit; it must stay flush-left.
    common_setup = """
from __main__ import create_trie, WORDS100k, NON_WORDS100k, MIXED_WORDS100k
from __main__ import PREFIXES_3_1k, PREFIXES_5_1k, PREFIXES_8_1k, PREFIXES_15_1k
words = WORDS100k
NON_WORDS_10k = NON_WORDS100k[:10000]
NON_WORDS_1k = ['ыва', 'xyz', 'соы', 'Axx', 'avы']*200
"""
    dict_setup = common_setup + 'data = dict((word, 1) for word in words);'
    trie_setup = common_setup + 'data = create_trie();'

    for label, stmt, unit, ops, reps in tests:
        # Build both timers first, then report dict before trie.
        timers = (
            ('dict', timeit.Timer(stmt, dict_setup)),
            ('trie', timeit.Timer(stmt, trie_setup)),
        )
        for kind, timer in timers:
            bench(kind + ' ' + label, timer, unit, ops, reps)


    # trie-specific benchmarks

#    bench(
#        'trie.iter_prefix_items
(hits)', 116 | # timeit.Timer( 117 | # "for word in words:\n" 118 | # " for it in data.iter_prefix_items(word):\n" 119 | # " pass", 120 | # trie_setup 121 | # ), 122 | # ) 123 | # 124 | # bench( 125 | # 'trie.prefix_items (hits)', 126 | # timeit.Timer( 127 | # "for word in words: data.prefix_items(word)", 128 | # trie_setup 129 | # ) 130 | # ) 131 | # 132 | # bench( 133 | # 'trie.prefix_items loop (hits)', 134 | # timeit.Timer( 135 | # "for word in words:\n" 136 | # " for it in data.prefix_items(word):pass", 137 | # trie_setup 138 | # ) 139 | # ) 140 | # 141 | # bench( 142 | # 'trie.iter_prefixes (hits)', 143 | # timeit.Timer( 144 | # "for word in words:\n" 145 | # " for it in data.iter_prefixes(word): pass", 146 | # trie_setup 147 | # ) 148 | # ) 149 | # 150 | # bench( 151 | # 'trie.iter_prefixes (misses)', 152 | # timeit.Timer( 153 | # "for word in NON_WORDS100k:\n" 154 | # " for it in data.iter_prefixes(word): pass", 155 | # trie_setup 156 | # ) 157 | # ) 158 | # 159 | # bench( 160 | # 'trie.iter_prefixes (mixed)', 161 | # timeit.Timer( 162 | # "for word in MIXED_WORDS100k:\n" 163 | # " for it in data.iter_prefixes(word): pass", 164 | # trie_setup 165 | # ) 166 | # ) 167 | # 168 | # bench( 169 | # 'trie.has_keys_with_prefix (hits)', 170 | # timeit.Timer( 171 | # "for word in words: data.has_keys_with_prefix(word)", 172 | # trie_setup 173 | # ) 174 | # ) 175 | # 176 | # bench( 177 | # 'trie.has_keys_with_prefix (misses)', 178 | # timeit.Timer( 179 | # "for word in NON_WORDS100k: data.has_keys_with_prefix(word)", 180 | # trie_setup 181 | # ) 182 | # ) 183 | # 184 | # for meth in ('longest_prefix', 'longest_prefix_item'): 185 | # bench( 186 | # 'trie.%s (hits)' % meth, 187 | # timeit.Timer( 188 | # "for word in words: data.%s(word)" % meth, 189 | # trie_setup 190 | # ) 191 | # ) 192 | # 193 | # bench( 194 | # 'trie.%s (misses)' % meth, 195 | # timeit.Timer( 196 | # "for word in NON_WORDS100k: data.%s(word, default=None)" % meth, 197 | # trie_setup 198 | # ) 199 | # 
def check_trie(trie, words):
    """Look up every word and raise ``Exception`` unless the values found
    add up to ``len(words)``."""
    total = sum(trie[word] for word in words)
    if total != len(words):
        raise Exception()

def profiling():
    """Profile ``check_trie`` over the full word list and print the 20
    entries with the largest own time."""
    import cProfile
    import pstats
    print('\n====== Profiling =======\n')
    # The names ``trie`` and ``WORDS`` are referenced by the profiled
    # statement string below (through locals()); do not rename them.
    trie = create_trie()
    WORDS = words100k()

#    def check_prefixes(trie, words):
#        for word in words:
#            trie.keys(word)
#    cProfile.runctx("check_prefixes(trie, NON_WORDS_1k)", globals(), locals(), "Profile.prof")
#
    cProfile.runctx("check_trie(trie, WORDS)", globals(), locals(), "Profile.prof")

    stats = pstats.Stats("Profile.prof")
    stats.strip_dirs().sort_stats("time").print_stats(20)
dict_memory = _memory() 270 | # print('initial: %s, trie: +%s, dict: +%s' % ( 271 | # initial_memory, 272 | # trie_memory-initial_memory, 273 | # dict_memory-initial_memory, 274 | # )) 275 | 276 | if __name__ == '__main__': 277 | # trie = create_trie() 278 | # def check_pref(prefixes): 279 | # cntr = 0 280 | # for w in prefixes: 281 | # cntr += len(trie.keys(w)) 282 | # print(len(prefixes), cntr, cntr / len(prefixes)) 283 | # check_pref(prefixes1k(WORDS100k, 15)) 284 | 285 | 286 | benchmark() 287 | #profiling() 288 | #memory() 289 | print('\n~~~~~~~~~~~~~~\n') -------------------------------------------------------------------------------- /hat-trie/src/ahtable.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of hat-trie. 3 | * 4 | * Copyright (c) 2011 by Daniel C. Jones 5 | * 6 | * See ahtable.h for description of the Array Hash Table. 7 | * 8 | */ 9 | 10 | #include "ahtable.h" 11 | #include "misc.h" 12 | #include "murmurhash3.h" 13 | #include 14 | #include 15 | 16 | const double ahtable_max_load_factor = 100000.0; /* arbitrary large number => don't resize */ 17 | const size_t ahtable_initial_size = 4096; 18 | 19 | static size_t keylen(slot_t s) { 20 | if (0x1 & *s) { 21 | return (size_t) (*((uint16_t*) s) >> 1); 22 | } 23 | else { 24 | return (size_t) (*s >> 1); 25 | } 26 | } 27 | 28 | 29 | ahtable_t* ahtable_create() 30 | { 31 | return ahtable_create_n(ahtable_initial_size); 32 | } 33 | 34 | 35 | ahtable_t* ahtable_create_n(size_t n) 36 | { 37 | ahtable_t* table = malloc_or_die(sizeof(ahtable_t)); 38 | table->flag = 0; 39 | table->c0 = table->c1 = '\0'; 40 | 41 | table->n = n; 42 | table->m = 0; 43 | table->max_m = (size_t) (ahtable_max_load_factor * (double) table->n); 44 | table->slots = malloc_or_die(n * sizeof(slot_t)); 45 | memset(table->slots, 0, n * sizeof(slot_t)); 46 | 47 | table->slot_sizes = malloc_or_die(n * sizeof(size_t)); 48 | memset(table->slot_sizes, 0, n * sizeof(size_t)); 49 | 
50 | return table; 51 | } 52 | 53 | 54 | void ahtable_free(ahtable_t* table) 55 | { 56 | if (table == NULL) return; 57 | size_t i; 58 | for (i = 0; i < table->n; ++i) free(table->slots[i]); 59 | free(table->slots); 60 | free(table->slot_sizes); 61 | free(table); 62 | } 63 | 64 | 65 | size_t ahtable_size(const ahtable_t* table) 66 | { 67 | return table->m; 68 | } 69 | 70 | 71 | size_t ahtable_sizeof(const ahtable_t* table) 72 | { 73 | size_t nbytes = sizeof(ahtable_t) + 74 | table->n * (sizeof(size_t) + sizeof(slot_t)); 75 | size_t i; 76 | for (i = 0; i < table->n; ++i) { 77 | nbytes += table->slot_sizes[i]; 78 | } 79 | return nbytes; 80 | } 81 | 82 | 83 | void ahtable_clear(ahtable_t* table) 84 | { 85 | size_t i; 86 | for (i = 0; i < table->n; ++i) free(table->slots[i]); 87 | table->n = ahtable_initial_size; 88 | table->slots = realloc_or_die(table->slots, table->n * sizeof(slot_t)); 89 | memset(table->slots, 0, table->n * sizeof(slot_t)); 90 | 91 | table->slot_sizes = realloc_or_die(table->slot_sizes, table->n * sizeof(size_t)); 92 | memset(table->slot_sizes, 0, table->n * sizeof(size_t)); 93 | } 94 | 95 | /** Inserts a key with value into slot s, and returns a pointer to the 96 | * space immediately after. 97 | */ 98 | static slot_t ins_key(slot_t s, const char* key, size_t len, value_t** val) 99 | { 100 | // key length 101 | if (len < 128) { 102 | s[0] = (unsigned char) (len << 1); 103 | s += 1; 104 | } 105 | else { 106 | /* The least significant bit is set to indicate that two bytes are 107 | * being used to store the key length. */ 108 | *((uint16_t*) s) = ((uint16_t) len << 1) | 0x1; 109 | s += 2; 110 | } 111 | 112 | // key 113 | memcpy(s, key, len * sizeof(unsigned char)); 114 | s += len; 115 | 116 | // value 117 | *val = (value_t*) s; 118 | **val = 0; 119 | s += sizeof(value_t); 120 | 121 | return s; 122 | } 123 | 124 | 125 | static void ahtable_expand(ahtable_t* table) 126 | { 127 | /* Resizing a table is essentially building a brand new one. 
/* Double the number of slots and redistribute every entry.
 *
 * Works in two passes over the old table: the first only measures how many
 * bytes each new slot will need, the second copies the entries into the
 * exactly-sized new slots.  The old slot storage is freed at the end. */
static void ahtable_expand(ahtable_t* table)
{
    /* Resizing a table is essentially building a brand new one.
     * One little shortcut we can take on the memory allocation front is to
     * figure out how much memory each slot needs in advance.
     */
    assert(table->n > 0);
    size_t new_n = 2 * table->n;
    size_t* slot_sizes = malloc_or_die(new_n * sizeof(size_t));
    memset(slot_sizes, 0, new_n * sizeof(size_t));

    /* pass 1: accumulate the byte size of every new slot */
    const char* key;
    size_t len = 0;
    size_t m = 0;
    ahtable_iter_t* i = ahtable_iter_begin(table, false);
    while (!ahtable_iter_finished(i)) {
        key = ahtable_iter_key(i, &len);
        /* entry size = key bytes + value + 1- or 2-byte length header */
        slot_sizes[hash(key, len) % new_n] +=
            len + sizeof(value_t) + (len >= 128 ? 2 : 1);

        ++m;
        ahtable_iter_next(i);
    }
    assert(m == table->m);
    ahtable_iter_free(i);


    /* allocate slots */
    slot_t* slots = malloc_or_die(new_n * sizeof(slot_t));
    size_t j;
    for (j = 0; j < new_n; ++j) {
        if (slot_sizes[j] > 0) {
            slots[j] = malloc_or_die(slot_sizes[j]);
        }
        else slots[j] = NULL;
    }

    /* rehash values. A few shortcuts can be taken here as well, as we know
     * there will be no collisions. Instead of the regular insertion routine,
     * we keep track of the ends of every slot and simply insert keys.
     * */
    /* slots_next[h] is the write cursor inside slots[h]; it starts at the
     * slot's base and is advanced by ins_key after every copied entry. */
    slot_t* slots_next = malloc_or_die(new_n * sizeof(slot_t));
    memcpy(slots_next, slots, new_n * sizeof(slot_t));
    size_t h;
    m = 0;
    value_t* u;
    value_t* v;
    i = ahtable_iter_begin(table, false);
    while (!ahtable_iter_finished(i)) {

        key = ahtable_iter_key(i, &len);
        h = hash(key, len) % new_n;

        /* ins_key zeroes the new value; copy the old one over it */
        slots_next[h] = ins_key(slots_next[h], key, len, &u);
        v = ahtable_iter_val(i);
        *u = *v;

        ++m;
        ahtable_iter_next(i);
    }
    assert(m == table->m);
    ahtable_iter_free(i);


    /* swap in the new arrays and release the old storage */
    free(slots_next);
    for (j = 0; j < table->n; ++j) free(table->slots[j]);

    free(table->slots);
    table->slots = slots;

    free(table->slot_sizes);
    table->slot_sizes = slot_sizes;

    table->n = new_n;
    table->max_m = (size_t) (ahtable_max_load_factor * (double) table->n);
}
*/ 243 | size_t new_size = table->slot_sizes[i]; 244 | new_size += 1 + (len >= 128 ? 1 : 0); // key length 245 | new_size += len * sizeof(unsigned char); // key 246 | new_size += sizeof(value_t); // value 247 | 248 | table->slots[i] = realloc_or_die(table->slots[i], new_size); 249 | 250 | ++table->m; 251 | ins_key(table->slots[i] + table->slot_sizes[i], key, len, &val); 252 | table->slot_sizes[i] = new_size; 253 | 254 | return val; 255 | } 256 | else return NULL; 257 | } 258 | 259 | 260 | value_t* ahtable_get(ahtable_t* table, const char* key, size_t len) 261 | { 262 | return get_key(table, key, len, true); 263 | } 264 | 265 | 266 | value_t* ahtable_tryget(ahtable_t* table, const char* key, size_t len ) 267 | { 268 | return get_key(table, key, len, false); 269 | } 270 | 271 | 272 | int ahtable_del(ahtable_t* table, const char* key, size_t len) 273 | { 274 | uint32_t i = hash(key, len) % table->n; 275 | size_t k; 276 | slot_t s; 277 | 278 | /* search the array for our key */ 279 | s = table->slots[i]; 280 | while ((size_t) (s - table->slots[i]) < table->slot_sizes[i]) { 281 | /* get the key length */ 282 | k = keylen(s); 283 | s += k < 128 ? 1 : 2; 284 | 285 | /* skip keys that are longer than ours */ 286 | if (k != len) { 287 | s += k + sizeof(value_t); 288 | continue; 289 | } 290 | 291 | /* key found. */ 292 | if (memcmp(s, key, len) == 0) { 293 | /* move everything over, resize the array */ 294 | unsigned char* t = s + len + sizeof(value_t); 295 | s -= k < 128 ? 1 : 2; 296 | memmove(s, t, table->slot_sizes[i] - (size_t) (t - table->slots[i])); 297 | table->slot_sizes[i] -= (size_t) (t - s); 298 | --table->m; 299 | return 0; 300 | } 301 | /* key not found. */ 302 | else { 303 | s += k + sizeof(value_t); 304 | continue; 305 | } 306 | } 307 | 308 | // Key was not found. Do nothing. 
/* qsort comparator over an array of slot_t: each element points at the
 * start of an entry (its length header).  Orders by key bytes; when one
 * key is a prefix of the other, the shorter key sorts first. */
static int cmpkey(const void* a_, const void* b_)
{
    slot_t a = *(slot_t*) a_;
    slot_t b = *(slot_t*) b_;

    size_t ka = keylen(a), kb = keylen(b);

    /* step over the 1- or 2-byte length header to reach the key bytes */
    a += ka < 128 ? 1 : 2;
    b += kb < 128 ? 1 : 2;

    /* compare the common prefix, then break ties on length */
    int c = memcmp(a, b, ka < kb ? ka : kb);
    return c == 0 ? (int) ka - (int) kb : c;
}


/* Sorted/unsorted iterators are kept private and exposed by passing the
sorted flag to ahtable_iter_begin. */

typedef struct ahtable_sorted_iter_t_
{
    const ahtable_t* table; // parent
    slot_t* xs; // pointers to keys
    size_t i; // current key
} ahtable_sorted_iter_t;


/* Build a sorted iterator: collect a pointer to every entry of every slot
 * into xs, then sort those pointers with cmpkey.  The pointers refer
 * directly into the table's slot storage.
 * NOTE(review): for an empty table this requests a 0-byte allocation from
 * malloc_or_die -- confirm that helper tolerates it. */
static ahtable_sorted_iter_t* ahtable_sorted_iter_begin(const ahtable_t* table)
{
    ahtable_sorted_iter_t* i = malloc_or_die(sizeof(ahtable_sorted_iter_t));
    i->table = table;
    i->xs = malloc_or_die(table->m * sizeof(slot_t));
    i->i = 0;

    slot_t s;
    size_t j, k, u;
    for (j = 0, u = 0; j < table->n; ++j) {
        s = table->slots[j];
        /* walk the packed entries of slot j */
        while (s < table->slots[j] + table->slot_sizes[j]) {
            i->xs[u++] = s;
            k = keylen(s);
            s += k < 128 ? 1 : 2;       /* header */
            s += k + sizeof(value_t);   /* key bytes and value */
        }
    }

    qsort(i->xs, table->m, sizeof(slot_t), cmpkey);

    return i;
}
1 : 2; 355 | s += k + sizeof(value_t); 356 | } 357 | } 358 | 359 | qsort(i->xs, table->m, sizeof(slot_t), cmpkey); 360 | 361 | return i; 362 | } 363 | 364 | 365 | static bool ahtable_sorted_iter_finished(ahtable_sorted_iter_t* i) 366 | { 367 | return i->i >= i->table->m; 368 | } 369 | 370 | 371 | static void ahtable_sorted_iter_next(ahtable_sorted_iter_t* i) 372 | { 373 | if (ahtable_sorted_iter_finished(i)) return; 374 | ++i->i; 375 | } 376 | 377 | 378 | static void ahtable_sorted_iter_free(ahtable_sorted_iter_t* i) 379 | { 380 | if (i == NULL) return; 381 | free(i->xs); 382 | free(i); 383 | } 384 | 385 | 386 | static const char* ahtable_sorted_iter_key(ahtable_sorted_iter_t* i, size_t* len) 387 | { 388 | if (ahtable_sorted_iter_finished(i)) return NULL; 389 | 390 | slot_t s = i->xs[i->i]; 391 | if (len) *len = keylen(s); 392 | 393 | return (const char*) (s + (*len < 128 ? 1 : 2)); 394 | } 395 | 396 | 397 | static value_t* ahtable_sorted_iter_val(ahtable_sorted_iter_t* i) 398 | { 399 | if (ahtable_sorted_iter_finished(i)) return NULL; 400 | 401 | slot_t s = i->xs[i->i]; 402 | size_t k = keylen(s); 403 | 404 | s += k < 128 ? 
/* Cursor for walking the table in storage order: slot by slot, and entry
 * by entry inside each slot. */
typedef struct ahtable_unsorted_iter_t_
{
    const ahtable_t* table; // parent
    size_t i; // slot index
    slot_t s; // slot position
} ahtable_unsorted_iter_t;


/* Create an iterator positioned on the first entry of the first slot whose
 * recorded size is non-zero.  If there is no such slot the loop runs off
 * the end and the iterator starts out finished (i == n). */
static ahtable_unsorted_iter_t* ahtable_unsorted_iter_begin(const ahtable_t* table)
{
    ahtable_unsorted_iter_t* i = malloc_or_die(sizeof(ahtable_unsorted_iter_t));
    i->table = table;

    for (i->i = 0; i->i < i->table->n; ++i->i) {
        i->s = table->slots[i->i];
        /* s sits at the slot base, so the offset is 0 and this test is
         * true exactly when the slot is empty */
        if ((size_t) (i->s - table->slots[i->i]) >= table->slot_sizes[i->i]) continue;
        break;
    }

    return i;
}


/* True once the slot index has run past the last slot. */
static bool ahtable_unsorted_iter_finished(ahtable_unsorted_iter_t* i)
{
    return i->i >= i->table->n;
}


/* Step to the next entry, moving on to the next non-empty slot when the
 * current one is exhausted.  Does nothing when already finished. */
static void ahtable_unsorted_iter_next(ahtable_unsorted_iter_t* i)
{
    if (ahtable_unsorted_iter_finished(i)) return;

    /* get the key length */
    size_t k = keylen(i->s);
    i->s += k < 128 ? 1 : 2;

    /* skip to the next key */
    i->s += k + sizeof(value_t);

    /* past the used part of this slot: find the next slot with data */
    if ((size_t) (i->s - i->table->slots[i->i]) >= i->table->slot_sizes[i->i]) {
        do {
            ++i->i;
        } while(i->i < i->table->n &&
                i->table->slot_sizes[i->i] == 0);

        /* s is NULL once the iterator is finished */
        if (i->i < i->table->n) i->s = i->table->slots[i->i];
        else i->s = NULL;
    }
}
1 : 2; 447 | 448 | /* skip to the next key */ 449 | i->s += k + sizeof(value_t); 450 | 451 | if ((size_t) (i->s - i->table->slots[i->i]) >= i->table->slot_sizes[i->i]) { 452 | do { 453 | ++i->i; 454 | } while(i->i < i->table->n && 455 | i->table->slot_sizes[i->i] == 0); 456 | 457 | if (i->i < i->table->n) i->s = i->table->slots[i->i]; 458 | else i->s = NULL; 459 | } 460 | } 461 | 462 | 463 | static void ahtable_unsorted_iter_free(ahtable_unsorted_iter_t* i) 464 | { 465 | free(i); 466 | } 467 | 468 | 469 | static const char* ahtable_unsorted_iter_key(ahtable_unsorted_iter_t* i, size_t* len) 470 | { 471 | if (ahtable_unsorted_iter_finished(i)) return NULL; 472 | 473 | slot_t s = i->s; 474 | size_t k; 475 | if (0x1 & *s) { 476 | k = (size_t) (*((uint16_t*) s)) >> 1; 477 | s += 2; 478 | } 479 | else { 480 | k = (size_t) (*s >> 1); 481 | s += 1; 482 | } 483 | 484 | if(len) *len = k; 485 | return (const char*) s; 486 | } 487 | 488 | 489 | static value_t* ahtable_unsorted_iter_val(ahtable_unsorted_iter_t* i) 490 | { 491 | if (ahtable_unsorted_iter_finished(i)) return NULL; 492 | 493 | slot_t s = i->s; 494 | 495 | size_t k; 496 | if (0x1 & *s) { 497 | k = (size_t) (*((uint16_t*) s)) >> 1; 498 | s += 2; 499 | } 500 | else { 501 | k = (size_t) (*s >> 1); 502 | s += 1; 503 | } 504 | 505 | s += k; 506 | return (value_t*) s; 507 | } 508 | 509 | 510 | struct ahtable_iter_t_ 511 | { 512 | bool sorted; 513 | union { 514 | ahtable_unsorted_iter_t* unsorted; 515 | ahtable_sorted_iter_t* sorted; 516 | } i; 517 | }; 518 | 519 | 520 | ahtable_iter_t* ahtable_iter_begin(const ahtable_t* table, bool sorted) { 521 | ahtable_iter_t* i = malloc_or_die(sizeof(ahtable_iter_t)); 522 | i->sorted = sorted; 523 | if (sorted) i->i.sorted = ahtable_sorted_iter_begin(table); 524 | else i->i.unsorted = ahtable_unsorted_iter_begin(table); 525 | return i; 526 | } 527 | 528 | 529 | void ahtable_iter_next(ahtable_iter_t* i) 530 | { 531 | if (i->sorted) ahtable_sorted_iter_next(i->i.sorted); 532 | else 
ahtable_unsorted_iter_next(i->i.unsorted); 533 | } 534 | 535 | 536 | bool ahtable_iter_finished(ahtable_iter_t* i) 537 | { 538 | if (i->sorted) return ahtable_sorted_iter_finished(i->i.sorted); 539 | else return ahtable_unsorted_iter_finished(i->i.unsorted); 540 | } 541 | 542 | 543 | void ahtable_iter_free(ahtable_iter_t* i) 544 | { 545 | if (i == NULL) return; 546 | if (i->sorted) ahtable_sorted_iter_free(i->i.sorted); 547 | else ahtable_unsorted_iter_free(i->i.unsorted); 548 | free(i); 549 | } 550 | 551 | 552 | const char* ahtable_iter_key(ahtable_iter_t* i, size_t* len) 553 | { 554 | if (i->sorted) return ahtable_sorted_iter_key(i->i.sorted, len); 555 | else return ahtable_unsorted_iter_key(i->i.unsorted, len); 556 | } 557 | 558 | 559 | value_t* ahtable_iter_val(ahtable_iter_t* i) 560 | { 561 | if (i->sorted) return ahtable_sorted_iter_val(i->i.sorted); 562 | else return ahtable_unsorted_iter_val(i->i.unsorted); 563 | } 564 | 565 | -------------------------------------------------------------------------------- /hat-trie/src/hat-trie.c: -------------------------------------------------------------------------------- 1 | /* 2 | * This file is part of hat-trie. 3 | * 4 | * Copyright (c) 2011 by Daniel C. Jones 5 | * 6 | */ 7 | 8 | #include "hat-trie.h" 9 | #include "ahtable.h" 10 | #include "misc.h" 11 | #include "pstdint.h" 12 | #include 13 | #include 14 | 15 | #define HT_UNUSED(x) x=x 16 | 17 | /* maximum number of keys that may be stored in a bucket before it is burst */ 18 | static const size_t MAX_BUCKET_SIZE = 16384; 19 | #define NODE_MAXCHAR 0xff // 0x7f for 7-bit ASCII 20 | #define NODE_CHILDS (NODE_MAXCHAR+1) 21 | 22 | static const uint8_t NODE_TYPE_TRIE = 0x1; 23 | static const uint8_t NODE_TYPE_PURE_BUCKET = 0x2; 24 | static const uint8_t NODE_TYPE_HYBRID_BUCKET = 0x4; 25 | static const uint8_t NODE_HAS_VAL = 0x8; 26 | 27 | 28 | struct trie_node_t_; 29 | 30 | /* Node's may be trie nodes or buckets. 
This union allows us to keep 31 | * non-specific pointer. */ 32 | typedef union node_ptr_ 33 | { 34 | ahtable_t* b; 35 | struct trie_node_t_* t; 36 | uint8_t* flag; 37 | } node_ptr; 38 | 39 | 40 | typedef struct trie_node_t_ 41 | { 42 | uint8_t flag; 43 | 44 | /* the value for the key that is consumed on a trie node */ 45 | value_t val; 46 | 47 | /* Map a character to either a trie_node_t or a ahtable_t. The first byte 48 | * must be examined to determine which. */ 49 | node_ptr xs[NODE_CHILDS]; 50 | 51 | } trie_node_t; 52 | 53 | struct hattrie_t_ 54 | { 55 | node_ptr root; // root node 56 | size_t m; // number of stored keys 57 | }; 58 | 59 | 60 | 61 | size_t hattrie_size(const hattrie_t* T) 62 | { 63 | return T->m; 64 | } 65 | 66 | 67 | static size_t node_sizeof(node_ptr node) 68 | { 69 | if (*node.flag & NODE_TYPE_TRIE) { 70 | size_t nbytes = sizeof(trie_node_t); 71 | size_t i; 72 | nbytes += node_sizeof(node.t->xs[0]); 73 | for (i = 1; i < NODE_CHILDS; ++i) { 74 | if (node.t->xs[i].t != node.t->xs[i-1].t) nbytes += node_sizeof(node.t->xs[i]); 75 | } 76 | return nbytes; 77 | } 78 | else { 79 | return ahtable_sizeof(node.b); 80 | } 81 | } 82 | 83 | 84 | size_t hattrie_sizeof(const hattrie_t* T) 85 | { 86 | return sizeof(hattrie_t) + node_sizeof(T->root); 87 | } 88 | 89 | 90 | /* Create a new trie node with all pointers pointing to the given child (which 91 | * can be NULL). */ 92 | static trie_node_t* alloc_trie_node(hattrie_t* T, node_ptr child) 93 | { 94 | trie_node_t* node = malloc_or_die(sizeof(trie_node_t)); 95 | node->flag = NODE_TYPE_TRIE; 96 | node->val = 0; 97 | 98 | /* pass T to allow custom allocator for trie. 
/* Create a new trie node with all pointers pointing to the given child (which
 * can be NULL).  The node starts without a value (val 0, NODE_HAS_VAL
 * clear). */
static trie_node_t* alloc_trie_node(hattrie_t* T, node_ptr child)
{
    trie_node_t* node = malloc_or_die(sizeof(trie_node_t));
    node->flag = NODE_TYPE_TRIE;
    node->val = 0;

    /* pass T to allow custom allocator for trie. */
    HT_UNUSED(T); /* unused now */

    size_t i;
    for (i = 0; i < NODE_CHILDS; ++i) node->xs[i] = child;
    return node;
}

/* iterate trie nodes until string is consumed or bucket is found
 *
 * On entry *p is a trie node and (*k, *l) the remaining key.  Descends
 * while the child selected by the current character is itself a trie node
 * and more than brk characters remain, advancing *k, decrementing *l and
 * updating *p to the node just entered.  Returns the child selected by the
 * current character of the final *p; that child is not consumed. */
static node_ptr hattrie_consume(node_ptr *p, const char **k, size_t *l, unsigned brk)
{
    node_ptr node = p->t->xs[(unsigned char) **k];
    while (*node.flag & NODE_TYPE_TRIE && *l > brk) {
        ++*k;
        --*l;
        *p = node;
        node = node.t->xs[(unsigned char) **k];
    }

    /* copy and writeback variables if it's faster */

    assert(*p->flag & NODE_TYPE_TRIE);
    return node;
}

/* use node value and return pointer to it
 *
 * Marks the trie node as holding a value; the key count is incremented
 * only when the node did not hold one before. */
static inline value_t* hattrie_useval(hattrie_t *T, node_ptr n)
{
    if (!(n.t->flag & NODE_HAS_VAL)) {
        n.t->flag |= NODE_HAS_VAL;
        ++T->m;
    }
    return &n.t->val;
}

/* clear node value if exists
 *
 * Returns 0 if a value was cleared (and the key count decremented),
 * -1 if the node held no value. */
static inline int hattrie_clrval(hattrie_t *T, node_ptr n)
{
    if (n.t->flag & NODE_HAS_VAL) {
        n.t->flag &= ~NODE_HAS_VAL;
        n.t->val = 0;
        --T->m;
        return 0;
    }
    return -1;
}

/* find node in trie
 *
 * Locates the node responsible for *key without searching inside buckets.
 * *key and *len are advanced past the characters consumed on the way.
 * Returns:
 *   - the root, when *len is 0;
 *   - a trie node, when the key ends on a trie node that holds a value;
 *   - a node_ptr whose members are NULL, when the key ends on a trie node
 *     without a value;
 *   - otherwise the bucket in which the remaining key must be looked up. */
static node_ptr hattrie_find(hattrie_t* T, const char **key, size_t *len)
{
    node_ptr parent = T->root;
    assert(*parent.flag & NODE_TYPE_TRIE);

    if (*len == 0) return parent;

    /* brk = 1: stop descending once a single character is left */
    node_ptr node = hattrie_consume(&parent, key, len, 1);

    /* if the trie node consumes value, use it */
    if (*node.flag & NODE_TYPE_TRIE) {
        if (!(node.t->flag & NODE_HAS_VAL)) {
            /* no value stored here: report "not found" */
            node.flag = NULL;
        }
        return node;
    }

    /* pure bucket holds only key suffixes, skip current char */
    if (*node.flag & NODE_TYPE_PURE_BUCKET) {
        *key += 1;
        *len -= 1;
    }

    /* do not scan bucket, it's not needed for this operation */
    return node;
}
hattrie_t* hattrie_create() 174 | { 175 | hattrie_t* T = malloc_or_die(sizeof(hattrie_t)); 176 | T->m = 0; 177 | 178 | node_ptr node; 179 | node.b = ahtable_create(); 180 | node.b->flag = NODE_TYPE_HYBRID_BUCKET; 181 | node.b->c0 = 0x00; 182 | node.b->c1 = NODE_MAXCHAR; 183 | T->root.t = alloc_trie_node(T, node); 184 | 185 | return T; 186 | } 187 | 188 | 189 | static void hattrie_free_node(node_ptr node) 190 | { 191 | if (*node.flag & NODE_TYPE_TRIE) { 192 | size_t i; 193 | for (i = 0; i < NODE_CHILDS; ++i) { 194 | if (i > 0 && node.t->xs[i].t == node.t->xs[i - 1].t) continue; 195 | 196 | /* XXX: recursion might not be the best choice here. It is possible 197 | * to build a very deep trie. */ 198 | if (node.t->xs[i].t) hattrie_free_node(node.t->xs[i]); 199 | } 200 | free(node.t); 201 | } 202 | else { 203 | ahtable_free(node.b); 204 | } 205 | } 206 | 207 | 208 | void hattrie_free(hattrie_t* T) 209 | { 210 | hattrie_free_node(T->root); 211 | free(T); 212 | } 213 | 214 | 215 | void hattrie_clear(hattrie_t* T) 216 | { 217 | hattrie_free_node(T->root); 218 | node_ptr node; 219 | node.b = ahtable_create(); 220 | node.b->flag = NODE_TYPE_HYBRID_BUCKET; 221 | node.b->c0 = 0x00; 222 | node.b->c1 = 0xff; 223 | T->root.t = alloc_trie_node(T, node); 224 | } 225 | 226 | 227 | /* Perform one split operation on the given node with the given parent. 
228 | */ 229 | static void hattrie_split(hattrie_t* T, node_ptr parent, node_ptr node) 230 | { 231 | /* only buckets may be split */ 232 | assert(*node.flag & NODE_TYPE_PURE_BUCKET || 233 | *node.flag & NODE_TYPE_HYBRID_BUCKET); 234 | 235 | assert(*parent.flag & NODE_TYPE_TRIE); 236 | 237 | if (*node.flag & NODE_TYPE_PURE_BUCKET) { 238 | /* turn the pure bucket into a hybrid bucket */ 239 | parent.t->xs[node.b->c0].t = alloc_trie_node(T, node); 240 | 241 | /* if the bucket had an empty key, move it to the new trie node */ 242 | value_t* val = ahtable_tryget(node.b, NULL, 0); 243 | if (val) { 244 | parent.t->xs[node.b->c0].t->val = *val; 245 | parent.t->xs[node.b->c0].t->flag |= NODE_HAS_VAL; 246 | *val = 0; 247 | ahtable_del(node.b, NULL, 0); 248 | } 249 | 250 | node.b->c0 = 0x00; 251 | node.b->c1 = NODE_MAXCHAR; 252 | node.b->flag = NODE_TYPE_HYBRID_BUCKET; 253 | 254 | return; 255 | } 256 | 257 | /* This is a hybrid bucket. Perform a proper split. */ 258 | 259 | /* count the number of occourances of every leading character */ 260 | unsigned int cs[NODE_CHILDS]; // occurance count for leading chars 261 | memset(cs, 0, NODE_CHILDS * sizeof(unsigned int)); 262 | size_t len; 263 | const char* key; 264 | 265 | ahtable_iter_t* i = ahtable_iter_begin(node.b, false); 266 | while (!ahtable_iter_finished(i)) { 267 | key = ahtable_iter_key(i, &len); 268 | assert(len > 0); 269 | cs[(unsigned char) key[0]] += 1; 270 | ahtable_iter_next(i); 271 | } 272 | ahtable_iter_free(i); 273 | 274 | /* choose a split point */ 275 | unsigned int left_m, right_m, all_m; 276 | unsigned char j = node.b->c0; 277 | all_m = ahtable_size(node.b); 278 | left_m = cs[j]; 279 | right_m = all_m - left_m; 280 | int d; 281 | 282 | while (j + 1 < node.b->c1) { 283 | d = abs((int) (left_m + cs[j + 1]) - (int) (right_m - cs[j + 1])); 284 | if (d <= abs(left_m - right_m) && left_m + cs[j + 1] < all_m) { 285 | j += 1; 286 | left_m += cs[j]; 287 | right_m -= cs[j]; 288 | } 289 | else break; 290 | } 291 | 292 | 
/* now split into two node cooresponding to ranges [0, j] and 293 | * [j + 1, NODE_MAXCHAR], respectively. */ 294 | 295 | 296 | /* create new left and right nodes */ 297 | 298 | /* TODO: Add a special case if either node is a hybrid bucket containing all 299 | * the keys. In such a case, do not build a new table, just use the old one. 300 | * */ 301 | size_t num_slots; 302 | 303 | 304 | for (num_slots = ahtable_initial_size; 305 | (double) left_m > ahtable_max_load_factor * (double) num_slots; 306 | num_slots *= 2); 307 | 308 | node_ptr left, right; 309 | left.b = ahtable_create_n(num_slots); 310 | left.b->c0 = node.b->c0; 311 | left.b->c1 = j; 312 | left.b->flag = left.b->c0 == left.b->c1 ? 313 | NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET; 314 | 315 | 316 | for (num_slots = ahtable_initial_size; 317 | (double) right_m > ahtable_max_load_factor * (double) num_slots; 318 | num_slots *= 2); 319 | 320 | right.b = ahtable_create_n(num_slots); 321 | right.b->c0 = j + 1; 322 | right.b->c1 = node.b->c1; 323 | right.b->flag = right.b->c0 == right.b->c1 ? 
324 | NODE_TYPE_PURE_BUCKET : NODE_TYPE_HYBRID_BUCKET; 325 | 326 | 327 | /* update the parent's pointer */ 328 | 329 | unsigned int c; 330 | for (c = node.b->c0; c <= j; ++c) parent.t->xs[c] = left; 331 | for (; c <= node.b->c1; ++c) parent.t->xs[c] = right; 332 | 333 | 334 | 335 | /* distribute keys to the new left or right node */ 336 | value_t* u; 337 | value_t* v; 338 | i = ahtable_iter_begin(node.b, false); 339 | while (!ahtable_iter_finished(i)) { 340 | key = ahtable_iter_key(i, &len); 341 | u = ahtable_iter_val(i); 342 | assert(len > 0); 343 | 344 | /* left */ 345 | if ((unsigned char) key[0] <= j) { 346 | if (*left.flag & NODE_TYPE_PURE_BUCKET) { 347 | v = ahtable_get(left.b, key + 1, len - 1); 348 | } 349 | else { 350 | v = ahtable_get(left.b, key, len); 351 | } 352 | *v = *u; 353 | } 354 | 355 | /* right */ 356 | else { 357 | if (*right.flag & NODE_TYPE_PURE_BUCKET) { 358 | v = ahtable_get(right.b, key + 1, len - 1); 359 | } 360 | else { 361 | v = ahtable_get(right.b, key, len); 362 | } 363 | *v = *u; 364 | } 365 | 366 | ahtable_iter_next(i); 367 | } 368 | 369 | ahtable_iter_free(i); 370 | ahtable_free(node.b); 371 | } 372 | 373 | value_t* hattrie_get(hattrie_t* T, const char* key, size_t len) 374 | { 375 | node_ptr parent = T->root; 376 | assert(*parent.flag & NODE_TYPE_TRIE); 377 | 378 | if (len == 0) return &parent.t->val; 379 | 380 | /* consume all trie nodes, now parent must be trie and child anything */ 381 | node_ptr node = hattrie_consume(&parent, &key, &len, 0); 382 | assert(*parent.flag & NODE_TYPE_TRIE); 383 | 384 | /* if the key has been consumed on a trie node, use its value */ 385 | if (len == 0) { 386 | if (*node.flag & NODE_TYPE_TRIE) { 387 | return hattrie_useval(T, node); 388 | } 389 | else if (*node.flag & NODE_TYPE_HYBRID_BUCKET) { 390 | return hattrie_useval(T, parent); 391 | } 392 | } 393 | 394 | 395 | /* preemptively split the bucket if it is full */ 396 | while (ahtable_size(node.b) >= MAX_BUCKET_SIZE) { 397 | hattrie_split(T, 
parent, node); 398 | 399 | /* after the split, the node pointer is invalidated, so we search from 400 | * the parent again. */ 401 | node = hattrie_consume(&parent, &key, &len, 0); 402 | 403 | /* if the key has been consumed on a trie node, use its value */ 404 | if (len == 0) { 405 | if (*node.flag & NODE_TYPE_TRIE) { 406 | return hattrie_useval(T, node); 407 | } 408 | else if (*node.flag & NODE_TYPE_HYBRID_BUCKET) { 409 | return hattrie_useval(T, parent); 410 | } 411 | } 412 | } 413 | 414 | assert(*node.flag & NODE_TYPE_PURE_BUCKET || *node.flag & NODE_TYPE_HYBRID_BUCKET); 415 | 416 | assert(len > 0); 417 | size_t m_old = node.b->m; 418 | value_t* val; 419 | if (*node.flag & NODE_TYPE_PURE_BUCKET) { 420 | val = ahtable_get(node.b, key + 1, len - 1); 421 | } 422 | else { 423 | val = ahtable_get(node.b, key, len); 424 | } 425 | T->m += (node.b->m - m_old); 426 | 427 | return val; 428 | } 429 | 430 | 431 | value_t* hattrie_tryget(hattrie_t* T, const char* key, size_t len) 432 | { 433 | /* find node for given key */ 434 | node_ptr node = hattrie_find(T, &key, &len); 435 | if (node.flag == NULL) { 436 | return NULL; 437 | } 438 | 439 | /* if the trie node consumes value, use it */ 440 | if (*node.flag & NODE_TYPE_TRIE) { 441 | return &node.t->val; 442 | } 443 | 444 | return ahtable_tryget(node.b, key, len); 445 | } 446 | 447 | 448 | int hattrie_del(hattrie_t* T, const char* key, size_t len) 449 | { 450 | node_ptr parent = T->root; 451 | HT_UNUSED(parent); 452 | assert(*parent.flag & NODE_TYPE_TRIE); 453 | 454 | /* find node for deletion */ 455 | node_ptr node = hattrie_find(T, &key, &len); 456 | if (node.flag == NULL) { 457 | return -1; 458 | } 459 | 460 | /* if consumed on a trie node, clear the value */ 461 | if (*node.flag & NODE_TYPE_TRIE) { 462 | return hattrie_clrval(T, node); 463 | } 464 | 465 | /* remove from bucket */ 466 | size_t m_old = ahtable_size(node.b); 467 | int ret = ahtable_del(node.b, key, len); 468 | T->m -= (m_old - ahtable_size(node.b)); 469 | 
470 | /* merge empty buckets */ 471 | /*! \todo */ 472 | 473 | return ret; 474 | } 475 | 476 | 477 | /* plan for iteration: 478 | * This is tricky, as we have no parent pointers currently, and I would like to 479 | * avoid adding them. That means maintaining a stack 480 | * 481 | */ 482 | 483 | typedef struct hattrie_node_stack_t_ 484 | { 485 | unsigned char c; 486 | size_t level; 487 | 488 | node_ptr node; 489 | struct hattrie_node_stack_t_* next; 490 | 491 | } hattrie_node_stack_t; 492 | 493 | 494 | struct hattrie_iter_t_ 495 | { 496 | char* key; 497 | size_t keysize; // space reserved for the key 498 | size_t level; 499 | 500 | /* keep track of keys stored in trie nodes */ 501 | bool has_nil_key; 502 | value_t nil_val; 503 | 504 | const hattrie_t* T; 505 | bool sorted; 506 | ahtable_iter_t* i; 507 | hattrie_node_stack_t* stack; 508 | }; 509 | 510 | 511 | static void hattrie_iter_pushchar(hattrie_iter_t* i, size_t level, char c) 512 | { 513 | if (i->keysize < level) { 514 | i->keysize *= 2; 515 | i->key = realloc_or_die(i->key, i->keysize * sizeof(char)); 516 | } 517 | 518 | if (level > 0) { 519 | i->key[level - 1] = c; 520 | } 521 | 522 | i->level = level; 523 | } 524 | 525 | 526 | static void hattrie_iter_nextnode(hattrie_iter_t* i) 527 | { 528 | if (i->stack == NULL) return; 529 | 530 | /* pop the stack */ 531 | node_ptr node; 532 | hattrie_node_stack_t* next; 533 | unsigned char c; 534 | size_t level; 535 | 536 | node = i->stack->node; 537 | next = i->stack->next; 538 | c = i->stack->c; 539 | level = i->stack->level; 540 | 541 | free(i->stack); 542 | i->stack = next; 543 | 544 | if (*node.flag & NODE_TYPE_TRIE) { 545 | hattrie_iter_pushchar(i, level, c); 546 | 547 | if(node.t->flag & NODE_HAS_VAL) { 548 | i->has_nil_key = true; 549 | i->nil_val = node.t->val; 550 | } 551 | 552 | /* push all child nodes from right to left */ 553 | int j; 554 | for (j = NODE_MAXCHAR; j >= 0; --j) { 555 | 556 | /* skip repeated pointers to hybrid bucket */ 557 | if (j < 
NODE_MAXCHAR && node.t->xs[j].t == node.t->xs[j + 1].t) continue; 558 | 559 | // push stack 560 | next = i->stack; 561 | i->stack = malloc_or_die(sizeof(hattrie_node_stack_t)); 562 | i->stack->node = node.t->xs[j]; 563 | i->stack->next = next; 564 | i->stack->level = level + 1; 565 | i->stack->c = (unsigned char) j; 566 | } 567 | } 568 | else { 569 | if (*node.flag & NODE_TYPE_PURE_BUCKET) { 570 | hattrie_iter_pushchar(i, level, c); 571 | } 572 | else { 573 | i->level = level - 1; 574 | } 575 | 576 | i->i = ahtable_iter_begin(node.b, i->sorted); 577 | } 578 | } 579 | 580 | 581 | hattrie_iter_t* hattrie_iter_begin(const hattrie_t* T, bool sorted) 582 | { 583 | hattrie_iter_t* i = malloc_or_die(sizeof(hattrie_iter_t)); 584 | i->T = T; 585 | i->sorted = sorted; 586 | i->i = NULL; 587 | i->keysize = 16; 588 | i->key = malloc_or_die(i->keysize * sizeof(char)); 589 | i->level = 0; 590 | i->has_nil_key = false; 591 | i->nil_val = 0; 592 | 593 | i->stack = malloc_or_die(sizeof(hattrie_node_stack_t)); 594 | i->stack->next = NULL; 595 | i->stack->node = T->root; 596 | i->stack->c = '\0'; 597 | i->stack->level = 0; 598 | 599 | 600 | while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) && 601 | i->stack != NULL ) { 602 | 603 | ahtable_iter_free(i->i); 604 | i->i = NULL; 605 | hattrie_iter_nextnode(i); 606 | } 607 | 608 | if (i->i != NULL && ahtable_iter_finished(i->i)) { 609 | ahtable_iter_free(i->i); 610 | i->i = NULL; 611 | } 612 | 613 | return i; 614 | } 615 | 616 | 617 | void hattrie_iter_next(hattrie_iter_t* i) 618 | { 619 | if (hattrie_iter_finished(i)) return; 620 | 621 | if (i->i != NULL && !ahtable_iter_finished(i->i)) { 622 | ahtable_iter_next(i->i); 623 | } 624 | else if (i->has_nil_key) { 625 | i->has_nil_key = false; 626 | i->nil_val = 0; 627 | hattrie_iter_nextnode(i); 628 | } 629 | 630 | while (((i->i == NULL || ahtable_iter_finished(i->i)) && !i->has_nil_key) && 631 | i->stack != NULL ) { 632 | 633 | ahtable_iter_free(i->i); 634 | i->i = 
NULL; 635 | hattrie_iter_nextnode(i); 636 | } 637 | 638 | if (i->i != NULL && ahtable_iter_finished(i->i)) { 639 | ahtable_iter_free(i->i); 640 | i->i = NULL; 641 | } 642 | } 643 | 644 | 645 | bool hattrie_iter_finished(hattrie_iter_t* i) 646 | { 647 | return i->stack == NULL && i->i == NULL && !i->has_nil_key; 648 | } 649 | 650 | 651 | void hattrie_iter_free(hattrie_iter_t* i) 652 | { 653 | if (i == NULL) return; 654 | if (i->i) ahtable_iter_free(i->i); 655 | 656 | hattrie_node_stack_t* next; 657 | while (i->stack) { 658 | next = i->stack->next; 659 | free(i->stack); 660 | i->stack = next; 661 | } 662 | 663 | free(i->key); 664 | free(i); 665 | } 666 | 667 | 668 | const char* hattrie_iter_key(hattrie_iter_t* i, size_t* len) 669 | { 670 | if (hattrie_iter_finished(i)) return NULL; 671 | 672 | size_t sublen; 673 | const char* subkey; 674 | 675 | if (i->has_nil_key) { 676 | subkey = NULL; 677 | sublen = 0; 678 | } 679 | else subkey = ahtable_iter_key(i->i, &sublen); 680 | 681 | if (i->keysize < i->level + sublen + 1) { 682 | while (i->keysize < i->level + sublen + 1) i->keysize *= 2; 683 | i->key = realloc_or_die(i->key, i->keysize * sizeof(char)); 684 | } 685 | 686 | memcpy(i->key + i->level, subkey, sublen); 687 | i->key[i->level + sublen] = '\0'; 688 | 689 | if (len) *len = i->level + sublen; 690 | return i->key; 691 | } 692 | 693 | 694 | value_t* hattrie_iter_val(hattrie_iter_t* i) 695 | { 696 | if (i->has_nil_key) return &i->nil_val; 697 | 698 | if (hattrie_iter_finished(i)) return NULL; 699 | 700 | return ahtable_iter_val(i->i); 701 | } 702 | 703 | 704 | 705 | bool hattrie_iter_equal(const hattrie_iter_t* a, 706 | const hattrie_iter_t* b) 707 | { 708 | return a->T == b->T && 709 | a->sorted == b->sorted && 710 | a->i == b->i; 711 | } 712 | -------------------------------------------------------------------------------- /hat-trie/src/pstdint.h: -------------------------------------------------------------------------------- 1 | /* A portable stdint.h 2 | 
**************************************************************************** 3 | * BSD License: 4 | **************************************************************************** 5 | * 6 | * Copyright (c) 2005-2014 Paul Hsieh 7 | * All rights reserved. 8 | * 9 | * Redistribution and use in source and binary forms, with or without 10 | * modification, are permitted provided that the following conditions 11 | * are met: 12 | * 13 | * 1. Redistributions of source code must retain the above copyright 14 | * notice, this list of conditions and the following disclaimer. 15 | * 2. Redistributions in binary form must reproduce the above copyright 16 | * notice, this list of conditions and the following disclaimer in the 17 | * documentation and/or other materials provided with the distribution. 18 | * 3. The name of the author may not be used to endorse or promote products 19 | * derived from this software without specific prior written permission. 20 | * 21 | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 | * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | * 32 | **************************************************************************** 33 | * 34 | * Version 0.1.14 35 | * 36 | * The ANSI C standard committee, for the C99 standard, specified the 37 | * inclusion of a new standard include file called stdint.h. 
This is 38 | * a very useful and long desired include file which contains several 39 | * very precise definitions for integer scalar types that is 40 | * critically important for making portable several classes of 41 | * applications including cryptography, hashing, variable length 42 | * integer libraries and so on. But for most developers its likely 43 | * useful just for programming sanity. 44 | * 45 | * The problem is that most compiler vendors have decided not to 46 | * implement the C99 standard, and the next C++ language standard 47 | * (which has a lot more mindshare these days) will be a long time in 48 | * coming and its unknown whether or not it will include stdint.h or 49 | * how much adoption it will have. Either way, it will be a long time 50 | * before all compilers come with a stdint.h and it also does nothing 51 | * for the extremely large number of compilers available today which 52 | * do not include this file, or anything comparable to it. 53 | * 54 | * So that's what this file is all about. Its an attempt to build a 55 | * single universal include file that works on as many platforms as 56 | * possible to deliver what stdint.h is supposed to. A few things 57 | * that should be noted about this file: 58 | * 59 | * 1) It is not guaranteed to be portable and/or present an identical 60 | * interface on all platforms. The extreme variability of the 61 | * ANSI C standard makes this an impossibility right from the 62 | * very get go. Its really only meant to be useful for the vast 63 | * majority of platforms that possess the capability of 64 | * implementing usefully and precisely defined, standard sized 65 | * integer scalars. Systems which are not intrinsically 2s 66 | * complement may produce invalid constants. 67 | * 68 | * 2) There is an unavoidable use of non-reserved symbols. 69 | * 70 | * 3) Other standard include files are invoked. 71 | * 72 | * 4) This file may come in conflict with future platforms that do 73 | * include stdint.h. 
The hope is that one or the other can be 74 | * used with no real difference. 75 | * 76 | * 5) In the current verison, if your platform can't represent 77 | * int32_t, int16_t and int8_t, it just dumps out with a compiler 78 | * error. 79 | * 80 | * 6) 64 bit integers may or may not be defined. Test for their 81 | * presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX. 82 | * Note that this is different from the C99 specification which 83 | * requires the existence of 64 bit support in the compiler. If 84 | * this is not defined for your platform, yet it is capable of 85 | * dealing with 64 bits then it is because this file has not yet 86 | * been extended to cover all of your system's capabilities. 87 | * 88 | * 7) (u)intptr_t may or may not be defined. Test for its presence 89 | * with the test: #ifdef PTRDIFF_MAX. If this is not defined 90 | * for your platform, then it is because this file has not yet 91 | * been extended to cover all of your system's capabilities, not 92 | * because its optional. 93 | * 94 | * 8) The following might not been defined even if your platform is 95 | * capable of defining it: 96 | * 97 | * WCHAR_MIN 98 | * WCHAR_MAX 99 | * (u)int64_t 100 | * PTRDIFF_MIN 101 | * PTRDIFF_MAX 102 | * (u)intptr_t 103 | * 104 | * 9) The following have not been defined: 105 | * 106 | * WINT_MIN 107 | * WINT_MAX 108 | * 109 | * 10) The criteria for defining (u)int_least(*)_t isn't clear, 110 | * except for systems which don't have a type that precisely 111 | * defined 8, 16, or 32 bit types (which this include file does 112 | * not support anyways). Default definitions have been given. 113 | * 114 | * 11) The criteria for defining (u)int_fast(*)_t isn't something I 115 | * would trust to any particular compiler vendor or the ANSI C 116 | * committee. 
It is well known that "compatible systems" are 117 | * commonly created that have very different performance 118 | * characteristics from the systems they are compatible with, 119 | * especially those whose vendors make both the compiler and the 120 | * system. Default definitions have been given, but its strongly 121 | * recommended that users never use these definitions for any 122 | * reason (they do *NOT* deliver any serious guarantee of 123 | * improved performance -- not in this file, nor any vendor's 124 | * stdint.h). 125 | * 126 | * 12) The following macros: 127 | * 128 | * PRINTF_INTMAX_MODIFIER 129 | * PRINTF_INT64_MODIFIER 130 | * PRINTF_INT32_MODIFIER 131 | * PRINTF_INT16_MODIFIER 132 | * PRINTF_LEAST64_MODIFIER 133 | * PRINTF_LEAST32_MODIFIER 134 | * PRINTF_LEAST16_MODIFIER 135 | * PRINTF_INTPTR_MODIFIER 136 | * 137 | * are strings which have been defined as the modifiers required 138 | * for the "d", "u" and "x" printf formats to correctly output 139 | * (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t, 140 | * (u)least32_t, (u)least16_t and (u)intptr_t types respectively. 141 | * PRINTF_INTPTR_MODIFIER is not defined for some systems which 142 | * provide their own stdint.h. PRINTF_INT64_MODIFIER is not 143 | * defined if INT64_MAX is not defined. These are an extension 144 | * beyond what C99 specifies must be in stdint.h. 145 | * 146 | * In addition, the following macros are defined: 147 | * 148 | * PRINTF_INTMAX_HEX_WIDTH 149 | * PRINTF_INT64_HEX_WIDTH 150 | * PRINTF_INT32_HEX_WIDTH 151 | * PRINTF_INT16_HEX_WIDTH 152 | * PRINTF_INT8_HEX_WIDTH 153 | * PRINTF_INTMAX_DEC_WIDTH 154 | * PRINTF_INT64_DEC_WIDTH 155 | * PRINTF_INT32_DEC_WIDTH 156 | * PRINTF_INT16_DEC_WIDTH 157 | * PRINTF_INT8_DEC_WIDTH 158 | * 159 | * Which specifies the maximum number of characters required to 160 | * print the number of that type in either hexadecimal or decimal. 161 | * These are an extension beyond what C99 specifies must be in 162 | * stdint.h. 
163 | * 164 | * Compilers tested (all with 0 warnings at their highest respective 165 | * settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32 166 | * bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio 167 | * .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3 168 | * 169 | * This file should be considered a work in progress. Suggestions for 170 | * improvements, especially those which increase coverage are strongly 171 | * encouraged. 172 | * 173 | * Acknowledgements 174 | * 175 | * The following people have made significant contributions to the 176 | * development and testing of this file: 177 | * 178 | * Chris Howie 179 | * John Steele Scott 180 | * Dave Thorup 181 | * John Dill 182 | * Florian Wobbe 183 | * Christopher Sean Morrison 184 | * 185 | */ 186 | 187 | #include 188 | #include 189 | #include 190 | 191 | /* 192 | * For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and 193 | * do nothing else. On the Mac OS X version of gcc this is _STDINT_H_. 
194 | */ 195 | 196 | #if ((defined(__STDC__) && __STDC__ && defined(__STDC_VERSION__) && __STDC_VERSION__ >= 199901L) || (defined (__WATCOMC__) && (defined (_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (__GNUC__ > 3 || defined(_STDINT_H) || defined(_STDINT_H_) || defined (__UINT_FAST64_TYPE__)) )) && !defined (_PSTDINT_H_INCLUDED) 197 | #include 198 | #define _PSTDINT_H_INCLUDED 199 | # if defined(__GNUC__) && (defined(__x86_64__) || defined(__ppc64__)) 200 | # ifndef PRINTF_INT64_MODIFIER 201 | # define PRINTF_INT64_MODIFIER "l" 202 | # endif 203 | # ifndef PRINTF_INT32_MODIFIER 204 | # define PRINTF_INT32_MODIFIER "" 205 | # endif 206 | # else 207 | # ifndef PRINTF_INT64_MODIFIER 208 | # define PRINTF_INT64_MODIFIER "ll" 209 | # endif 210 | # ifndef PRINTF_INT32_MODIFIER 211 | # define PRINTF_INT32_MODIFIER "l" 212 | # endif 213 | # endif 214 | # ifndef PRINTF_INT16_MODIFIER 215 | # define PRINTF_INT16_MODIFIER "h" 216 | # endif 217 | # ifndef PRINTF_INTMAX_MODIFIER 218 | # define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER 219 | # endif 220 | # ifndef PRINTF_INT64_HEX_WIDTH 221 | # define PRINTF_INT64_HEX_WIDTH "16" 222 | # endif 223 | # ifndef PRINTF_INT32_HEX_WIDTH 224 | # define PRINTF_INT32_HEX_WIDTH "8" 225 | # endif 226 | # ifndef PRINTF_INT16_HEX_WIDTH 227 | # define PRINTF_INT16_HEX_WIDTH "4" 228 | # endif 229 | # ifndef PRINTF_INT8_HEX_WIDTH 230 | # define PRINTF_INT8_HEX_WIDTH "2" 231 | # endif 232 | # ifndef PRINTF_INT64_DEC_WIDTH 233 | # define PRINTF_INT64_DEC_WIDTH "20" 234 | # endif 235 | # ifndef PRINTF_INT32_DEC_WIDTH 236 | # define PRINTF_INT32_DEC_WIDTH "10" 237 | # endif 238 | # ifndef PRINTF_INT16_DEC_WIDTH 239 | # define PRINTF_INT16_DEC_WIDTH "5" 240 | # endif 241 | # ifndef PRINTF_INT8_DEC_WIDTH 242 | # define PRINTF_INT8_DEC_WIDTH "3" 243 | # endif 244 | # ifndef PRINTF_INTMAX_HEX_WIDTH 245 | # define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH 246 | # endif 247 | # ifndef PRINTF_INTMAX_DEC_WIDTH 248 | # 
define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH 249 | # endif 250 | 251 | /* 252 | * Something really weird is going on with Open Watcom. Just pull some of 253 | * these duplicated definitions from Open Watcom's stdint.h file for now. 254 | */ 255 | 256 | # if defined (__WATCOMC__) && __WATCOMC__ >= 1250 257 | # if !defined (INT64_C) 258 | # define INT64_C(x) (x + (INT64_MAX - INT64_MAX)) 259 | # endif 260 | # if !defined (UINT64_C) 261 | # define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX)) 262 | # endif 263 | # if !defined (INT32_C) 264 | # define INT32_C(x) (x + (INT32_MAX - INT32_MAX)) 265 | # endif 266 | # if !defined (UINT32_C) 267 | # define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX)) 268 | # endif 269 | # if !defined (INT16_C) 270 | # define INT16_C(x) (x) 271 | # endif 272 | # if !defined (UINT16_C) 273 | # define UINT16_C(x) (x) 274 | # endif 275 | # if !defined (INT8_C) 276 | # define INT8_C(x) (x) 277 | # endif 278 | # if !defined (UINT8_C) 279 | # define UINT8_C(x) (x) 280 | # endif 281 | # if !defined (UINT64_MAX) 282 | # define UINT64_MAX 18446744073709551615ULL 283 | # endif 284 | # if !defined (INT64_MAX) 285 | # define INT64_MAX 9223372036854775807LL 286 | # endif 287 | # if !defined (UINT32_MAX) 288 | # define UINT32_MAX 4294967295UL 289 | # endif 290 | # if !defined (INT32_MAX) 291 | # define INT32_MAX 2147483647L 292 | # endif 293 | # if !defined (INTMAX_MAX) 294 | # define INTMAX_MAX INT64_MAX 295 | # endif 296 | # if !defined (INTMAX_MIN) 297 | # define INTMAX_MIN INT64_MIN 298 | # endif 299 | # endif 300 | #endif 301 | 302 | #ifndef _PSTDINT_H_INCLUDED 303 | #define _PSTDINT_H_INCLUDED 304 | 305 | #ifndef SIZE_MAX 306 | # define SIZE_MAX (~(size_t)0) 307 | #endif 308 | 309 | /* 310 | * Deduce the type assignments from limits.h under the assumption that 311 | * integer sizes in bits are powers of 2, and follow the ANSI 312 | * definitions. 
313 | */ 314 | 315 | #ifndef UINT8_MAX 316 | # define UINT8_MAX 0xff 317 | #endif 318 | #if !defined(uint8_t) && !defined(_UINT8_T) 319 | # if (UCHAR_MAX == UINT8_MAX) || defined (S_SPLINT_S) 320 | typedef unsigned char uint8_t; 321 | # define UINT8_C(v) ((uint8_t) v) 322 | # else 323 | # error "Platform not supported" 324 | # endif 325 | #endif 326 | 327 | #ifndef INT8_MAX 328 | # define INT8_MAX 0x7f 329 | #endif 330 | #ifndef INT8_MIN 331 | # define INT8_MIN INT8_C(0x80) 332 | #endif 333 | #if !defined(int8_t) && !defined(_INT8_T) 334 | # if (SCHAR_MAX == INT8_MAX) || defined (S_SPLINT_S) 335 | typedef signed char int8_t; 336 | # define INT8_C(v) ((int8_t) v) 337 | # else 338 | # error "Platform not supported" 339 | # endif 340 | #endif 341 | 342 | #ifndef UINT16_MAX 343 | # define UINT16_MAX 0xffff 344 | #endif 345 | #if !defined(uint16_t) && !defined(_UINT16_T) 346 | #if (UINT_MAX == UINT16_MAX) || defined (S_SPLINT_S) 347 | typedef unsigned int uint16_t; 348 | # ifndef PRINTF_INT16_MODIFIER 349 | # define PRINTF_INT16_MODIFIER "" 350 | # endif 351 | # define UINT16_C(v) ((uint16_t) (v)) 352 | #elif (USHRT_MAX == UINT16_MAX) 353 | typedef unsigned short uint16_t; 354 | # define UINT16_C(v) ((uint16_t) (v)) 355 | # ifndef PRINTF_INT16_MODIFIER 356 | # define PRINTF_INT16_MODIFIER "h" 357 | # endif 358 | #else 359 | #error "Platform not supported" 360 | #endif 361 | #endif 362 | 363 | #ifndef INT16_MAX 364 | # define INT16_MAX 0x7fff 365 | #endif 366 | #ifndef INT16_MIN 367 | # define INT16_MIN INT16_C(0x8000) 368 | #endif 369 | #if !defined(int16_t) && !defined(_INT16_T) 370 | #if (INT_MAX == INT16_MAX) || defined (S_SPLINT_S) 371 | typedef signed int int16_t; 372 | # define INT16_C(v) ((int16_t) (v)) 373 | # ifndef PRINTF_INT16_MODIFIER 374 | # define PRINTF_INT16_MODIFIER "" 375 | # endif 376 | #elif (SHRT_MAX == INT16_MAX) 377 | typedef signed short int16_t; 378 | # define INT16_C(v) ((int16_t) (v)) 379 | # ifndef PRINTF_INT16_MODIFIER 380 | # define 
PRINTF_INT16_MODIFIER "h" 381 | # endif 382 | #else 383 | #error "Platform not supported" 384 | #endif 385 | #endif 386 | 387 | #ifndef UINT32_MAX 388 | # define UINT32_MAX (0xffffffffUL) 389 | #endif 390 | #if !defined(uint32_t) && !defined(_UINT32_T) 391 | #if (ULONG_MAX == UINT32_MAX) || defined (S_SPLINT_S) 392 | typedef unsigned long uint32_t; 393 | # define UINT32_C(v) v ## UL 394 | # ifndef PRINTF_INT32_MODIFIER 395 | # define PRINTF_INT32_MODIFIER "l" 396 | # endif 397 | #elif (UINT_MAX == UINT32_MAX) 398 | typedef unsigned int uint32_t; 399 | # ifndef PRINTF_INT32_MODIFIER 400 | # define PRINTF_INT32_MODIFIER "" 401 | # endif 402 | # define UINT32_C(v) v ## U 403 | #elif (USHRT_MAX == UINT32_MAX) 404 | typedef unsigned short uint32_t; 405 | # define UINT32_C(v) ((unsigned short) (v)) 406 | # ifndef PRINTF_INT32_MODIFIER 407 | # define PRINTF_INT32_MODIFIER "" 408 | # endif 409 | #else 410 | #error "Platform not supported" 411 | #endif 412 | #endif 413 | 414 | #ifndef INT32_MAX 415 | # define INT32_MAX (0x7fffffffL) 416 | #endif 417 | #ifndef INT32_MIN 418 | # define INT32_MIN INT32_C(0x80000000) 419 | #endif 420 | #if !defined(int32_t) && !defined(_INT32_T) 421 | #if (LONG_MAX == INT32_MAX) || defined (S_SPLINT_S) 422 | typedef signed long int32_t; 423 | # define INT32_C(v) v ## L 424 | # ifndef PRINTF_INT32_MODIFIER 425 | # define PRINTF_INT32_MODIFIER "l" 426 | # endif 427 | #elif (INT_MAX == INT32_MAX) 428 | typedef signed int int32_t; 429 | # define INT32_C(v) v 430 | # ifndef PRINTF_INT32_MODIFIER 431 | # define PRINTF_INT32_MODIFIER "" 432 | # endif 433 | #elif (SHRT_MAX == INT32_MAX) 434 | typedef signed short int32_t; 435 | # define INT32_C(v) ((short) (v)) 436 | # ifndef PRINTF_INT32_MODIFIER 437 | # define PRINTF_INT32_MODIFIER "" 438 | # endif 439 | #else 440 | #error "Platform not supported" 441 | #endif 442 | #endif 443 | 444 | /* 445 | * The macro stdint_int64_defined is temporarily used to record 446 | * whether or not 64 integer support is 
available. It must be 447 | * defined for any 64 integer extensions for new platforms that are 448 | * added. 449 | */ 450 | 451 | #undef stdint_int64_defined 452 | #if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined (S_SPLINT_S) 453 | # if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined (S_SPLINT_S) 454 | # define stdint_int64_defined 455 | typedef long long int64_t; 456 | typedef unsigned long long uint64_t; 457 | # define UINT64_C(v) v ## ULL 458 | # define INT64_C(v) v ## LL 459 | # ifndef PRINTF_INT64_MODIFIER 460 | # define PRINTF_INT64_MODIFIER "ll" 461 | # endif 462 | # endif 463 | #endif 464 | 465 | #if !defined (stdint_int64_defined) 466 | # if defined(__GNUC__) 467 | # define stdint_int64_defined 468 | __extension__ typedef long long int64_t; 469 | __extension__ typedef unsigned long long uint64_t; 470 | # define UINT64_C(v) v ## ULL 471 | # define INT64_C(v) v ## LL 472 | # ifndef PRINTF_INT64_MODIFIER 473 | # define PRINTF_INT64_MODIFIER "ll" 474 | # endif 475 | # elif defined(__MWERKS__) || defined (__SUNPRO_C) || defined (__SUNPRO_CC) || defined (__APPLE_CC__) || defined (_LONG_LONG) || defined (_CRAYC) || defined (S_SPLINT_S) 476 | # define stdint_int64_defined 477 | typedef long long int64_t; 478 | typedef unsigned long long uint64_t; 479 | # define UINT64_C(v) v ## ULL 480 | # define INT64_C(v) v ## LL 481 | # ifndef PRINTF_INT64_MODIFIER 482 | # define PRINTF_INT64_MODIFIER "ll" 483 | # endif 484 | # elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined (__BORLANDC__) && __BORLANDC__ > 0x460) || defined (__alpha) || defined (__DECC) 485 | # define stdint_int64_defined 486 | typedef __int64 int64_t; 487 | typedef unsigned __int64 uint64_t; 488 | # define UINT64_C(v) v ## UI64 489 | # define INT64_C(v) v ## I64 490 | # ifndef PRINTF_INT64_MODIFIER 491 | # define PRINTF_INT64_MODIFIER "I64" 492 | # endif 493 | # endif 494 | #endif 495 | 496 | #if !defined 
(LONG_LONG_MAX) && defined (INT64_C) 497 | # define LONG_LONG_MAX INT64_C (9223372036854775807) 498 | #endif 499 | #ifndef ULONG_LONG_MAX 500 | # define ULONG_LONG_MAX UINT64_C (18446744073709551615) 501 | #endif 502 | 503 | #if !defined (INT64_MAX) && defined (INT64_C) 504 | # define INT64_MAX INT64_C (9223372036854775807) 505 | #endif 506 | #if !defined (INT64_MIN) && defined (INT64_C) 507 | # define INT64_MIN (-INT64_C (9223372036854775807) - 1) /* 9223372036854775808 does not fit a signed 64-bit constant, so derive the minimum from the maximum */ 508 | #endif 509 | #if !defined (UINT64_MAX) && defined (INT64_C) 510 | # define UINT64_MAX UINT64_C (18446744073709551615) 511 | #endif 512 | 513 | /* 514 | * Width of hexadecimal for number field. 515 | */ 516 | 517 | #ifndef PRINTF_INT64_HEX_WIDTH 518 | # define PRINTF_INT64_HEX_WIDTH "16" 519 | #endif 520 | #ifndef PRINTF_INT32_HEX_WIDTH 521 | # define PRINTF_INT32_HEX_WIDTH "8" 522 | #endif 523 | #ifndef PRINTF_INT16_HEX_WIDTH 524 | # define PRINTF_INT16_HEX_WIDTH "4" 525 | #endif 526 | #ifndef PRINTF_INT8_HEX_WIDTH 527 | # define PRINTF_INT8_HEX_WIDTH "2" 528 | #endif 529 | 530 | #ifndef PRINTF_INT64_DEC_WIDTH 531 | # define PRINTF_INT64_DEC_WIDTH "20" 532 | #endif 533 | #ifndef PRINTF_INT32_DEC_WIDTH 534 | # define PRINTF_INT32_DEC_WIDTH "10" 535 | #endif 536 | #ifndef PRINTF_INT16_DEC_WIDTH 537 | # define PRINTF_INT16_DEC_WIDTH "5" 538 | #endif 539 | #ifndef PRINTF_INT8_DEC_WIDTH 540 | # define PRINTF_INT8_DEC_WIDTH "3" 541 | #endif 542 | 543 | /* 544 | * Ok, let's not worry about 128 bit integers for now. Moore's law says 545 | * we don't need to worry about that until about 2040 at which point 546 | * we'll have bigger things to worry about.
547 | */ 548 | 549 | #ifdef stdint_int64_defined 550 | typedef int64_t intmax_t; 551 | typedef uint64_t uintmax_t; 552 | # define INTMAX_MAX INT64_MAX 553 | # define INTMAX_MIN INT64_MIN 554 | # define UINTMAX_MAX UINT64_MAX 555 | # define UINTMAX_C(v) UINT64_C(v) 556 | # define INTMAX_C(v) INT64_C(v) 557 | # ifndef PRINTF_INTMAX_MODIFIER 558 | # define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER 559 | # endif 560 | # ifndef PRINTF_INTMAX_HEX_WIDTH 561 | # define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH 562 | # endif 563 | # ifndef PRINTF_INTMAX_DEC_WIDTH 564 | # define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH 565 | # endif 566 | #else 567 | typedef int32_t intmax_t; 568 | typedef uint32_t uintmax_t; 569 | # define INTMAX_MAX INT32_MAX 570 | # define UINTMAX_MAX UINT32_MAX 571 | # define UINTMAX_C(v) UINT32_C(v) 572 | # define INTMAX_C(v) INT32_C(v) 573 | # ifndef PRINTF_INTMAX_MODIFIER 574 | # define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER 575 | # endif 576 | # ifndef PRINTF_INTMAX_HEX_WIDTH 577 | # define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH 578 | # endif 579 | # ifndef PRINTF_INTMAX_DEC_WIDTH 580 | # define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH 581 | # endif 582 | #endif 583 | 584 | /* 585 | * Because this file currently only supports platforms which have 586 | * precise powers of 2 as bit sizes for the default integers, the 587 | * least definitions are all trivial. Its possible that a future 588 | * version of this file could have different definitions. 
589 | */ 590 | 591 | #ifndef stdint_least_defined 592 | typedef int8_t int_least8_t; 593 | typedef uint8_t uint_least8_t; 594 | typedef int16_t int_least16_t; 595 | typedef uint16_t uint_least16_t; 596 | typedef int32_t int_least32_t; 597 | typedef uint32_t uint_least32_t; 598 | # define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER 599 | # define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER 600 | # define UINT_LEAST8_MAX UINT8_MAX 601 | # define INT_LEAST8_MAX INT8_MAX 602 | # define UINT_LEAST16_MAX UINT16_MAX 603 | # define INT_LEAST16_MAX INT16_MAX 604 | # define UINT_LEAST32_MAX UINT32_MAX 605 | # define INT_LEAST32_MAX INT32_MAX 606 | # define INT_LEAST8_MIN INT8_MIN 607 | # define INT_LEAST16_MIN INT16_MIN 608 | # define INT_LEAST32_MIN INT32_MIN 609 | # ifdef stdint_int64_defined 610 | typedef int64_t int_least64_t; 611 | typedef uint64_t uint_least64_t; 612 | # define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER 613 | # define UINT_LEAST64_MAX UINT64_MAX 614 | # define INT_LEAST64_MAX INT64_MAX 615 | # define INT_LEAST64_MIN INT64_MIN 616 | # endif 617 | #endif 618 | #undef stdint_least_defined 619 | 620 | /* 621 | * The ANSI C committee pretending to know or specify anything about 622 | * performance is the epitome of misguided arrogance. The mandate of 623 | * this file is to *ONLY* ever support that absolute minimum 624 | * definition of the fast integer types, for compatibility purposes. 625 | * No extensions, and no attempt to suggest what may or may not be a 626 | * faster integer type will ever be made in this file. Developers are 627 | * warned to stay away from these types when using this or any other 628 | * stdint.h. 
629 | */ 630 | 631 | typedef int_least8_t int_fast8_t; 632 | typedef uint_least8_t uint_fast8_t; 633 | typedef int_least16_t int_fast16_t; 634 | typedef uint_least16_t uint_fast16_t; 635 | typedef int_least32_t int_fast32_t; 636 | typedef uint_least32_t uint_fast32_t; 637 | #define UINT_FAST8_MAX UINT_LEAST8_MAX 638 | #define INT_FAST8_MAX INT_LEAST8_MAX 639 | #define UINT_FAST16_MAX UINT_LEAST16_MAX 640 | #define INT_FAST16_MAX INT_LEAST16_MAX 641 | #define UINT_FAST32_MAX UINT_LEAST32_MAX 642 | #define INT_FAST32_MAX INT_LEAST32_MAX 643 | #define INT_FAST8_MIN INT_LEAST8_MIN 644 | #define INT_FAST16_MIN INT_LEAST16_MIN 645 | #define INT_FAST32_MIN INT_LEAST32_MIN 646 | #ifdef stdint_int64_defined 647 | typedef int_least64_t int_fast64_t; 648 | typedef uint_least64_t uint_fast64_t; 649 | # define UINT_FAST64_MAX UINT_LEAST64_MAX 650 | # define INT_FAST64_MAX INT_LEAST64_MAX 651 | # define INT_FAST64_MIN INT_LEAST64_MIN 652 | #endif 653 | 654 | #undef stdint_int64_defined 655 | 656 | /* 657 | * Whatever piecemeal, per compiler thing we can do about the wchar_t 658 | * type limits. 659 | */ 660 | 661 | #if defined(__WATCOMC__) || defined(_MSC_VER) || defined (__GNUC__) 662 | # include <wchar.h> /* wchar_t and, where provided, WCHAR_MIN / WCHAR_MAX */ 663 | # ifndef WCHAR_MIN 664 | # define WCHAR_MIN 0 665 | # endif 666 | # ifndef WCHAR_MAX 667 | # define WCHAR_MAX ((wchar_t)-1) 668 | # endif 669 | #endif 670 | 671 | /* 672 | * Whatever piecemeal, per compiler/platform thing we can do about the 673 | * (u)intptr_t types and limits.
674 | */ 675 | 676 | #if (defined (_MSC_VER) && defined (_UINTPTR_T_DEFINED)) || defined (_UINTPTR_T) 677 | # define STDINT_H_UINTPTR_T_DEFINED 678 | #endif 679 | 680 | #ifndef STDINT_H_UINTPTR_T_DEFINED 681 | # if defined (__alpha__) || defined (__ia64__) || defined (__x86_64__) || defined (_WIN64) || defined (__ppc64__) 682 | # define stdint_intptr_bits 64 683 | # elif defined (__WATCOMC__) || defined (__TURBOC__) 684 | # if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__) 685 | # define stdint_intptr_bits 16 686 | # else 687 | # define stdint_intptr_bits 32 688 | # endif 689 | # elif defined (__i386__) || defined (_WIN32) || defined (WIN32) || defined (__ppc64__) 690 | # define stdint_intptr_bits 32 691 | # elif defined (__INTEL_COMPILER) 692 | /* TODO -- what did Intel do about x86-64? */ 693 | # else 694 | /* #error "This platform might not be supported yet" */ 695 | # endif 696 | 697 | # ifdef stdint_intptr_bits 698 | # define stdint_intptr_glue3_i(a,b,c) a##b##c 699 | # define stdint_intptr_glue3(a,b,c) stdint_intptr_glue3_i(a,b,c) 700 | # ifndef PRINTF_INTPTR_MODIFIER 701 | # define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER) 702 | # endif 703 | # ifndef PTRDIFF_MAX 704 | # define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) 705 | # endif 706 | # ifndef PTRDIFF_MIN 707 | # define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) 708 | # endif 709 | # ifndef UINTPTR_MAX 710 | # define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX) 711 | # endif 712 | # ifndef INTPTR_MAX 713 | # define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) 714 | # endif 715 | # ifndef INTPTR_MIN 716 | # define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) 717 | # endif 718 | # ifndef INTPTR_C 719 | # define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x) 720 | # endif 721 | # ifndef UINTPTR_C 722 | # define UINTPTR_C(x) 
stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x) 723 | # endif 724 | typedef stdint_intptr_glue3(uint,stdint_intptr_bits,_t) uintptr_t; 725 | typedef stdint_intptr_glue3( int,stdint_intptr_bits,_t) intptr_t; 726 | # else 727 | /* TODO -- This following is likely wrong for some platforms, and does 728 | nothing for the definition of uintptr_t. */ 729 | typedef ptrdiff_t intptr_t; 730 | # endif 731 | # define STDINT_H_UINTPTR_T_DEFINED 732 | #endif 733 | 734 | /* 735 | * Assumes sig_atomic_t is signed and we have a 2s complement machine. 736 | */ 737 | 738 | #ifndef SIG_ATOMIC_MAX 739 | # define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1) 740 | #endif 741 | 742 | #endif 743 | 744 | #if defined (__TEST_PSTDINT_FOR_CORRECTNESS) 745 | 746 | /* 747 | * Please compile with the maximum warning settings to make sure macros are not 748 | * defined more than once. 749 | */ 750 | 751 | #include <stdio.h> /* printf, sprintf */ 752 | #include <stdlib.h> /* EXIT_SUCCESS */ 753 | #include <string.h> /* strcmp */ 754 | 755 | #define glue3_aux(x,y,z) x ## y ## z 756 | #define glue3(x,y,z) glue3_aux(x,y,z) 757 | 758 | #define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,) = glue3(UINT,bits,_C) (0); 759 | #define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,) = glue3(INT,bits,_C) (0); 760 | 761 | #define DECL(us,bits) glue3(DECL,us,) (bits) 762 | 763 | #define TESTUMAX(bits) glue3(u,bits,) = ~glue3(u,bits,); if (glue3(UINT,bits,_MAX) != glue3(u,bits,)) printf ("Something wrong with UINT%d_MAX\n", bits) 764 | 765 | int main () { 766 | DECL(I,8) 767 | DECL(U,8) 768 | DECL(I,16) 769 | DECL(U,16) 770 | DECL(I,32) 771 | DECL(U,32) 772 | #ifdef INT64_MAX 773 | DECL(I,64) 774 | DECL(U,64) 775 | #endif 776 | intmax_t imax = INTMAX_C(0); 777 | uintmax_t umax = UINTMAX_C(0); 778 | char str0[256], str1[256]; 779 | 780 | sprintf (str0, "%d %x\n", 0, ~0); 781 | 782 | sprintf (str1, "%d %x\n", i8, ~0); 783 | if (0 != strcmp (str0, str1)) printf ("Something wrong with i8 : %s\n", str1); 784 | sprintf (str1, "%u %x\n", u8, ~0); 785 | if (0 !=
strcmp (str0, str1)) printf ("Something wrong with u8 : %s\n", str1); 786 | sprintf (str1, "%d %x\n", i16, ~0); 787 | if (0 != strcmp (str0, str1)) printf ("Something wrong with i16 : %s\n", str1); 788 | sprintf (str1, "%u %x\n", u16, ~0); 789 | if (0 != strcmp (str0, str1)) printf ("Something wrong with u16 : %s\n", str1); 790 | sprintf (str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0); 791 | if (0 != strcmp (str0, str1)) printf ("Something wrong with i32 : %s\n", str1); 792 | sprintf (str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0); 793 | if (0 != strcmp (str0, str1)) printf ("Something wrong with u32 : %s\n", str1); 794 | #ifdef INT64_MAX 795 | sprintf (str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0); 796 | if (0 != strcmp (str0, str1)) printf ("Something wrong with i64 : %s\n", str1); 797 | #endif 798 | sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0); 799 | if (0 != strcmp (str0, str1)) printf ("Something wrong with imax : %s\n", str1); 800 | sprintf (str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0); 801 | if (0 != strcmp (str0, str1)) printf ("Something wrong with umax : %s\n", str1); 802 | 803 | TESTUMAX(8); 804 | TESTUMAX(16); 805 | TESTUMAX(32); 806 | #ifdef INT64_MAX 807 | TESTUMAX(64); 808 | #endif 809 | 810 | return EXIT_SUCCESS; 811 | } 812 | 813 | #endif 814 | -------------------------------------------------------------------------------- /src/chat_trie.c: -------------------------------------------------------------------------------- 1 | /* Generated by Cython 0.23.4 */ 2 | 3 | #define PY_SSIZE_T_CLEAN 4 | #include "Python.h" 5 | #ifndef Py_PYTHON_H 6 | #error Python headers needed to compile C extensions, please install development version of Python. 7 | #elif PY_VERSION_HEX < 0x02060000 || (0x03000000 <= PY_VERSION_HEX && PY_VERSION_HEX < 0x03020000) 8 | #error Cython requires Python 2.6+ or Python 3.2+. 
9 | #else 10 | #define CYTHON_ABI "0_23_4" 11 | #include <stddef.h> /* offsetof, size_t */ 12 | #ifndef offsetof 13 | #define offsetof(type, member) ( (size_t) & ((type*)0) -> member ) 14 | #endif 15 | #if !defined(WIN32) && !defined(MS_WINDOWS) 16 | #ifndef __stdcall 17 | #define __stdcall 18 | #endif 19 | #ifndef __cdecl 20 | #define __cdecl 21 | #endif 22 | #ifndef __fastcall 23 | #define __fastcall 24 | #endif 25 | #endif 26 | #ifndef DL_IMPORT 27 | #define DL_IMPORT(t) t 28 | #endif 29 | #ifndef DL_EXPORT 30 | #define DL_EXPORT(t) t 31 | #endif 32 | #ifndef PY_LONG_LONG 33 | #define PY_LONG_LONG LONG_LONG 34 | #endif 35 | #ifndef Py_HUGE_VAL 36 | #define Py_HUGE_VAL HUGE_VAL 37 | #endif 38 | #ifdef PYPY_VERSION 39 | #define CYTHON_COMPILING_IN_PYPY 1 40 | #define CYTHON_COMPILING_IN_CPYTHON 0 41 | #else 42 | #define CYTHON_COMPILING_IN_PYPY 0 43 | #define CYTHON_COMPILING_IN_CPYTHON 1 44 | #endif 45 | #if !defined(CYTHON_USE_PYLONG_INTERNALS) && CYTHON_COMPILING_IN_CPYTHON && PY_VERSION_HEX >= 0x02070000 46 | #define CYTHON_USE_PYLONG_INTERNALS 1 47 | #endif 48 | #if CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x02070600 && !defined(Py_OptimizeFlag) 49 | #define Py_OptimizeFlag 0 50 | #endif 51 | #define __PYX_BUILD_PY_SSIZE_T "n" 52 | #define CYTHON_FORMAT_SSIZE_T "z" 53 | #if PY_MAJOR_VERSION < 3 54 | #define __Pyx_BUILTIN_MODULE_NAME "__builtin__" 55 | #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ 56 | PyCode_New(a+k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) 57 | #define __Pyx_DefaultClassType PyClass_Type 58 | #else 59 | #define __Pyx_BUILTIN_MODULE_NAME "builtins" 60 | #define __Pyx_PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos)\ 61 | PyCode_New(a, k, l, s, f, code, c, n, v, fv, cell, fn, name, fline, lnos) 62 | #define __Pyx_DefaultClassType PyType_Type 63 | #endif 64 | #ifndef Py_TPFLAGS_CHECKTYPES 65 | #define Py_TPFLAGS_CHECKTYPES 0 66 | #endif 67 | #ifndef Py_TPFLAGS_HAVE_INDEX 68 | #define
Py_TPFLAGS_HAVE_INDEX 0 69 | #endif 70 | #ifndef Py_TPFLAGS_HAVE_NEWBUFFER 71 | #define Py_TPFLAGS_HAVE_NEWBUFFER 0 72 | #endif 73 | #ifndef Py_TPFLAGS_HAVE_FINALIZE 74 | #define Py_TPFLAGS_HAVE_FINALIZE 0 75 | #endif 76 | #if PY_VERSION_HEX > 0x03030000 && defined(PyUnicode_KIND) 77 | #define CYTHON_PEP393_ENABLED 1 78 | #define __Pyx_PyUnicode_READY(op) (likely(PyUnicode_IS_READY(op)) ?\ 79 | 0 : _PyUnicode_Ready((PyObject *)(op))) 80 | #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_LENGTH(u) 81 | #define __Pyx_PyUnicode_READ_CHAR(u, i) PyUnicode_READ_CHAR(u, i) 82 | #define __Pyx_PyUnicode_KIND(u) PyUnicode_KIND(u) 83 | #define __Pyx_PyUnicode_DATA(u) PyUnicode_DATA(u) 84 | #define __Pyx_PyUnicode_READ(k, d, i) PyUnicode_READ(k, d, i) 85 | #else 86 | #define CYTHON_PEP393_ENABLED 0 87 | #define __Pyx_PyUnicode_READY(op) (0) 88 | #define __Pyx_PyUnicode_GET_LENGTH(u) PyUnicode_GET_SIZE(u) 89 | #define __Pyx_PyUnicode_READ_CHAR(u, i) ((Py_UCS4)(PyUnicode_AS_UNICODE(u)[i])) 90 | #define __Pyx_PyUnicode_KIND(u) (sizeof(Py_UNICODE)) 91 | #define __Pyx_PyUnicode_DATA(u) ((void*)PyUnicode_AS_UNICODE(u)) 92 | #define __Pyx_PyUnicode_READ(k, d, i) ((void)(k), (Py_UCS4)(((Py_UNICODE*)d)[i])) 93 | #endif 94 | #if CYTHON_COMPILING_IN_PYPY 95 | #define __Pyx_PyUnicode_Concat(a, b) PyNumber_Add(a, b) 96 | #define __Pyx_PyUnicode_ConcatSafe(a, b) PyNumber_Add(a, b) 97 | #else 98 | #define __Pyx_PyUnicode_Concat(a, b) PyUnicode_Concat(a, b) 99 | #define __Pyx_PyUnicode_ConcatSafe(a, b) ((unlikely((a) == Py_None) || unlikely((b) == Py_None)) ?\ 100 | PyNumber_Add(a, b) : __Pyx_PyUnicode_Concat(a, b)) 101 | #endif 102 | #if CYTHON_COMPILING_IN_PYPY && !defined(PyUnicode_Contains) 103 | #define PyUnicode_Contains(u, s) PySequence_Contains(u, s) 104 | #endif 105 | #define __Pyx_PyString_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? PyNumber_Remainder(a, b) : __Pyx_PyString_Format(a, b)) 106 | #define __Pyx_PyUnicode_FormatSafe(a, b) ((unlikely((a) == Py_None)) ? 
PyNumber_Remainder(a, b) : PyUnicode_Format(a, b)) 107 | #if PY_MAJOR_VERSION >= 3 108 | #define __Pyx_PyString_Format(a, b) PyUnicode_Format(a, b) 109 | #else 110 | #define __Pyx_PyString_Format(a, b) PyString_Format(a, b) 111 | #endif 112 | #if PY_MAJOR_VERSION >= 3 113 | #define PyBaseString_Type PyUnicode_Type 114 | #define PyStringObject PyUnicodeObject 115 | #define PyString_Type PyUnicode_Type 116 | #define PyString_Check PyUnicode_Check 117 | #define PyString_CheckExact PyUnicode_CheckExact 118 | #endif 119 | #if PY_MAJOR_VERSION >= 3 120 | #define __Pyx_PyBaseString_Check(obj) PyUnicode_Check(obj) 121 | #define __Pyx_PyBaseString_CheckExact(obj) PyUnicode_CheckExact(obj) 122 | #else 123 | #define __Pyx_PyBaseString_Check(obj) (PyString_Check(obj) || PyUnicode_Check(obj)) 124 | #define __Pyx_PyBaseString_CheckExact(obj) (PyString_CheckExact(obj) || PyUnicode_CheckExact(obj)) 125 | #endif 126 | #ifndef PySet_CheckExact 127 | #define PySet_CheckExact(obj) (Py_TYPE(obj) == &PySet_Type) 128 | #endif 129 | #define __Pyx_TypeCheck(obj, type) PyObject_TypeCheck(obj, (PyTypeObject *)type) 130 | #if PY_MAJOR_VERSION >= 3 131 | #define PyIntObject PyLongObject 132 | #define PyInt_Type PyLong_Type 133 | #define PyInt_Check(op) PyLong_Check(op) 134 | #define PyInt_CheckExact(op) PyLong_CheckExact(op) 135 | #define PyInt_FromString PyLong_FromString 136 | #define PyInt_FromUnicode PyLong_FromUnicode 137 | #define PyInt_FromLong PyLong_FromLong 138 | #define PyInt_FromSize_t PyLong_FromSize_t 139 | #define PyInt_FromSsize_t PyLong_FromSsize_t 140 | #define PyInt_AsLong PyLong_AsLong 141 | #define PyInt_AS_LONG PyLong_AS_LONG 142 | #define PyInt_AsSsize_t PyLong_AsSsize_t 143 | #define PyInt_AsUnsignedLongMask PyLong_AsUnsignedLongMask 144 | #define PyInt_AsUnsignedLongLongMask PyLong_AsUnsignedLongLongMask 145 | #define PyNumber_Int PyNumber_Long 146 | #endif 147 | #if PY_MAJOR_VERSION >= 3 148 | #define PyBoolObject PyLongObject 149 | #endif 150 | #if PY_MAJOR_VERSION 
>= 3 && CYTHON_COMPILING_IN_PYPY 151 | #ifndef PyUnicode_InternFromString 152 | #define PyUnicode_InternFromString(s) PyUnicode_FromString(s) 153 | #endif 154 | #endif 155 | #if PY_VERSION_HEX < 0x030200A4 156 | typedef long Py_hash_t; 157 | #define __Pyx_PyInt_FromHash_t PyInt_FromLong 158 | #define __Pyx_PyInt_AsHash_t PyInt_AsLong 159 | #else 160 | #define __Pyx_PyInt_FromHash_t PyInt_FromSsize_t 161 | #define __Pyx_PyInt_AsHash_t PyInt_AsSsize_t 162 | #endif 163 | #if PY_MAJOR_VERSION >= 3 164 | #define __Pyx_PyMethod_New(func, self, klass) ((self) ? PyMethod_New(func, self) : PyInstanceMethod_New(func)) 165 | #else 166 | #define __Pyx_PyMethod_New(func, self, klass) PyMethod_New(func, self, klass) 167 | #endif 168 | #if PY_VERSION_HEX >= 0x030500B1 169 | #define __Pyx_PyAsyncMethodsStruct PyAsyncMethods 170 | #define __Pyx_PyType_AsAsync(obj) (Py_TYPE(obj)->tp_as_async) 171 | #elif CYTHON_COMPILING_IN_CPYTHON && PY_MAJOR_VERSION >= 3 172 | typedef struct { 173 | unaryfunc am_await; 174 | unaryfunc am_aiter; 175 | unaryfunc am_anext; 176 | } __Pyx_PyAsyncMethodsStruct; 177 | #define __Pyx_PyType_AsAsync(obj) ((__Pyx_PyAsyncMethodsStruct*) (Py_TYPE(obj)->tp_reserved)) 178 | #else 179 | #define __Pyx_PyType_AsAsync(obj) NULL 180 | #endif 181 | #ifndef CYTHON_RESTRICT 182 | #if defined(__GNUC__) 183 | #define CYTHON_RESTRICT __restrict__ 184 | #elif defined(_MSC_VER) && _MSC_VER >= 1400 185 | #define CYTHON_RESTRICT __restrict 186 | #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L 187 | #define CYTHON_RESTRICT restrict 188 | #else 189 | #define CYTHON_RESTRICT 190 | #endif 191 | #endif 192 | #define __Pyx_void_to_None(void_result) ((void)(void_result), Py_INCREF(Py_None), Py_None) 193 | 194 | #ifndef CYTHON_INLINE 195 | #if defined(__GNUC__) 196 | #define CYTHON_INLINE __inline__ 197 | #elif defined(_MSC_VER) 198 | #define CYTHON_INLINE __inline 199 | #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L 200 | #define CYTHON_INLINE inline 
201 | #else 202 | #define CYTHON_INLINE 203 | #endif 204 | #endif 205 | 206 | #if defined(WIN32) || defined(MS_WINDOWS) 207 | #define _USE_MATH_DEFINES 208 | #endif 209 | #include <math.h> /* NAN, when the platform provides it */ 210 | #ifdef NAN 211 | #define __PYX_NAN() ((float) NAN) 212 | #else 213 | static CYTHON_INLINE float __PYX_NAN() { 214 | float value; 215 | memset(&value, 0xFF, sizeof(value)); 216 | return value; 217 | } 218 | #endif 219 | 220 | 221 | #if PY_MAJOR_VERSION >= 3 222 | #define __Pyx_PyNumber_Divide(x,y) PyNumber_TrueDivide(x,y) 223 | #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceTrueDivide(x,y) 224 | #else 225 | #define __Pyx_PyNumber_Divide(x,y) PyNumber_Divide(x,y) 226 | #define __Pyx_PyNumber_InPlaceDivide(x,y) PyNumber_InPlaceDivide(x,y) 227 | #endif 228 | 229 | #ifndef __PYX_EXTERN_C 230 | #ifdef __cplusplus 231 | #define __PYX_EXTERN_C extern "C" 232 | #else 233 | #define __PYX_EXTERN_C extern 234 | #endif 235 | #endif 236 | 237 | #define __PYX_HAVE__chat_trie 238 | #define __PYX_HAVE_API__chat_trie 239 | #include "../hat-trie/src/hat-trie.h" 240 | #ifdef _OPENMP 241 | #include <omp.h> 242 | #endif /* _OPENMP */ 243 | 244 | #ifdef PYREX_WITHOUT_ASSERTIONS 245 | #define CYTHON_WITHOUT_ASSERTIONS 246 | #endif 247 | 248 | #ifndef CYTHON_UNUSED 249 | # if defined(__GNUC__) 250 | # if !(defined(__cplusplus)) || (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)) 251 | # define CYTHON_UNUSED __attribute__ ((__unused__)) 252 | # else 253 | # define CYTHON_UNUSED 254 | # endif 255 | # elif defined(__ICC) || (defined(__INTEL_COMPILER) && !defined(_MSC_VER)) 256 | # define CYTHON_UNUSED __attribute__ ((__unused__)) 257 | # else 258 | # define CYTHON_UNUSED 259 | # endif 260 | #endif 261 | #ifndef CYTHON_NCP_UNUSED 262 | # if CYTHON_COMPILING_IN_CPYTHON 263 | # define CYTHON_NCP_UNUSED 264 | # else 265 | # define CYTHON_NCP_UNUSED CYTHON_UNUSED 266 | # endif 267 | #endif 268 | typedef struct {PyObject **p; char *s; const Py_ssize_t n; const char* encoding; 269 | const char is_unicode;
const char is_str; const char intern; } __Pyx_StringTabEntry; 270 | 271 | #define __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 0 272 | #define __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 0 273 | #define __PYX_DEFAULT_STRING_ENCODING "" 274 | #define __Pyx_PyObject_FromString __Pyx_PyBytes_FromString 275 | #define __Pyx_PyObject_FromStringAndSize __Pyx_PyBytes_FromStringAndSize 276 | #define __Pyx_uchar_cast(c) ((unsigned char)c) 277 | #define __Pyx_long_cast(x) ((long)x) 278 | #define __Pyx_fits_Py_ssize_t(v, type, is_signed) (\ 279 | (sizeof(type) < sizeof(Py_ssize_t)) ||\ 280 | (sizeof(type) > sizeof(Py_ssize_t) &&\ 281 | likely(v < (type)PY_SSIZE_T_MAX ||\ 282 | v == (type)PY_SSIZE_T_MAX) &&\ 283 | (!is_signed || likely(v > (type)PY_SSIZE_T_MIN ||\ 284 | v == (type)PY_SSIZE_T_MIN))) ||\ 285 | (sizeof(type) == sizeof(Py_ssize_t) &&\ 286 | (is_signed || likely(v < (type)PY_SSIZE_T_MAX ||\ 287 | v == (type)PY_SSIZE_T_MAX))) ) 288 | #if defined (__cplusplus) && __cplusplus >= 201103L 289 | #include <cstdlib> /* std::abs */ 290 | #define __Pyx_sst_abs(value) std::abs(value) 291 | #elif SIZEOF_INT >= SIZEOF_SIZE_T 292 | #define __Pyx_sst_abs(value) abs(value) 293 | #elif SIZEOF_LONG >= SIZEOF_SIZE_T 294 | #define __Pyx_sst_abs(value) labs(value) 295 | #elif defined (_MSC_VER) && defined (_M_X64) 296 | #define __Pyx_sst_abs(value) _abs64(value) 297 | #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L 298 | #define __Pyx_sst_abs(value) llabs(value) 299 | #elif defined (__GNUC__) 300 | #define __Pyx_sst_abs(value) __builtin_llabs(value) 301 | #else 302 | #define __Pyx_sst_abs(value) ((value<0) ?
-value : value) 303 | #endif 304 | static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject*); 305 | static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject*, Py_ssize_t* length); 306 | #define __Pyx_PyByteArray_FromString(s) PyByteArray_FromStringAndSize((const char*)s, strlen((const char*)s)) 307 | #define __Pyx_PyByteArray_FromStringAndSize(s, l) PyByteArray_FromStringAndSize((const char*)s, l) 308 | #define __Pyx_PyBytes_FromString PyBytes_FromString 309 | #define __Pyx_PyBytes_FromStringAndSize PyBytes_FromStringAndSize 310 | static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char*); 311 | #if PY_MAJOR_VERSION < 3 312 | #define __Pyx_PyStr_FromString __Pyx_PyBytes_FromString 313 | #define __Pyx_PyStr_FromStringAndSize __Pyx_PyBytes_FromStringAndSize 314 | #else 315 | #define __Pyx_PyStr_FromString __Pyx_PyUnicode_FromString 316 | #define __Pyx_PyStr_FromStringAndSize __Pyx_PyUnicode_FromStringAndSize 317 | #endif 318 | #define __Pyx_PyObject_AsSString(s) ((signed char*) __Pyx_PyObject_AsString(s)) 319 | #define __Pyx_PyObject_AsUString(s) ((unsigned char*) __Pyx_PyObject_AsString(s)) 320 | #define __Pyx_PyObject_FromCString(s) __Pyx_PyObject_FromString((const char*)s) 321 | #define __Pyx_PyBytes_FromCString(s) __Pyx_PyBytes_FromString((const char*)s) 322 | #define __Pyx_PyByteArray_FromCString(s) __Pyx_PyByteArray_FromString((const char*)s) 323 | #define __Pyx_PyStr_FromCString(s) __Pyx_PyStr_FromString((const char*)s) 324 | #define __Pyx_PyUnicode_FromCString(s) __Pyx_PyUnicode_FromString((const char*)s) 325 | #if PY_MAJOR_VERSION < 3 326 | static CYTHON_INLINE size_t __Pyx_Py_UNICODE_strlen(const Py_UNICODE *u) 327 | { 328 | const Py_UNICODE *u_end = u; 329 | while (*u_end++) ; 330 | return (size_t)(u_end - u - 1); 331 | } 332 | #else 333 | #define __Pyx_Py_UNICODE_strlen Py_UNICODE_strlen 334 | #endif 335 | #define __Pyx_PyUnicode_FromUnicode(u) PyUnicode_FromUnicode(u, __Pyx_Py_UNICODE_strlen(u)) 336 | #define 
__Pyx_PyUnicode_FromUnicodeAndLength PyUnicode_FromUnicode 337 | #define __Pyx_PyUnicode_AsUnicode PyUnicode_AsUnicode 338 | #define __Pyx_NewRef(obj) (Py_INCREF(obj), obj) 339 | #define __Pyx_Owned_Py_None(b) __Pyx_NewRef(Py_None) 340 | #define __Pyx_PyBool_FromLong(b) ((b) ? __Pyx_NewRef(Py_True) : __Pyx_NewRef(Py_False)) 341 | static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject*); 342 | static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x); 343 | static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject*); 344 | static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t); 345 | #if CYTHON_COMPILING_IN_CPYTHON 346 | #define __pyx_PyFloat_AsDouble(x) (PyFloat_CheckExact(x) ? PyFloat_AS_DOUBLE(x) : PyFloat_AsDouble(x)) 347 | #else 348 | #define __pyx_PyFloat_AsDouble(x) PyFloat_AsDouble(x) 349 | #endif 350 | #define __pyx_PyFloat_AsFloat(x) ((float) __pyx_PyFloat_AsDouble(x)) 351 | #if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 352 | static int __Pyx_sys_getdefaultencoding_not_ascii; 353 | static int __Pyx_init_sys_getdefaultencoding_params(void) { 354 | PyObject* sys; 355 | PyObject* default_encoding = NULL; 356 | PyObject* ascii_chars_u = NULL; 357 | PyObject* ascii_chars_b = NULL; 358 | const char* default_encoding_c; 359 | sys = PyImport_ImportModule("sys"); 360 | if (!sys) goto bad; 361 | default_encoding = PyObject_CallMethod(sys, (char*) "getdefaultencoding", NULL); 362 | Py_DECREF(sys); 363 | if (!default_encoding) goto bad; 364 | default_encoding_c = PyBytes_AsString(default_encoding); 365 | if (!default_encoding_c) goto bad; 366 | if (strcmp(default_encoding_c, "ascii") == 0) { 367 | __Pyx_sys_getdefaultencoding_not_ascii = 0; 368 | } else { 369 | char ascii_chars[128]; 370 | int c; 371 | for (c = 0; c < 128; c++) { 372 | ascii_chars[c] = c; 373 | } 374 | __Pyx_sys_getdefaultencoding_not_ascii = 1; 375 | ascii_chars_u = PyUnicode_DecodeASCII(ascii_chars, 128, NULL); 376 | if (!ascii_chars_u) goto bad; 377 | 
ascii_chars_b = PyUnicode_AsEncodedString(ascii_chars_u, default_encoding_c, NULL); 378 | if (!ascii_chars_b || !PyBytes_Check(ascii_chars_b) || memcmp(ascii_chars, PyBytes_AS_STRING(ascii_chars_b), 128) != 0) { 379 | PyErr_Format( 380 | PyExc_ValueError, 381 | "This module compiled with c_string_encoding=ascii, but default encoding '%.200s' is not a superset of ascii.", 382 | default_encoding_c); 383 | goto bad; 384 | } 385 | Py_DECREF(ascii_chars_u); 386 | Py_DECREF(ascii_chars_b); 387 | } 388 | Py_DECREF(default_encoding); 389 | return 0; 390 | bad: 391 | Py_XDECREF(default_encoding); 392 | Py_XDECREF(ascii_chars_u); 393 | Py_XDECREF(ascii_chars_b); 394 | return -1; 395 | } 396 | #endif 397 | #if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT && PY_MAJOR_VERSION >= 3 398 | #define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_DecodeUTF8(c_str, size, NULL) 399 | #else 400 | #define __Pyx_PyUnicode_FromStringAndSize(c_str, size) PyUnicode_Decode(c_str, size, __PYX_DEFAULT_STRING_ENCODING, NULL) 401 | #if __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT 402 | static char* __PYX_DEFAULT_STRING_ENCODING; 403 | static int __Pyx_init_sys_getdefaultencoding_params(void) { 404 | PyObject* sys; 405 | PyObject* default_encoding = NULL; 406 | char* default_encoding_c; 407 | sys = PyImport_ImportModule("sys"); 408 | if (!sys) goto bad; 409 | default_encoding = PyObject_CallMethod(sys, (char*) (const char*) "getdefaultencoding", NULL); 410 | Py_DECREF(sys); 411 | if (!default_encoding) goto bad; 412 | default_encoding_c = PyBytes_AsString(default_encoding); 413 | if (!default_encoding_c) goto bad; 414 | /* +1: the strcpy below also writes the terminating NUL */ __PYX_DEFAULT_STRING_ENCODING = (char*) malloc(strlen(default_encoding_c) + 1); 415 | if (!__PYX_DEFAULT_STRING_ENCODING) goto bad; 416 | strcpy(__PYX_DEFAULT_STRING_ENCODING, default_encoding_c); 417 | Py_DECREF(default_encoding); 418 | return 0; 419 | bad: 420 | Py_XDECREF(default_encoding); 421 | return -1; 422 | } 423 | #endif 424 | #endif 425 | 426 | 427 | /* Test for GCC > 2.95
*/ 428 | #if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95))) 429 | #define likely(x) __builtin_expect(!!(x), 1) 430 | #define unlikely(x) __builtin_expect(!!(x), 0) 431 | #else /* !__GNUC__ or GCC < 2.95 */ 432 | #define likely(x) (x) 433 | #define unlikely(x) (x) 434 | #endif /* __GNUC__ */ 435 | 436 | static PyObject *__pyx_m; 437 | static PyObject *__pyx_d; 438 | static PyObject *__pyx_b; 439 | static PyObject *__pyx_empty_tuple; 440 | static PyObject *__pyx_empty_bytes; 441 | static int __pyx_lineno; 442 | static int __pyx_clineno = 0; 443 | static const char * __pyx_cfilenm= __FILE__; 444 | static const char *__pyx_filename; 445 | 446 | 447 | static const char *__pyx_f[] = { 448 | "src/chat_trie.pxd", 449 | }; 450 | 451 | /*--- Type declarations ---*/ 452 | struct __pyx_t_9chat_trie_hattrie_t_; 453 | 454 | /* "chat_trie.pxd":36 455 | * value_t* hattrie_iter_val (hattrie_iter_t*) 456 | * 457 | * cdef struct hattrie_t_: # <<<<<<<<<<<<<< 458 | * void* root 459 | * size_t m # number of stored keys 460 | */ 461 | struct __pyx_t_9chat_trie_hattrie_t_ { 462 | void *root; 463 | size_t m; 464 | }; 465 | 466 | /* --- Runtime support code (head) --- */ 467 | #ifndef CYTHON_REFNANNY 468 | #define CYTHON_REFNANNY 0 469 | #endif 470 | #if CYTHON_REFNANNY 471 | typedef struct { 472 | void (*INCREF)(void*, PyObject*, int); 473 | void (*DECREF)(void*, PyObject*, int); 474 | void (*GOTREF)(void*, PyObject*, int); 475 | void (*GIVEREF)(void*, PyObject*, int); 476 | void* (*SetupContext)(const char*, int, const char*); 477 | void (*FinishContext)(void**); 478 | } __Pyx_RefNannyAPIStruct; 479 | static __Pyx_RefNannyAPIStruct *__Pyx_RefNanny = NULL; 480 | static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname); 481 | #define __Pyx_RefNannyDeclarations void *__pyx_refnanny = NULL; 482 | #ifdef WITH_THREAD 483 | #define __Pyx_RefNannySetupContext(name, acquire_gil)\ 484 | if (acquire_gil) {\ 485 | PyGILState_STATE 
__pyx_gilstate_save = PyGILState_Ensure();\ 486 | __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ 487 | PyGILState_Release(__pyx_gilstate_save);\ 488 | } else {\ 489 | __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__);\ 490 | } 491 | #else 492 | #define __Pyx_RefNannySetupContext(name, acquire_gil)\ 493 | __pyx_refnanny = __Pyx_RefNanny->SetupContext((name), __LINE__, __FILE__) 494 | #endif 495 | #define __Pyx_RefNannyFinishContext()\ 496 | __Pyx_RefNanny->FinishContext(&__pyx_refnanny) 497 | #define __Pyx_INCREF(r) __Pyx_RefNanny->INCREF(__pyx_refnanny, (PyObject *)(r), __LINE__) 498 | #define __Pyx_DECREF(r) __Pyx_RefNanny->DECREF(__pyx_refnanny, (PyObject *)(r), __LINE__) 499 | #define __Pyx_GOTREF(r) __Pyx_RefNanny->GOTREF(__pyx_refnanny, (PyObject *)(r), __LINE__) 500 | #define __Pyx_GIVEREF(r) __Pyx_RefNanny->GIVEREF(__pyx_refnanny, (PyObject *)(r), __LINE__) 501 | #define __Pyx_XINCREF(r) do { if((r) != NULL) {__Pyx_INCREF(r); }} while(0) 502 | #define __Pyx_XDECREF(r) do { if((r) != NULL) {__Pyx_DECREF(r); }} while(0) 503 | #define __Pyx_XGOTREF(r) do { if((r) != NULL) {__Pyx_GOTREF(r); }} while(0) 504 | #define __Pyx_XGIVEREF(r) do { if((r) != NULL) {__Pyx_GIVEREF(r);}} while(0) 505 | #else 506 | #define __Pyx_RefNannyDeclarations 507 | #define __Pyx_RefNannySetupContext(name, acquire_gil) 508 | #define __Pyx_RefNannyFinishContext() 509 | #define __Pyx_INCREF(r) Py_INCREF(r) 510 | #define __Pyx_DECREF(r) Py_DECREF(r) 511 | #define __Pyx_GOTREF(r) 512 | #define __Pyx_GIVEREF(r) 513 | #define __Pyx_XINCREF(r) Py_XINCREF(r) 514 | #define __Pyx_XDECREF(r) Py_XDECREF(r) 515 | #define __Pyx_XGOTREF(r) 516 | #define __Pyx_XGIVEREF(r) 517 | #endif 518 | #define __Pyx_XDECREF_SET(r, v) do {\ 519 | PyObject *tmp = (PyObject *) r;\ 520 | r = v; __Pyx_XDECREF(tmp);\ 521 | } while (0) 522 | #define __Pyx_DECREF_SET(r, v) do {\ 523 | PyObject *tmp = (PyObject *) r;\ 524 | r = v; __Pyx_DECREF(tmp);\ 525 | } while (0) 
526 | #define __Pyx_CLEAR(r) do { PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);} while(0) 527 | #define __Pyx_XCLEAR(r) do { if((r) != NULL) {PyObject* tmp = ((PyObject*)(r)); r = NULL; __Pyx_DECREF(tmp);}} while(0) 528 | 529 | typedef struct { 530 | int code_line; 531 | PyCodeObject* code_object; 532 | } __Pyx_CodeObjectCacheEntry; 533 | struct __Pyx_CodeObjectCache { 534 | int count; 535 | int max_count; 536 | __Pyx_CodeObjectCacheEntry* entries; 537 | }; 538 | static struct __Pyx_CodeObjectCache __pyx_code_cache = {0,0,NULL}; 539 | static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line); 540 | static PyCodeObject *__pyx_find_code_object(int code_line); 541 | static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object); 542 | 543 | static void __Pyx_AddTraceback(const char *funcname, int c_line, 544 | int py_line, const char *filename); 545 | 546 | static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value); 547 | 548 | static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *); 549 | 550 | static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *); 551 | 552 | static int __Pyx_check_binary_version(void); 553 | 554 | static int __Pyx_InitStrings(__Pyx_StringTabEntry *t); 555 | 556 | 557 | /* Module declarations from 'chat_trie' */ 558 | #define __Pyx_MODULE_NAME "chat_trie" 559 | int __pyx_module_is_main_chat_trie = 0; 560 | 561 | /* Implementation of 'chat_trie' */ 562 | static char __pyx_k_main[] = "__main__"; 563 | static char __pyx_k_test[] = "__test__"; 564 | static PyObject *__pyx_n_s_main; 565 | static PyObject *__pyx_n_s_test; 566 | 567 | static PyMethodDef __pyx_methods[] = { 568 | {0, 0, 0, 0} 569 | }; 570 | 571 | #if PY_MAJOR_VERSION >= 3 572 | static struct PyModuleDef __pyx_moduledef = { 573 | #if PY_VERSION_HEX < 0x03020000 574 | { PyObject_HEAD_INIT(NULL) NULL, 0, NULL }, 575 | #else 576 | PyModuleDef_HEAD_INIT, 577 | #endif 578 | "chat_trie", 579 | 0, /* m_doc */ 580 | 
-1, /* m_size */ 581 | __pyx_methods /* m_methods */, 582 | NULL, /* m_reload */ 583 | NULL, /* m_traverse */ 584 | NULL, /* m_clear */ 585 | NULL /* m_free */ 586 | }; 587 | #endif 588 | 589 | static __Pyx_StringTabEntry __pyx_string_tab[] = { 590 | {&__pyx_n_s_main, __pyx_k_main, sizeof(__pyx_k_main), 0, 0, 1, 1}, 591 | {&__pyx_n_s_test, __pyx_k_test, sizeof(__pyx_k_test), 0, 0, 1, 1}, 592 | {0, 0, 0, 0, 0, 0, 0} 593 | }; 594 | static int __Pyx_InitCachedBuiltins(void) { 595 | return 0; 596 | } 597 | 598 | static int __Pyx_InitCachedConstants(void) { 599 | __Pyx_RefNannyDeclarations 600 | __Pyx_RefNannySetupContext("__Pyx_InitCachedConstants", 0); 601 | __Pyx_RefNannyFinishContext(); 602 | return 0; 603 | } 604 | 605 | static int __Pyx_InitGlobals(void) { 606 | if (__Pyx_InitStrings(__pyx_string_tab) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; 607 | return 0; 608 | __pyx_L1_error:; 609 | return -1; 610 | } 611 | 612 | #if PY_MAJOR_VERSION < 3 613 | PyMODINIT_FUNC initchat_trie(void); /*proto*/ 614 | PyMODINIT_FUNC initchat_trie(void) 615 | #else 616 | PyMODINIT_FUNC PyInit_chat_trie(void); /*proto*/ 617 | PyMODINIT_FUNC PyInit_chat_trie(void) 618 | #endif 619 | { 620 | PyObject *__pyx_t_1 = NULL; 621 | int __pyx_lineno = 0; 622 | const char *__pyx_filename = NULL; 623 | int __pyx_clineno = 0; 624 | __Pyx_RefNannyDeclarations 625 | #if CYTHON_REFNANNY 626 | __Pyx_RefNanny = __Pyx_RefNannyImportAPI("refnanny"); 627 | if (!__Pyx_RefNanny) { 628 | PyErr_Clear(); 629 | __Pyx_RefNanny = __Pyx_RefNannyImportAPI("Cython.Runtime.refnanny"); 630 | if (!__Pyx_RefNanny) 631 | Py_FatalError("failed to import 'refnanny' module"); 632 | } 633 | #endif 634 | __Pyx_RefNannySetupContext("PyMODINIT_FUNC PyInit_chat_trie(void)", 0); 635 | if (__Pyx_check_binary_version() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 636 | __pyx_empty_tuple = PyTuple_New(0); if 
(unlikely(!__pyx_empty_tuple)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 637 | __pyx_empty_bytes = PyBytes_FromStringAndSize("", 0); if (unlikely(!__pyx_empty_bytes)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 638 | #ifdef __Pyx_CyFunction_USED 639 | if (__pyx_CyFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 640 | #endif 641 | #ifdef __Pyx_FusedFunction_USED 642 | if (__pyx_FusedFunction_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 643 | #endif 644 | #ifdef __Pyx_Coroutine_USED 645 | if (__pyx_Coroutine_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 646 | #endif 647 | #ifdef __Pyx_Generator_USED 648 | if (__pyx_Generator_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 649 | #endif 650 | #ifdef __Pyx_StopAsyncIteration_USED 651 | if (__pyx_StopAsyncIteration_init() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 652 | #endif 653 | /*--- Library function declarations ---*/ 654 | /*--- Threads initialization code ---*/ 655 | #if defined(__PYX_FORCE_INIT_THREADS) && __PYX_FORCE_INIT_THREADS 656 | #ifdef WITH_THREAD /* Python build with threading support? 
*/ 657 | PyEval_InitThreads(); 658 | #endif 659 | #endif 660 | /*--- Module creation code ---*/ 661 | #if PY_MAJOR_VERSION < 3 662 | __pyx_m = Py_InitModule4("chat_trie", __pyx_methods, 0, 0, PYTHON_API_VERSION); Py_XINCREF(__pyx_m); 663 | #else 664 | __pyx_m = PyModule_Create(&__pyx_moduledef); 665 | #endif 666 | if (unlikely(!__pyx_m)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 667 | __pyx_d = PyModule_GetDict(__pyx_m); if (unlikely(!__pyx_d)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 668 | Py_INCREF(__pyx_d); 669 | __pyx_b = PyImport_AddModule(__Pyx_BUILTIN_MODULE_NAME); if (unlikely(!__pyx_b)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 670 | #if CYTHON_COMPILING_IN_PYPY 671 | Py_INCREF(__pyx_b); 672 | #endif 673 | if (PyObject_SetAttrString(__pyx_m, "__builtins__", __pyx_b) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;}; 674 | /*--- Initialize various global constants etc. 
---*/ 675 | if (__Pyx_InitGlobals() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 676 | #if PY_MAJOR_VERSION < 3 && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) 677 | if (__Pyx_init_sys_getdefaultencoding_params() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 678 | #endif 679 | if (__pyx_module_is_main_chat_trie) { 680 | if (PyObject_SetAttrString(__pyx_m, "__name__", __pyx_n_s_main) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 681 | } 682 | #if PY_MAJOR_VERSION >= 3 683 | { 684 | PyObject *modules = PyImport_GetModuleDict(); if (unlikely(!modules)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 685 | if (!PyDict_GetItemString(modules, "chat_trie")) { 686 | if (unlikely(PyDict_SetItemString(modules, "chat_trie", __pyx_m) < 0)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 687 | } 688 | } 689 | #endif 690 | /*--- Builtin init code ---*/ 691 | if (__Pyx_InitCachedBuiltins() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 692 | /*--- Constants init code ---*/ 693 | if (__Pyx_InitCachedConstants() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 694 | /*--- Global init code ---*/ 695 | /*--- Variable export code ---*/ 696 | /*--- Function export code ---*/ 697 | /*--- Type init code ---*/ 698 | /*--- Type import code ---*/ 699 | /*--- Variable import code ---*/ 700 | /*--- Function import code ---*/ 701 | /*--- Execution code ---*/ 702 | #if defined(__Pyx_Generator_USED) || defined(__Pyx_Coroutine_USED) 703 | if (__Pyx_patch_abc() < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 704 | #endif 705 | 706 | /* 
"chat_trie.pxd":1 707 | * cdef extern from "../hat-trie/src/hat-trie.h": # <<<<<<<<<<<<<< 708 | * 709 | * ctypedef int value_t 710 | */ 711 | __pyx_t_1 = PyDict_New(); if (unlikely(!__pyx_t_1)) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 712 | __Pyx_GOTREF(__pyx_t_1); 713 | if (PyDict_SetItem(__pyx_d, __pyx_n_s_test, __pyx_t_1) < 0) {__pyx_filename = __pyx_f[0]; __pyx_lineno = 1; __pyx_clineno = __LINE__; goto __pyx_L1_error;} 714 | __Pyx_DECREF(__pyx_t_1); __pyx_t_1 = 0; 715 | 716 | /*--- Wrapped vars code ---*/ 717 | 718 | goto __pyx_L0; 719 | __pyx_L1_error:; 720 | __Pyx_XDECREF(__pyx_t_1); 721 | if (__pyx_m) { 722 | if (__pyx_d) { 723 | __Pyx_AddTraceback("init chat_trie", __pyx_clineno, __pyx_lineno, __pyx_filename); 724 | } 725 | Py_DECREF(__pyx_m); __pyx_m = 0; 726 | } else if (!PyErr_Occurred()) { 727 | PyErr_SetString(PyExc_ImportError, "init chat_trie"); 728 | } 729 | __pyx_L0:; 730 | __Pyx_RefNannyFinishContext(); 731 | #if PY_MAJOR_VERSION < 3 732 | return; 733 | #else 734 | return __pyx_m; 735 | #endif 736 | } 737 | 738 | /* --- Runtime support code --- */ 739 | #if CYTHON_REFNANNY 740 | static __Pyx_RefNannyAPIStruct *__Pyx_RefNannyImportAPI(const char *modname) { 741 | PyObject *m = NULL, *p = NULL; 742 | void *r = NULL; 743 | m = PyImport_ImportModule((char *)modname); 744 | if (!m) goto end; 745 | p = PyObject_GetAttrString(m, (char *)"RefNannyAPI"); 746 | if (!p) goto end; 747 | r = PyLong_AsVoidPtr(p); 748 | end: 749 | Py_XDECREF(p); 750 | Py_XDECREF(m); 751 | return (__Pyx_RefNannyAPIStruct *)r; 752 | } 753 | #endif 754 | 755 | static int __pyx_bisect_code_objects(__Pyx_CodeObjectCacheEntry* entries, int count, int code_line) { 756 | int start = 0, mid = 0, end = count - 1; 757 | if (end >= 0 && code_line > entries[end].code_line) { 758 | return count; 759 | } 760 | while (start < end) { 761 | mid = start + (end - start) / 2; 762 | if (code_line < entries[mid].code_line) { 763 | end = mid; 764 
| } else if (code_line > entries[mid].code_line) { 765 | start = mid + 1; 766 | } else { 767 | return mid; 768 | } 769 | } 770 | if (code_line <= entries[mid].code_line) { 771 | return mid; 772 | } else { 773 | return mid + 1; 774 | } 775 | } 776 | static PyCodeObject *__pyx_find_code_object(int code_line) { 777 | PyCodeObject* code_object; 778 | int pos; 779 | if (unlikely(!code_line) || unlikely(!__pyx_code_cache.entries)) { 780 | return NULL; 781 | } 782 | pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); 783 | if (unlikely(pos >= __pyx_code_cache.count) || unlikely(__pyx_code_cache.entries[pos].code_line != code_line)) { 784 | return NULL; 785 | } 786 | code_object = __pyx_code_cache.entries[pos].code_object; 787 | Py_INCREF(code_object); 788 | return code_object; 789 | } 790 | static void __pyx_insert_code_object(int code_line, PyCodeObject* code_object) { 791 | int pos, i; 792 | __Pyx_CodeObjectCacheEntry* entries = __pyx_code_cache.entries; 793 | if (unlikely(!code_line)) { 794 | return; 795 | } 796 | if (unlikely(!entries)) { 797 | entries = (__Pyx_CodeObjectCacheEntry*)PyMem_Malloc(64*sizeof(__Pyx_CodeObjectCacheEntry)); 798 | if (likely(entries)) { 799 | __pyx_code_cache.entries = entries; 800 | __pyx_code_cache.max_count = 64; 801 | __pyx_code_cache.count = 1; 802 | entries[0].code_line = code_line; 803 | entries[0].code_object = code_object; 804 | Py_INCREF(code_object); 805 | } 806 | return; 807 | } 808 | pos = __pyx_bisect_code_objects(__pyx_code_cache.entries, __pyx_code_cache.count, code_line); 809 | if ((pos < __pyx_code_cache.count) && unlikely(__pyx_code_cache.entries[pos].code_line == code_line)) { 810 | PyCodeObject* tmp = entries[pos].code_object; 811 | entries[pos].code_object = code_object; 812 | Py_DECREF(tmp); 813 | return; 814 | } 815 | if (__pyx_code_cache.count == __pyx_code_cache.max_count) { 816 | int new_max = __pyx_code_cache.max_count + 64; 817 | entries = 
(__Pyx_CodeObjectCacheEntry*)PyMem_Realloc( 818 | __pyx_code_cache.entries, (size_t)new_max*sizeof(__Pyx_CodeObjectCacheEntry)); 819 | if (unlikely(!entries)) { 820 | return; 821 | } 822 | __pyx_code_cache.entries = entries; 823 | __pyx_code_cache.max_count = new_max; 824 | } 825 | for (i=__pyx_code_cache.count; i>pos; i--) { 826 | entries[i] = entries[i-1]; 827 | } 828 | entries[pos].code_line = code_line; 829 | entries[pos].code_object = code_object; 830 | __pyx_code_cache.count++; 831 | Py_INCREF(code_object); 832 | } 833 | 834 | #include "compile.h" 835 | #include "frameobject.h" 836 | #include "traceback.h" 837 | static PyCodeObject* __Pyx_CreateCodeObjectForTraceback( 838 | const char *funcname, int c_line, 839 | int py_line, const char *filename) { 840 | PyCodeObject *py_code = 0; 841 | PyObject *py_srcfile = 0; 842 | PyObject *py_funcname = 0; 843 | #if PY_MAJOR_VERSION < 3 844 | py_srcfile = PyString_FromString(filename); 845 | #else 846 | py_srcfile = PyUnicode_FromString(filename); 847 | #endif 848 | if (!py_srcfile) goto bad; 849 | if (c_line) { 850 | #if PY_MAJOR_VERSION < 3 851 | py_funcname = PyString_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); 852 | #else 853 | py_funcname = PyUnicode_FromFormat( "%s (%s:%d)", funcname, __pyx_cfilenm, c_line); 854 | #endif 855 | } 856 | else { 857 | #if PY_MAJOR_VERSION < 3 858 | py_funcname = PyString_FromString(funcname); 859 | #else 860 | py_funcname = PyUnicode_FromString(funcname); 861 | #endif 862 | } 863 | if (!py_funcname) goto bad; 864 | py_code = __Pyx_PyCode_New( 865 | 0, 866 | 0, 867 | 0, 868 | 0, 869 | 0, 870 | __pyx_empty_bytes, /*PyObject *code,*/ 871 | __pyx_empty_tuple, /*PyObject *consts,*/ 872 | __pyx_empty_tuple, /*PyObject *names,*/ 873 | __pyx_empty_tuple, /*PyObject *varnames,*/ 874 | __pyx_empty_tuple, /*PyObject *freevars,*/ 875 | __pyx_empty_tuple, /*PyObject *cellvars,*/ 876 | py_srcfile, /*PyObject *filename,*/ 877 | py_funcname, /*PyObject *name,*/ 878 | py_line, 879 | 
__pyx_empty_bytes /*PyObject *lnotab*/ 880 | ); 881 | Py_DECREF(py_srcfile); 882 | Py_DECREF(py_funcname); 883 | return py_code; 884 | bad: 885 | Py_XDECREF(py_srcfile); 886 | Py_XDECREF(py_funcname); 887 | return NULL; 888 | } 889 | static void __Pyx_AddTraceback(const char *funcname, int c_line, 890 | int py_line, const char *filename) { 891 | PyCodeObject *py_code = 0; 892 | PyFrameObject *py_frame = 0; 893 | py_code = __pyx_find_code_object(c_line ? c_line : py_line); 894 | if (!py_code) { 895 | py_code = __Pyx_CreateCodeObjectForTraceback( 896 | funcname, c_line, py_line, filename); 897 | if (!py_code) goto bad; 898 | __pyx_insert_code_object(c_line ? c_line : py_line, py_code); 899 | } 900 | py_frame = PyFrame_New( 901 | PyThreadState_GET(), /*PyThreadState *tstate,*/ 902 | py_code, /*PyCodeObject *code,*/ 903 | __pyx_d, /*PyObject *globals,*/ 904 | 0 /*PyObject *locals*/ 905 | ); 906 | if (!py_frame) goto bad; 907 | py_frame->f_lineno = py_line; 908 | PyTraceBack_Here(py_frame); 909 | bad: 910 | Py_XDECREF(py_code); 911 | Py_XDECREF(py_frame); 912 | } 913 | 914 | static CYTHON_INLINE PyObject* __Pyx_PyInt_From_long(long value) { 915 | const long neg_one = (long) -1, const_zero = (long) 0; 916 | const int is_unsigned = neg_one > const_zero; 917 | if (is_unsigned) { 918 | if (sizeof(long) < sizeof(long)) { 919 | return PyInt_FromLong((long) value); 920 | } else if (sizeof(long) <= sizeof(unsigned long)) { 921 | return PyLong_FromUnsignedLong((unsigned long) value); 922 | } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { 923 | return PyLong_FromUnsignedLongLong((unsigned PY_LONG_LONG) value); 924 | } 925 | } else { 926 | if (sizeof(long) <= sizeof(long)) { 927 | return PyInt_FromLong((long) value); 928 | } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { 929 | return PyLong_FromLongLong((PY_LONG_LONG) value); 930 | } 931 | } 932 | { 933 | int one = 1; int little = (int)*(unsigned char *)&one; 934 | unsigned char *bytes = (unsigned char *)&value; 935 | 
return _PyLong_FromByteArray(bytes, sizeof(long), 936 | little, !is_unsigned); 937 | } 938 | } 939 | 940 | #define __PYX_VERIFY_RETURN_INT(target_type, func_type, func_value)\ 941 | __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 0) 942 | #define __PYX_VERIFY_RETURN_INT_EXC(target_type, func_type, func_value)\ 943 | __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, 1) 944 | #define __PYX__VERIFY_RETURN_INT(target_type, func_type, func_value, exc)\ 945 | {\ 946 | func_type value = func_value;\ 947 | if (sizeof(target_type) < sizeof(func_type)) {\ 948 | if (unlikely(value != (func_type) (target_type) value)) {\ 949 | func_type zero = 0;\ 950 | if (exc && unlikely(value == (func_type)-1 && PyErr_Occurred()))\ 951 | return (target_type) -1;\ 952 | if (is_unsigned && unlikely(value < zero))\ 953 | goto raise_neg_overflow;\ 954 | else\ 955 | goto raise_overflow;\ 956 | }\ 957 | }\ 958 | return (target_type) value;\ 959 | } 960 | 961 | #if CYTHON_USE_PYLONG_INTERNALS 962 | #include "longintrepr.h" 963 | #endif 964 | 965 | static CYTHON_INLINE long __Pyx_PyInt_As_long(PyObject *x) { 966 | const long neg_one = (long) -1, const_zero = (long) 0; 967 | const int is_unsigned = neg_one > const_zero; 968 | #if PY_MAJOR_VERSION < 3 969 | if (likely(PyInt_Check(x))) { 970 | if (sizeof(long) < sizeof(long)) { 971 | __PYX_VERIFY_RETURN_INT(long, long, PyInt_AS_LONG(x)) 972 | } else { 973 | long val = PyInt_AS_LONG(x); 974 | if (is_unsigned && unlikely(val < 0)) { 975 | goto raise_neg_overflow; 976 | } 977 | return (long) val; 978 | } 979 | } else 980 | #endif 981 | if (likely(PyLong_Check(x))) { 982 | if (is_unsigned) { 983 | #if CYTHON_USE_PYLONG_INTERNALS 984 | const digit* digits = ((PyLongObject*)x)->ob_digit; 985 | switch (Py_SIZE(x)) { 986 | case 0: return (long) 0; 987 | case 1: __PYX_VERIFY_RETURN_INT(long, digit, digits[0]) 988 | case 2: 989 | if (8 * sizeof(long) > 1 * PyLong_SHIFT) { 990 | if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { 991 | 
__PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 992 | } else if (8 * sizeof(long) >= 2 * PyLong_SHIFT) { 993 | return (long) (((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); 994 | } 995 | } 996 | break; 997 | case 3: 998 | if (8 * sizeof(long) > 2 * PyLong_SHIFT) { 999 | if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { 1000 | __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1001 | } else if (8 * sizeof(long) >= 3 * PyLong_SHIFT) { 1002 | return (long) (((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); 1003 | } 1004 | } 1005 | break; 1006 | case 4: 1007 | if (8 * sizeof(long) > 3 * PyLong_SHIFT) { 1008 | if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { 1009 | __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1010 | } else if (8 * sizeof(long) >= 4 * PyLong_SHIFT) { 1011 | return (long) (((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0])); 1012 | } 1013 | } 1014 | break; 1015 | } 1016 | #endif 1017 | #if CYTHON_COMPILING_IN_CPYTHON 1018 | if (unlikely(Py_SIZE(x) < 0)) { 1019 | goto raise_neg_overflow; 1020 | } 1021 | #else 1022 | { 1023 | int result = PyObject_RichCompareBool(x, Py_False, Py_LT); 1024 | if (unlikely(result < 0)) 1025 | return (long) -1; 1026 | if (unlikely(result == 1)) 1027 | goto raise_neg_overflow; 1028 | } 1029 | #endif 1030 | if (sizeof(long) <= sizeof(unsigned long)) { 1031 | __PYX_VERIFY_RETURN_INT_EXC(long, unsigned long, PyLong_AsUnsignedLong(x)) 1032 | } else if (sizeof(long) <= sizeof(unsigned PY_LONG_LONG)) { 1033 | 
__PYX_VERIFY_RETURN_INT_EXC(long, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) 1034 | } 1035 | } else { 1036 | #if CYTHON_USE_PYLONG_INTERNALS 1037 | const digit* digits = ((PyLongObject*)x)->ob_digit; 1038 | switch (Py_SIZE(x)) { 1039 | case 0: return (long) 0; 1040 | case -1: __PYX_VERIFY_RETURN_INT(long, sdigit, -(sdigit) digits[0]) 1041 | case 1: __PYX_VERIFY_RETURN_INT(long, digit, +digits[0]) 1042 | case -2: 1043 | if (8 * sizeof(long) - 1 > 1 * PyLong_SHIFT) { 1044 | if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { 1045 | __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1046 | } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { 1047 | return (long) (((long)-1)*(((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); 1048 | } 1049 | } 1050 | break; 1051 | case 2: 1052 | if (8 * sizeof(long) > 1 * PyLong_SHIFT) { 1053 | if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { 1054 | __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1055 | } else if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { 1056 | return (long) ((((((long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); 1057 | } 1058 | } 1059 | break; 1060 | case -3: 1061 | if (8 * sizeof(long) - 1 > 2 * PyLong_SHIFT) { 1062 | if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { 1063 | __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1064 | } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { 1065 | return (long) (((long)-1)*(((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); 1066 | } 1067 | } 1068 | break; 1069 | case 3: 1070 | if (8 * sizeof(long) > 2 * PyLong_SHIFT) { 1071 | if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { 1072 | __PYX_VERIFY_RETURN_INT(long, unsigned long, 
(((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1073 | } else if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { 1074 | return (long) ((((((((long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); 1075 | } 1076 | } 1077 | break; 1078 | case -4: 1079 | if (8 * sizeof(long) - 1 > 3 * PyLong_SHIFT) { 1080 | if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { 1081 | __PYX_VERIFY_RETURN_INT(long, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1082 | } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { 1083 | return (long) (((long)-1)*(((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); 1084 | } 1085 | } 1086 | break; 1087 | case 4: 1088 | if (8 * sizeof(long) > 3 * PyLong_SHIFT) { 1089 | if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { 1090 | __PYX_VERIFY_RETURN_INT(long, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1091 | } else if (8 * sizeof(long) - 1 > 4 * PyLong_SHIFT) { 1092 | return (long) ((((((((((long)digits[3]) << PyLong_SHIFT) | (long)digits[2]) << PyLong_SHIFT) | (long)digits[1]) << PyLong_SHIFT) | (long)digits[0]))); 1093 | } 1094 | } 1095 | break; 1096 | } 1097 | #endif 1098 | if (sizeof(long) <= sizeof(long)) { 1099 | __PYX_VERIFY_RETURN_INT_EXC(long, long, PyLong_AsLong(x)) 1100 | } else if (sizeof(long) <= sizeof(PY_LONG_LONG)) { 1101 | __PYX_VERIFY_RETURN_INT_EXC(long, PY_LONG_LONG, PyLong_AsLongLong(x)) 1102 | } 1103 | } 1104 | { 1105 | #if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) 1106 | PyErr_SetString(PyExc_RuntimeError, 1107 | "_PyLong_AsByteArray() not available in 
PyPy, cannot convert large numbers"); 1108 | #else 1109 | long val; 1110 | PyObject *v = __Pyx_PyNumber_Int(x); 1111 | #if PY_MAJOR_VERSION < 3 1112 | if (likely(v) && !PyLong_Check(v)) { 1113 | PyObject *tmp = v; 1114 | v = PyNumber_Long(tmp); 1115 | Py_DECREF(tmp); 1116 | } 1117 | #endif 1118 | if (likely(v)) { 1119 | int one = 1; int is_little = (int)*(unsigned char *)&one; 1120 | unsigned char *bytes = (unsigned char *)&val; 1121 | int ret = _PyLong_AsByteArray((PyLongObject *)v, 1122 | bytes, sizeof(val), 1123 | is_little, !is_unsigned); 1124 | Py_DECREF(v); 1125 | if (likely(!ret)) 1126 | return val; 1127 | } 1128 | #endif 1129 | return (long) -1; 1130 | } 1131 | } else { 1132 | long val; 1133 | PyObject *tmp = __Pyx_PyNumber_Int(x); 1134 | if (!tmp) return (long) -1; 1135 | val = __Pyx_PyInt_As_long(tmp); 1136 | Py_DECREF(tmp); 1137 | return val; 1138 | } 1139 | raise_overflow: 1140 | PyErr_SetString(PyExc_OverflowError, 1141 | "value too large to convert to long"); 1142 | return (long) -1; 1143 | raise_neg_overflow: 1144 | PyErr_SetString(PyExc_OverflowError, 1145 | "can't convert negative value to long"); 1146 | return (long) -1; 1147 | } 1148 | 1149 | static CYTHON_INLINE int __Pyx_PyInt_As_int(PyObject *x) { 1150 | const int neg_one = (int) -1, const_zero = (int) 0; 1151 | const int is_unsigned = neg_one > const_zero; 1152 | #if PY_MAJOR_VERSION < 3 1153 | if (likely(PyInt_Check(x))) { 1154 | if (sizeof(int) < sizeof(long)) { 1155 | __PYX_VERIFY_RETURN_INT(int, long, PyInt_AS_LONG(x)) 1156 | } else { 1157 | long val = PyInt_AS_LONG(x); 1158 | if (is_unsigned && unlikely(val < 0)) { 1159 | goto raise_neg_overflow; 1160 | } 1161 | return (int) val; 1162 | } 1163 | } else 1164 | #endif 1165 | if (likely(PyLong_Check(x))) { 1166 | if (is_unsigned) { 1167 | #if CYTHON_USE_PYLONG_INTERNALS 1168 | const digit* digits = ((PyLongObject*)x)->ob_digit; 1169 | switch (Py_SIZE(x)) { 1170 | case 0: return (int) 0; 1171 | case 1: __PYX_VERIFY_RETURN_INT(int, digit, 
digits[0]) 1172 | case 2: 1173 | if (8 * sizeof(int) > 1 * PyLong_SHIFT) { 1174 | if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { 1175 | __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1176 | } else if (8 * sizeof(int) >= 2 * PyLong_SHIFT) { 1177 | return (int) (((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); 1178 | } 1179 | } 1180 | break; 1181 | case 3: 1182 | if (8 * sizeof(int) > 2 * PyLong_SHIFT) { 1183 | if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { 1184 | __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1185 | } else if (8 * sizeof(int) >= 3 * PyLong_SHIFT) { 1186 | return (int) (((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); 1187 | } 1188 | } 1189 | break; 1190 | case 4: 1191 | if (8 * sizeof(int) > 3 * PyLong_SHIFT) { 1192 | if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { 1193 | __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1194 | } else if (8 * sizeof(int) >= 4 * PyLong_SHIFT) { 1195 | return (int) (((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0])); 1196 | } 1197 | } 1198 | break; 1199 | } 1200 | #endif 1201 | #if CYTHON_COMPILING_IN_CPYTHON 1202 | if (unlikely(Py_SIZE(x) < 0)) { 1203 | goto raise_neg_overflow; 1204 | } 1205 | #else 1206 | { 1207 | int result = PyObject_RichCompareBool(x, Py_False, Py_LT); 1208 | if (unlikely(result < 0)) 1209 | return (int) -1; 1210 | if (unlikely(result == 1)) 1211 | goto raise_neg_overflow; 1212 | } 1213 | #endif 1214 | if (sizeof(int) <= sizeof(unsigned long)) { 1215 | __PYX_VERIFY_RETURN_INT_EXC(int, unsigned long, 
PyLong_AsUnsignedLong(x)) 1216 | } else if (sizeof(int) <= sizeof(unsigned PY_LONG_LONG)) { 1217 | __PYX_VERIFY_RETURN_INT_EXC(int, unsigned PY_LONG_LONG, PyLong_AsUnsignedLongLong(x)) 1218 | } 1219 | } else { 1220 | #if CYTHON_USE_PYLONG_INTERNALS 1221 | const digit* digits = ((PyLongObject*)x)->ob_digit; 1222 | switch (Py_SIZE(x)) { 1223 | case 0: return (int) 0; 1224 | case -1: __PYX_VERIFY_RETURN_INT(int, sdigit, -(sdigit) digits[0]) 1225 | case 1: __PYX_VERIFY_RETURN_INT(int, digit, +digits[0]) 1226 | case -2: 1227 | if (8 * sizeof(int) - 1 > 1 * PyLong_SHIFT) { 1228 | if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { 1229 | __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1230 | } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { 1231 | return (int) (((int)-1)*(((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); 1232 | } 1233 | } 1234 | break; 1235 | case 2: 1236 | if (8 * sizeof(int) > 1 * PyLong_SHIFT) { 1237 | if (8 * sizeof(unsigned long) > 2 * PyLong_SHIFT) { 1238 | __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1239 | } else if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { 1240 | return (int) ((((((int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); 1241 | } 1242 | } 1243 | break; 1244 | case -3: 1245 | if (8 * sizeof(int) - 1 > 2 * PyLong_SHIFT) { 1246 | if (8 * sizeof(unsigned long) > 3 * PyLong_SHIFT) { 1247 | __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1248 | } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { 1249 | return (int) (((int)-1)*(((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); 1250 | } 1251 | } 1252 | break; 1253 | case 3: 1254 | if (8 * sizeof(int) > 2 * PyLong_SHIFT) { 1255 | if (8 * sizeof(unsigned long) > 3 * 
PyLong_SHIFT) { 1256 | __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1257 | } else if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { 1258 | return (int) ((((((((int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); 1259 | } 1260 | } 1261 | break; 1262 | case -4: 1263 | if (8 * sizeof(int) - 1 > 3 * PyLong_SHIFT) { 1264 | if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { 1265 | __PYX_VERIFY_RETURN_INT(int, long, -(long) (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1266 | } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { 1267 | return (int) (((int)-1)*(((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); 1268 | } 1269 | } 1270 | break; 1271 | case 4: 1272 | if (8 * sizeof(int) > 3 * PyLong_SHIFT) { 1273 | if (8 * sizeof(unsigned long) > 4 * PyLong_SHIFT) { 1274 | __PYX_VERIFY_RETURN_INT(int, unsigned long, (((((((((unsigned long)digits[3]) << PyLong_SHIFT) | (unsigned long)digits[2]) << PyLong_SHIFT) | (unsigned long)digits[1]) << PyLong_SHIFT) | (unsigned long)digits[0]))) 1275 | } else if (8 * sizeof(int) - 1 > 4 * PyLong_SHIFT) { 1276 | return (int) ((((((((((int)digits[3]) << PyLong_SHIFT) | (int)digits[2]) << PyLong_SHIFT) | (int)digits[1]) << PyLong_SHIFT) | (int)digits[0]))); 1277 | } 1278 | } 1279 | break; 1280 | } 1281 | #endif 1282 | if (sizeof(int) <= sizeof(long)) { 1283 | __PYX_VERIFY_RETURN_INT_EXC(int, long, PyLong_AsLong(x)) 1284 | } else if (sizeof(int) <= sizeof(PY_LONG_LONG)) { 1285 | __PYX_VERIFY_RETURN_INT_EXC(int, PY_LONG_LONG, PyLong_AsLongLong(x)) 1286 | } 1287 | } 1288 | { 1289 | #if CYTHON_COMPILING_IN_PYPY && !defined(_PyLong_AsByteArray) 1290 | PyErr_SetString(PyExc_RuntimeError, 1291 
| "_PyLong_AsByteArray() not available in PyPy, cannot convert large numbers"); 1292 | #else 1293 | int val; 1294 | PyObject *v = __Pyx_PyNumber_Int(x); 1295 | #if PY_MAJOR_VERSION < 3 1296 | if (likely(v) && !PyLong_Check(v)) { 1297 | PyObject *tmp = v; 1298 | v = PyNumber_Long(tmp); 1299 | Py_DECREF(tmp); 1300 | } 1301 | #endif 1302 | if (likely(v)) { 1303 | int one = 1; int is_little = (int)*(unsigned char *)&one; 1304 | unsigned char *bytes = (unsigned char *)&val; 1305 | int ret = _PyLong_AsByteArray((PyLongObject *)v, 1306 | bytes, sizeof(val), 1307 | is_little, !is_unsigned); 1308 | Py_DECREF(v); 1309 | if (likely(!ret)) 1310 | return val; 1311 | } 1312 | #endif 1313 | return (int) -1; 1314 | } 1315 | } else { 1316 | int val; 1317 | PyObject *tmp = __Pyx_PyNumber_Int(x); 1318 | if (!tmp) return (int) -1; 1319 | val = __Pyx_PyInt_As_int(tmp); 1320 | Py_DECREF(tmp); 1321 | return val; 1322 | } 1323 | raise_overflow: 1324 | PyErr_SetString(PyExc_OverflowError, 1325 | "value too large to convert to int"); 1326 | return (int) -1; 1327 | raise_neg_overflow: 1328 | PyErr_SetString(PyExc_OverflowError, 1329 | "can't convert negative value to int"); 1330 | return (int) -1; 1331 | } 1332 | 1333 | static int __Pyx_check_binary_version(void) { 1334 | char ctversion[4], rtversion[4]; 1335 | PyOS_snprintf(ctversion, 4, "%d.%d", PY_MAJOR_VERSION, PY_MINOR_VERSION); 1336 | PyOS_snprintf(rtversion, 4, "%s", Py_GetVersion()); 1337 | if (ctversion[0] != rtversion[0] || ctversion[2] != rtversion[2]) { 1338 | char message[200]; 1339 | PyOS_snprintf(message, sizeof(message), 1340 | "compiletime version %s of module '%.100s' " 1341 | "does not match runtime version %s", 1342 | ctversion, __Pyx_MODULE_NAME, rtversion); 1343 | return PyErr_WarnEx(NULL, message, 1); 1344 | } 1345 | return 0; 1346 | } 1347 | 1348 | static int __Pyx_InitStrings(__Pyx_StringTabEntry *t) { 1349 | while (t->p) { 1350 | #if PY_MAJOR_VERSION < 3 1351 | if (t->is_unicode) { 1352 | *t->p = 
PyUnicode_DecodeUTF8(t->s, t->n - 1, NULL); 1353 | } else if (t->intern) { 1354 | *t->p = PyString_InternFromString(t->s); 1355 | } else { 1356 | *t->p = PyString_FromStringAndSize(t->s, t->n - 1); 1357 | } 1358 | #else 1359 | if (t->is_unicode | t->is_str) { 1360 | if (t->intern) { 1361 | *t->p = PyUnicode_InternFromString(t->s); 1362 | } else if (t->encoding) { 1363 | *t->p = PyUnicode_Decode(t->s, t->n - 1, t->encoding, NULL); 1364 | } else { 1365 | *t->p = PyUnicode_FromStringAndSize(t->s, t->n - 1); 1366 | } 1367 | } else { 1368 | *t->p = PyBytes_FromStringAndSize(t->s, t->n - 1); 1369 | } 1370 | #endif 1371 | if (!*t->p) 1372 | return -1; 1373 | ++t; 1374 | } 1375 | return 0; 1376 | } 1377 | 1378 | static CYTHON_INLINE PyObject* __Pyx_PyUnicode_FromString(const char* c_str) { 1379 | return __Pyx_PyUnicode_FromStringAndSize(c_str, (Py_ssize_t)strlen(c_str)); 1380 | } 1381 | static CYTHON_INLINE char* __Pyx_PyObject_AsString(PyObject* o) { 1382 | Py_ssize_t ignore; 1383 | return __Pyx_PyObject_AsStringAndSize(o, &ignore); 1384 | } 1385 | static CYTHON_INLINE char* __Pyx_PyObject_AsStringAndSize(PyObject* o, Py_ssize_t *length) { 1386 | #if CYTHON_COMPILING_IN_CPYTHON && (__PYX_DEFAULT_STRING_ENCODING_IS_ASCII || __PYX_DEFAULT_STRING_ENCODING_IS_DEFAULT) 1387 | if ( 1388 | #if PY_MAJOR_VERSION < 3 && __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 1389 | __Pyx_sys_getdefaultencoding_not_ascii && 1390 | #endif 1391 | PyUnicode_Check(o)) { 1392 | #if PY_VERSION_HEX < 0x03030000 1393 | char* defenc_c; 1394 | PyObject* defenc = _PyUnicode_AsDefaultEncodedString(o, NULL); 1395 | if (!defenc) return NULL; 1396 | defenc_c = PyBytes_AS_STRING(defenc); 1397 | #if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 1398 | { 1399 | char* end = defenc_c + PyBytes_GET_SIZE(defenc); 1400 | char* c; 1401 | for (c = defenc_c; c < end; c++) { 1402 | if ((unsigned char) (*c) >= 128) { 1403 | PyUnicode_AsASCIIString(o); 1404 | return NULL; 1405 | } 1406 | } 1407 | } 1408 | #endif 1409 | *length = 
PyBytes_GET_SIZE(defenc); 1410 | return defenc_c; 1411 | #else 1412 | if (__Pyx_PyUnicode_READY(o) == -1) return NULL; 1413 | #if __PYX_DEFAULT_STRING_ENCODING_IS_ASCII 1414 | if (PyUnicode_IS_ASCII(o)) { 1415 | *length = PyUnicode_GET_LENGTH(o); 1416 | return PyUnicode_AsUTF8(o); 1417 | } else { 1418 | PyUnicode_AsASCIIString(o); 1419 | return NULL; 1420 | } 1421 | #else 1422 | return PyUnicode_AsUTF8AndSize(o, length); 1423 | #endif 1424 | #endif 1425 | } else 1426 | #endif 1427 | #if (!CYTHON_COMPILING_IN_PYPY) || (defined(PyByteArray_AS_STRING) && defined(PyByteArray_GET_SIZE)) 1428 | if (PyByteArray_Check(o)) { 1429 | *length = PyByteArray_GET_SIZE(o); 1430 | return PyByteArray_AS_STRING(o); 1431 | } else 1432 | #endif 1433 | { 1434 | char* result; 1435 | int r = PyBytes_AsStringAndSize(o, &result, length); 1436 | if (unlikely(r < 0)) { 1437 | return NULL; 1438 | } else { 1439 | return result; 1440 | } 1441 | } 1442 | } 1443 | static CYTHON_INLINE int __Pyx_PyObject_IsTrue(PyObject* x) { 1444 | int is_true = x == Py_True; 1445 | if (is_true | (x == Py_False) | (x == Py_None)) return is_true; 1446 | else return PyObject_IsTrue(x); 1447 | } 1448 | static CYTHON_INLINE PyObject* __Pyx_PyNumber_Int(PyObject* x) { 1449 | PyNumberMethods *m; 1450 | const char *name = NULL; 1451 | PyObject *res = NULL; 1452 | #if PY_MAJOR_VERSION < 3 1453 | if (PyInt_Check(x) || PyLong_Check(x)) 1454 | #else 1455 | if (PyLong_Check(x)) 1456 | #endif 1457 | return __Pyx_NewRef(x); 1458 | m = Py_TYPE(x)->tp_as_number; 1459 | #if PY_MAJOR_VERSION < 3 1460 | if (m && m->nb_int) { 1461 | name = "int"; 1462 | res = PyNumber_Int(x); 1463 | } 1464 | else if (m && m->nb_long) { 1465 | name = "long"; 1466 | res = PyNumber_Long(x); 1467 | } 1468 | #else 1469 | if (m && m->nb_int) { 1470 | name = "int"; 1471 | res = PyNumber_Long(x); 1472 | } 1473 | #endif 1474 | if (res) { 1475 | #if PY_MAJOR_VERSION < 3 1476 | if (!PyInt_Check(res) && !PyLong_Check(res)) { 1477 | #else 1478 | if 
(!PyLong_Check(res)) { 1479 | #endif 1480 | PyErr_Format(PyExc_TypeError, 1481 | "__%.4s__ returned non-%.4s (type %.200s)", 1482 | name, name, Py_TYPE(res)->tp_name); 1483 | Py_DECREF(res); 1484 | return NULL; 1485 | } 1486 | } 1487 | else if (!PyErr_Occurred()) { 1488 | PyErr_SetString(PyExc_TypeError, 1489 | "an integer is required"); 1490 | } 1491 | return res; 1492 | } 1493 | static CYTHON_INLINE Py_ssize_t __Pyx_PyIndex_AsSsize_t(PyObject* b) { 1494 | Py_ssize_t ival; 1495 | PyObject *x; 1496 | #if PY_MAJOR_VERSION < 3 1497 | if (likely(PyInt_CheckExact(b))) { 1498 | if (sizeof(Py_ssize_t) >= sizeof(long)) 1499 | return PyInt_AS_LONG(b); 1500 | else 1501 | return PyInt_AsSsize_t(x); 1502 | } 1503 | #endif 1504 | if (likely(PyLong_CheckExact(b))) { 1505 | #if CYTHON_USE_PYLONG_INTERNALS 1506 | const digit* digits = ((PyLongObject*)b)->ob_digit; 1507 | const Py_ssize_t size = Py_SIZE(b); 1508 | if (likely(__Pyx_sst_abs(size) <= 1)) { 1509 | ival = likely(size) ? digits[0] : 0; 1510 | if (size == -1) ival = -ival; 1511 | return ival; 1512 | } else { 1513 | switch (size) { 1514 | case 2: 1515 | if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { 1516 | return (Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); 1517 | } 1518 | break; 1519 | case -2: 1520 | if (8 * sizeof(Py_ssize_t) > 2 * PyLong_SHIFT) { 1521 | return -(Py_ssize_t) (((((size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); 1522 | } 1523 | break; 1524 | case 3: 1525 | if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { 1526 | return (Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); 1527 | } 1528 | break; 1529 | case -3: 1530 | if (8 * sizeof(Py_ssize_t) > 3 * PyLong_SHIFT) { 1531 | return -(Py_ssize_t) (((((((size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); 1532 | } 1533 | break; 1534 | case 4: 1535 | if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { 1536 | return 
(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); 1537 | } 1538 | break; 1539 | case -4: 1540 | if (8 * sizeof(Py_ssize_t) > 4 * PyLong_SHIFT) { 1541 | return -(Py_ssize_t) (((((((((size_t)digits[3]) << PyLong_SHIFT) | (size_t)digits[2]) << PyLong_SHIFT) | (size_t)digits[1]) << PyLong_SHIFT) | (size_t)digits[0])); 1542 | } 1543 | break; 1544 | } 1545 | } 1546 | #endif 1547 | return PyLong_AsSsize_t(b); 1548 | } 1549 | x = PyNumber_Index(b); 1550 | if (!x) return -1; 1551 | ival = PyInt_AsSsize_t(x); 1552 | Py_DECREF(x); 1553 | return ival; 1554 | } 1555 | static CYTHON_INLINE PyObject * __Pyx_PyInt_FromSize_t(size_t ival) { 1556 | return PyInt_FromSize_t(ival); 1557 | } 1558 | 1559 | 1560 | #endif /* Py_PYTHON_H */ 1561 | --------------------------------------------------------------------------------