├── test ├── pandokia_top └── shmht │ └── trivial.py ├── MANIFEST.in ├── ext_shmht ├── __init__.py ├── raw_performance_test.py ├── HashTable.py └── Cacher.py ├── .gitignore ├── shmht.notes.txt ├── hashtable.h ├── README.md ├── LICENSE ├── setup.py ├── shmht.c └── hashtable.c /test/pandokia_top: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include *.h 2 | include setup.py 3 | include README.md 4 | include LICENSE 5 | -------------------------------------------------------------------------------- /ext_shmht/__init__.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python 2 | 3 | from HashTable import HashTable 4 | from Cacher import Cacher, MemCacher 5 | 6 | -------------------------------------------------------------------------------- /ext_shmht/raw_performance_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | #coding: utf-8 3 | 4 | import shmht 5 | import time 6 | 7 | capacity = 300000 8 | 9 | fd = shmht.open('/dev/shm/test.performance', capacity, 1) 10 | 11 | begin_time = time.time() 12 | for i in range(capacity): 13 | s = '%064d' % i 14 | shmht.setval(fd, s, s) 15 | end_time = time.time() 16 | print capacity / (end_time - begin_time), 'iops @ set' 17 | 18 | begin_timend_time = time.time() 19 | for i in range(capacity): 20 | s = '%064d' % i 21 | if s != shmht.getval(fd, s): 22 | raise Exception(s) 23 | end_time = time.time() 24 | print capacity / (end_time - begin_time), 'iops @ get' 25 | 26 | shmht.close(fd) 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 
| __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | -------------------------------------------------------------------------------- /shmht.notes.txt: -------------------------------------------------------------------------------- 1 | hash tables 2 | 3 | max key size = 256 4 | max value size = 1024 5 | 6 | shmht.open( 7 | s|ii 8 | name 9 | file name 10 | capacity = 0 11 | min number of slots in hash table 12 | force_init = 0 13 | initialize even if initialized 14 | 15 | creates a file with a hash table in it 16 | 17 | returns an integer "ident" - hash table number 18 | 19 | shmht.close 20 | i 21 | idx 22 | number of the hash table to close 23 | 24 | shmht.getval 25 | is 26 | idx 27 | number of the hash table 28 | key 29 | string index of hash table element 30 | 31 | shmht.setval 32 | iss 33 | idx 34 | number of the hash table 35 | key 36 | string index of hash table element 37 | value 38 | string value of hash table element 39 | 40 | shmht.remove 41 | is 42 | idx 43 | number of the hash table 44 | key 45 | string index of hash table element 46 | 47 | 48 | shmht.foreach 49 | iO 50 | idx 51 | number of the hash table 52 | O 53 | callable to be called for each element 54 | called with key, value 55 | 
-------------------------------------------------------------------------------- /hashtable.h: -------------------------------------------------------------------------------- 1 | #ifndef __HASH_TABLE__ 2 | #define __HASH_TABLE__ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #define ALLOC(type, n) ((type *)malloc(sizeof(type) * n)) 12 | 13 | typedef struct __hashtable { 14 | unsigned magic; 15 | size_t ref_cnt, orig_capacity, capacity, size, flag_offset, bucket_offset; 16 | } hashtable; 17 | 18 | typedef unsigned u_int32; 19 | 20 | typedef struct _ht_str { 21 | u_int32 size; 22 | char str[1]; 23 | } ht_str; 24 | 25 | typedef struct _ht_iter { 26 | hashtable *ht; 27 | size_t pos; 28 | ht_str *key, *value; 29 | } ht_iter; 30 | 31 | typedef int BOOL; 32 | #define True 1 33 | #define False 0 34 | 35 | ht_iter* ht_get_iterator(hashtable *ht); 36 | int ht_iter_next(ht_iter* iter); 37 | 38 | size_t ht_memory_size(size_t capacity); 39 | hashtable* ht_init(void *base_addr, size_t capacity, int force_init); 40 | ht_str* ht_get(hashtable *ht, const char *key, u_int32 key_size); 41 | int ht_set(hashtable *ht, const char *key, u_int32 key_size, const char *value, u_int32 value_size); 42 | int ht_remove(hashtable *ht, const char *key, u_int32 key_size); 43 | int ht_destroy(hashtable *ht); 44 | 45 | int ht_is_valid(hashtable *ht); 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | pyshmht 2 | ======= 3 | 4 | **Sharing memory based** Hash Table extension for Python 5 | 6 | For examples, see test cases in python files (pyshmht/Cacher.py, pyshmht/HashTable.py), where you can find performance tests as well. 
7 | 8 | Performance 9 | =========== 10 | 11 | capacity=200M, 64 bytes key/value tests, tested on (Xeon E5-2670 0 @ 2.60GHz, 128GB ram) 12 | 13 | * hashtable.c (raw hash table in c, tested on `malloc`ed memory) 14 | > set: 0.93 Million iops; 15 | > get: 2.35 Million iops; 16 | 17 | * performance\_test.py (raw python binding) 18 | > set: 451k iops; 19 | > get: 272k iops; 20 | 21 | * HashTable.py (simple wrapper, no serialization) 22 | > set: 354k iops; 23 | > get: 202k iops; 24 | 25 | * Cacher.py (cached wrapper, with serialization) 26 | > set: 501k iops (cached), 228k iops (after write\_back); 27 | > get: 560k iops (cached), 238k iops (no cache); 28 | 29 | * python native dict 30 | > set: 741k iops; 31 | > get: 390k iops; 32 | 33 | Notice 34 | ====== 35 | 36 | In hashtable.c, default max key length is `256 - 4`, max value length is `1024 - 4`; you can change `bucket_size` and `max_key_size` manually, but bear in mind that increasing these two arguments will result in larger memory consumption. 37 | 38 | If you find any bugs, please submit an issue or send me a pull request, I'll see to it ASAP :) 39 | 40 | p.s. `hashtable.c` is independent (i.e. has nothing to do with python), you can use it in other projects if needed. :P 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014, Felix021 2 | All rights reserved. 3 | Copyright (c) 2015, Association of Universities for Research in Astronomy 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 
10 | 11 | * Redistributions in binary form must reproduce the above copyright notice, 12 | this list of conditions and the following disclaimer in the documentation 13 | and/or other materials provided with the distribution. 14 | 15 | * Neither the name of the {organization} nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 27 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /test/shmht/trivial.py: -------------------------------------------------------------------------------- 1 | # using Pandokia - http://ssb.stsci.edu/testing/pandokia 2 | # 3 | # using this feature: 4 | # http://ssb.stsci.edu/testing/pandokia/docs_new/runner_minipyt.html#linear-execution-in-sequential-code-with-statement 5 | # 6 | import pandokia.helpers.pycode as pycode 7 | from pandokia.helpers.filecomp import safe_rm 8 | 9 | import shmht 10 | 11 | testfile = 'test_shmht.dat' 12 | 13 | safe_rm(testfile) 14 | 15 | with pycode.test('open-error') : 16 | 17 | try : 18 | ident = shmht.open( testfile ) 19 | 20 | except shmht.error as e : 21 | pass 22 | 23 | else : 24 | assert False, 'should have raised an exception' 25 | 26 | with pycode.test('open-init') : 27 | 28 | ident = shmht.open( testfile, 10 ) 29 | 30 | with pycode.test('insert-lookup') : 31 | 32 | shmht.setval( ident, 'arf', 'data for arf' ) 33 | assert shmht.getval( ident, 'arf' ) == 'data for arf' 34 | 35 | shmht.setval( ident, 'narf', 'data for narf' ) 36 | assert shmht.getval( ident, 'narf' ) == 'data for narf' 37 | 38 | assert shmht.getval( ident, 'arf' ) == 'data for arf' 39 | assert shmht.getval( ident, 'narf' ) == 'data for narf' 40 | 41 | with pycode.test('iter-small') : 42 | d = { } 43 | def collect( key, value ): 44 | d[key] = value 45 | 46 | shmht.foreach( ident, collect ) 47 | 48 | print d 49 | assert d == { 'arf' : 'data for arf', 'narf' : 'data for narf' } 50 | 51 | with pycode.test('remove') : 52 | shmht.remove( ident, 'arf' ) 53 | assert shmht.getval( ident, 'arf' ) == None 54 | assert shmht.getval( ident, 'narf' ) == 'data for narf' 55 | shmht.remove( ident, 'narf' ) 56 | assert shmht.getval( ident, 'arf' ) == None 57 | assert shmht.getval( ident, 'narf' ) == None 58 | 59 | def collect( key, value ): 60 | assert 0, 'somehow found %s : %s'% (key, value) 61 | 62 | shmht.foreach( ident, collect ) 63 | 64 | with 
pycode.test('fill') : 65 | for x in range(57): 66 | shmht.setval( ident, str(x), str(x)+' data' ) 67 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import os 3 | from distutils.core import setup, Extension 4 | 5 | #os.putenv("CFLAGS", "-g") 6 | 7 | shmht = Extension('ext_shmht/_shmht', 8 | sources = ['shmht.c', 'hashtable.c'] 9 | ) 10 | 11 | setup( 12 | name = 'ext_shmht', 13 | # minimal changing of the version 14 | version = '0.1', 15 | # not to claim credit for another's work, nor to unfairly attribute my errors 16 | author = '', 17 | author_email = '', 18 | description = 'shared memory hash table with locking', 19 | license = "BSD", 20 | keywords = "shared memory hash table shmem mmap", 21 | url = "http://github.com/stsci-sienkiew/pyshmht", 22 | ext_modules = [shmht], 23 | packages = ["ext_shmht"], 24 | long_description = """ 25 | An extended pyshmht - a simple hash table stored in an mmapped file 26 | 27 | The basic access is vaguely dict like with the core capability being: 28 | 29 | h = ext_shmht.HashTable( filename, max_entries ) 30 | h['key'] = 'value' 31 | v = h['key'] 32 | 33 | The table only uses strings for keys and values, but there is an 34 | interface that uses an object serializer, such as json or some other 35 | serializer that you provide. 36 | 37 | There is a max length of key and value that are specified by defines 38 | in the C code. 39 | 40 | extensions include: 41 | 42 | - file locking for multi-threaded or multi-process access 43 | n.b. do not use the same object in multiple threads - open the 44 | file again in each thread. 45 | 46 | - a little bit of documentation 47 | 48 | - a few test cases that run in Pandokia. See http://ssb.stsci.edu/testing/pandokia/ or 'pip install pandokia'. 49 | 50 | 51 | This is extended from pyshmht by felix021@gmail.com. 
My intent is 52 | to enhance the original for my needs, in a way that the changes may 53 | someday make a reasonable pull request into the original. It is a 54 | fork with a new name because I don't have time for the coordination 55 | with someone on the other side of the world right now. (No kidding! 56 | felix is in Shanghai and I am in Baltimore, separated by 160 degrees 57 | longitude, or 10 to 11 time zones.) 58 | """, 59 | 60 | ) 61 | -------------------------------------------------------------------------------- /ext_shmht/HashTable.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | #coding: utf-8 3 | 4 | import os 5 | from . import _shmht 6 | import marshal 7 | 8 | #basic wrapper: open, close, get, set, remove, foreach 9 | #extended wrapper: getobj, setobj, [], to_dict, update 10 | 11 | class HashTable(object): 12 | """ 13 | Simple hash table stored in shared memory. 14 | max open tables = 256 15 | (bug: fix this someday) 16 | string keys, max len = 256 17 | string data, max len = 1024 18 | (bug: make this settable per table someday) 19 | 20 | import pyshmht 21 | h = pyshmht.HashTable( filename, max_entries ) 22 | 23 | ## for string keys and data values only: 24 | 25 | h.put( 'key', 'data' ) 26 | h['key'] = 'data' 27 | # put string data 28 | 29 | s = h.get('key') 30 | s = h['key'] 31 | # returns string, or None if key not present 32 | 33 | d = h.to_dict() 34 | # returns dict copied from hash table 35 | 36 | h.remove('key') 37 | # removes key from hash table 38 | 39 | h.update(dict) 40 | # insert each element of dict 41 | 42 | print 'key' in h 43 | # 44 | 45 | h.close() 46 | 47 | ## for string key and non-string python objects 48 | h.getobj() and h.setobj() use a serializer to convert the object 49 | to a string for storage. 
50 | 51 | """ 52 | def __init__(self, name, capacity=0, force_init=False, serializer=marshal, mkdirs=False): 53 | if mkdirs: 54 | try: 55 | d = os.path.dirname(name) 56 | os.makedirs(d) 57 | except OSError : 58 | pass 59 | force_init = 1 if force_init else 0 60 | self.fd = _shmht.open(name, capacity, force_init) 61 | self.loads = serializer.loads 62 | self.dumps = serializer.dumps 63 | 64 | def close(self): 65 | _shmht.close(self.fd) 66 | 67 | def get(self, key, default=None): 68 | val = _shmht.getval(self.fd, key) 69 | if val == None: 70 | return default 71 | return val 72 | 73 | def set(self, key, value): 74 | return _shmht.setval(self.fd, key, value) 75 | 76 | # "set" is a python data type, so use put() 77 | 78 | def put(self, key, value): 79 | return _shmht.setval(self.fd, key, value) 80 | 81 | def remove(self, key): 82 | return _shmht.remove(self.fd, key) 83 | 84 | def foreach(self, callback, unserialize=False): 85 | if not unserialize: 86 | cb = callback 87 | else: 88 | loads = self.loads 89 | def mcb(key, value): 90 | return callback(key, loads(value)) 91 | cb = mcb 92 | return _shmht.foreach(self.fd, cb) 93 | 94 | def getobj(self, key, default=None): 95 | val = self.get(key, default) 96 | if val == default: 97 | return default 98 | return self.loads(val) 99 | 100 | def setobj(self, key, val): 101 | val = self.dumps(val) 102 | return self.set(key, val) 103 | 104 | def __getitem__(self, key): 105 | val = _shmht.getval(self.fd, key) 106 | if val == None: 107 | raise KeyError(key) 108 | return val 109 | 110 | def __setitem__(self, key, value): 111 | return _shmht.setval(self.fd, key, value) 112 | 113 | def __delitem__(self, key): 114 | if False == _shmht.remove(self.fd, key): 115 | raise KeyError(key) 116 | 117 | def __contains__(self, key): 118 | return _shmht.getval(self.fd, key) != None 119 | 120 | def to_dict(self, unserialize=False): 121 | d = {} 122 | def insert(k,v): 123 | d[k] = v 124 | self.foreach(insert, unserialize) 125 | return d 126 | 127 | def 
update(self, d, serialize=False): 128 | dumps = self.dumps 129 | if serialize: 130 | for k in d: 131 | self[k] = dumps(d[k]) 132 | else: 133 | for k in d: 134 | self[k] = d[k] 135 | 136 | if __name__ == "__main__": 137 | loads = marshal.loads 138 | dumps = marshal.dumps 139 | #test cases 140 | ht = HashTable('/dev/shm/test.HashTable', 1024, 1) 141 | 142 | #set 143 | ht['a'] = '1' 144 | ht.set('b', '2') 145 | c = {'hello': 'world'} 146 | ht.setobj('c', c) 147 | 148 | #get 149 | print ht['b'] == '2' 150 | print ht['c'] == marshal.dumps(c) 151 | print ht.getobj('c') == c 152 | print ht.get('d') == None 153 | try: 154 | ht['d'] 155 | print False 156 | except: 157 | print True 158 | 159 | #contains 160 | print ('c' in ht) == True 161 | print ('d' in ht) == False 162 | 163 | #del 164 | del ht['c'] 165 | print ht.get('c') == None 166 | try: 167 | del ht['d'] 168 | print 'del:', False 169 | except: 170 | print True 171 | 172 | #update & to_dict & foreach 173 | ht.setobj('c', c) 174 | print ht.to_dict() == {'a': '1', 'b': '2', 'c': dumps(c)} 175 | 176 | s = '' 177 | def cb(key, value): 178 | global s 179 | s += key + str(value) 180 | ht.foreach(cb) 181 | print s == 'a1b2c' + dumps(c) 182 | 183 | ht.update({'a': 1, 'b': 2}, serialize=True) 184 | 185 | s = '' 186 | ht.foreach(cb, unserialize=True) 187 | print s == 'a1b2c' + str(c) 188 | 189 | print ht.to_dict() == {'a':dumps(1), 'b':dumps(2), 'c':dumps(c)} 190 | print ht.to_dict(unserialize=True) == {'a': 1, 'b': 2, 'c': c} 191 | 192 | #close 193 | ht.close() 194 | try: 195 | ht['a'] 196 | print False 197 | except: 198 | print True 199 | 200 | #simple performance test 201 | import time 202 | 203 | capacity = 300000 204 | 205 | #write_through 206 | ht = HashTable('/dev/shm/test.HashTable', capacity, True) 207 | 208 | begin_time = time.time() 209 | for i in range(capacity): 210 | s = '%064d' % i 211 | ht[s] = s 212 | end_time = time.time() 213 | print capacity / (end_time - begin_time), 'iops @ set' 214 | 215 | 
begin_timend_time = time.time() 216 | for i in range(capacity): 217 | s = '%064d' % i 218 | if s != ht[s]: 219 | raise Exception(s) 220 | end_time = time.time() 221 | print capacity / (end_time - begin_time), 'iops @ get' 222 | 223 | ht.close() 224 | 225 | -------------------------------------------------------------------------------- /ext_shmht/Cacher.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | #coding: utf-8 3 | 4 | import marshal 5 | import HashTable 6 | 7 | _debug = False 8 | 9 | class Cacher(object): 10 | """ 11 | Cacher: wrap HashTable with serializer and write_back mechanism 12 | if you intend to modify the cache, call write_back() before the program exits 13 | 14 | notice: 15 | Cacher tries to simulate dict in most cases, mainly except for: 16 | (a) no __iter__, please use foreach instead 17 | (b) key should always be a str, where dict allows all hashable objects 18 | (c) no comparation with other 'dict's 19 | When necessary, you can use .to_dict() to get a real dict object. 20 | """ 21 | def __init__(self, name, capacity=0, force_init=False, serializer=marshal): 22 | """ 23 | 'name' the path of the file to be 'mmap'ed 24 | use MemCacher(name, ...) to add prefix '/dev/shm' automatically 25 | 'capacity' optional, if you want to connect to an existing shmht 26 | 'serializer' should contain loads/dumps (marshal, json, pickle, etc.) 
27 | """ 28 | 29 | self.ht = HashTable.HashTable(name, capacity, force_init, serializer) 30 | self.d = {} 31 | self.loads = serializer.loads 32 | self.dumps = serializer.dumps 33 | 34 | def __getitem__(self, key): 35 | d = self.d 36 | if key in d: 37 | val = d[key] 38 | else: 39 | val = self.loads(self.ht[key]) 40 | d[key] = val 41 | return val 42 | 43 | def __setitem__(self, key, val): 44 | self.d[key] = val 45 | 46 | def __delitem__(self, key): 47 | if key in self.d: 48 | del self.d[key] 49 | try: 50 | del self.ht[key] 51 | except: 52 | pass 53 | else: 54 | del self.d[key] 55 | 56 | def __contains__(self, key): #notice: key will be cached here 57 | return self.get(key) != None 58 | 59 | def get(self, key, default=None): 60 | try: 61 | return self.__getitem__(key) 62 | except: 63 | return default 64 | 65 | def update(self, dic): 66 | self.d.update(dic) 67 | 68 | def foreach(self, callback): 69 | self.write_back() 70 | return self.ht.foreach(callback, unserialize=True) 71 | 72 | def to_dict(self): 73 | self.write_back() 74 | return self.ht.to_dict(unserialize=True) 75 | 76 | def write_back(self): 77 | self.ht.update(self.d, serialize=True) 78 | 79 | def close(self): 80 | if self.d: 81 | global _debug 82 | if not _debug: 83 | self.write_back() #commented out for testing 84 | del self.d 85 | self.d = None 86 | if self.ht: 87 | self.ht.close() 88 | self.ht = None 89 | 90 | def __del__(self): 91 | """ 92 | don't rely on this, please call write_back() manually if necessary. 
93 | """ 94 | self.close() 95 | 96 | def MemCacher(name, capacity=0, force_init=False, serializer=marshal): 97 | """ 98 | Add an prefix '/dev/shm/' to `name`, so that the file is saved only in memory 99 | For more information, see `help(Cacher)` 100 | """ 101 | name = '/dev/shm/' + name 102 | return Cacher(name, capacity, force_init, serializer) 103 | 104 | if __name__ == "__main__": 105 | #test cases 106 | ht = MemCacher('test.Cacher', 1024, True) 107 | print 'fd:', ht.ht.fd 108 | 109 | #set 110 | ht['a'] = '1' 111 | ht['b'] = 2 112 | c = {'hello': 'world'} 113 | ht['c'] = c 114 | 115 | #get 116 | print ht['b'] == 2 117 | print ht['c'] == c 118 | print ht.get('c') == c 119 | print ht.get('d') == None 120 | try: 121 | ht['d'] 122 | print False 123 | except: 124 | print True 125 | 126 | #contains 127 | print ('c' in ht) == True 128 | print ('d' in ht) == False 129 | 130 | #del 131 | del ht['c'] 132 | print ht.get('c') == None 133 | try: 134 | del ht['d'] 135 | print 'del:', False 136 | except: 137 | print True 138 | 139 | #update & to_dict & foreach 140 | dumps = marshal.dumps 141 | ht['c'] = c 142 | print ht.to_dict() == {'a': '1', 'b': 2, 'c': c} 143 | 144 | def cb(key, value): 145 | global s 146 | s += key + str(value) 147 | 148 | s = '' 149 | ht.foreach(cb) 150 | print s == 'a1b2c' + str(c) 151 | 152 | ht.update({'a': 'x', 'b': 1000}) 153 | 154 | s = '' 155 | ht.foreach(cb) 156 | print s == 'axb1000c' + str(c) 157 | 158 | print ht.to_dict() == {'a': 'x', 'b': 1000, 'c': c} 159 | 160 | #close 161 | ht.close() 162 | try: 163 | ht['a'] 164 | print False 165 | except: 166 | print True 167 | 168 | #write_back 169 | ht = MemCacher('test.Cacher', 1024, True) 170 | print 'fd:', ht.ht.fd 171 | ht['a'] = 1 172 | ht.write_back() 173 | ht['b'] = 2 174 | 175 | _debug = True 176 | ht.close() #write_back() is called in close() when not debugging 177 | 178 | ht = MemCacher('test.Cacher', 1024, False) 179 | print 'fd:', ht.ht.fd 180 | print ht['a'] == 1 181 | try: 182 | print 
ht['b'] 183 | print False 184 | except: 185 | print True 186 | ht.close() 187 | 188 | #simple performance test 189 | import time 190 | 191 | capacity = 300000 192 | 193 | ht = MemCacher('test.Cacher', capacity, force_init=True) 194 | begin_time = time.time() 195 | for i in range(capacity): 196 | s = '%064d' % i 197 | ht[s] = s 198 | end_time = time.time() 199 | print capacity / (end_time - begin_time), 'iops @ set / no write_back ' 200 | 201 | ht.write_back() 202 | end_time = time.time() 203 | print capacity / (end_time - begin_time), 'iops @ set / after write_back ' 204 | 205 | ht.d = {} 206 | begin_time = time.time() 207 | for i in range(capacity): 208 | s = '%064d' % i 209 | if s != ht[s]: 210 | raise Exception(s) 211 | end_time = time.time() 212 | print capacity / (end_time - begin_time), 'iops @ get / no cache ' 213 | 214 | begin_time = time.time() 215 | for i in range(capacity): 216 | s = '%064d' % i 217 | if s != ht[s]: 218 | raise Exception(s) 219 | end_time = time.time() 220 | print capacity / (end_time - begin_time), 'iops @ get / all cached ' 221 | 222 | ht.close() 223 | -------------------------------------------------------------------------------- /shmht.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include "hashtable.h" 13 | 14 | struct mapnode { 15 | int fd; 16 | size_t mem_size; 17 | hashtable *ht; 18 | }; 19 | 20 | #define max_ht_map_entries 2048 21 | static struct mapnode ht_map[max_ht_map_entries]; 22 | static int ht_idx = -1; 23 | 24 | static PyObject * shmht_open(PyObject *self, PyObject *args); 25 | static PyObject * shmht_close(PyObject *self, PyObject *args); 26 | static PyObject * shmht_getval(PyObject *self, PyObject *args); 27 | static PyObject * shmht_setval(PyObject *self, PyObject *args); 28 | static PyObject * shmht_remove(PyObject *self, PyObject *args); 29 | 
static PyObject * shmht_foreach(PyObject *self, PyObject *args); 30 | 31 | static PyObject *shmht_error; 32 | PyMODINIT_FUNC init_shmht(void); 33 | 34 | static PyMethodDef shmht_methods[] = { 35 | {"open", shmht_open, METH_VARARGS, "create a shared memory hash table"}, 36 | {"close", shmht_close, METH_VARARGS, ""}, 37 | {"getval", shmht_getval, METH_VARARGS, ""}, 38 | {"setval", shmht_setval, METH_VARARGS, ""}, 39 | {"remove", shmht_remove, METH_VARARGS, ""}, 40 | {"foreach", shmht_foreach, METH_VARARGS, ""}, 41 | {NULL, NULL, 0, NULL} 42 | }; 43 | 44 | // bug: half-assed file locking; I'm in a hurry at the moment. It 45 | // might make sense to separate read/write locks or even use file 46 | // regions, but there is no substitute for simplicity. 47 | static void mylock(fd) { 48 | flock(fd, LOCK_EX); 49 | // bug: not handling error condition 50 | } 51 | 52 | static void myunlock(fd) { 53 | flock(fd, LOCK_UN); 54 | // bug: not handling error condition 55 | } 56 | 57 | 58 | PyMODINIT_FUNC init_shmht(void) 59 | { 60 | PyObject *m = Py_InitModule("ext_shmht._shmht", shmht_methods); 61 | if (m == NULL) 62 | return; 63 | 64 | shmht_error = PyErr_NewException("ext_shmht._shmht.error", NULL, NULL); 65 | Py_INCREF(shmht_error); 66 | PyModule_AddObject(m, "error", shmht_error); 67 | 68 | bzero(ht_map, sizeof(ht_map)); 69 | } 70 | 71 | static PyObject * shmht_open(PyObject *self, PyObject *args) 72 | { 73 | int fd = 0; 74 | size_t mem_size = 0; 75 | hashtable *ht = NULL; 76 | 77 | const char *name; 78 | size_t i_capacity = 0; 79 | int force_init = 0; 80 | if (!PyArg_ParseTuple(args, "s|ii:shmht.create", &name, &i_capacity, &force_init)) 81 | return NULL; 82 | 83 | size_t capacity = i_capacity; 84 | 85 | fd = open(name, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR); 86 | if (fd < 0) { 87 | PyErr_Format(shmht_error, "open file(%s) failed: [%d] %s", name, errno, strerror(errno)); 88 | return NULL; 89 | } 90 | 91 | mylock(fd); 92 | 93 | struct stat buf; 94 | fstat(fd, &buf); 95 | 96 | if 
(force_init == 0) { //try to load from existing shmht 97 | mem_size = sizeof(hashtable); 98 | if (buf.st_size >= sizeof(hashtable)) { //may be valid 99 | ht = mmap(NULL, sizeof(hashtable), PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 100 | if (ht == MAP_FAILED) { 101 | PyErr_Format(shmht_error, "mmap failed, map_size=sizeof(hashtable)=%lu: [%d] %s", 102 | mem_size, errno, strerror(errno)); 103 | goto create_failed; 104 | } 105 | 106 | if (ht_is_valid(ht)) { 107 | // may not ask for larger capacity than is already in file 108 | if (capacity != 0 && capacity > ht->orig_capacity) { 109 | PyErr_Format(shmht_error, "file has smaller capacity than requested (req %d, have %d); specify force_init=1 to overwrite an existing shmht", (int)capacity, (int)ht->orig_capacity); 110 | goto create_failed; 111 | } 112 | capacity = ht->orig_capacity; //loaded capacity 113 | } 114 | munmap(ht, sizeof(hashtable)); 115 | ht = NULL; 116 | } 117 | } 118 | 119 | if (capacity == 0) { 120 | PyErr_Format(shmht_error, "please specify 'capacity' when you try to create a shmht"); 121 | goto create_failed; 122 | } 123 | 124 | mem_size = ht_memory_size(capacity); 125 | 126 | if (buf.st_size < mem_size) { 127 | if (lseek(fd, mem_size - 1, SEEK_SET) == -1) { 128 | PyErr_Format(shmht_error, "lseek failed: [%d] %s", errno, strerror(errno)); 129 | goto create_failed; 130 | } 131 | char t = 0; 132 | if (write(fd, &t, 1) == -1) { 133 | PyErr_Format(shmht_error, "write failed: [%d] %s", errno, strerror(errno)); 134 | goto create_failed; 135 | } 136 | } 137 | 138 | ht = mmap(NULL, mem_size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); 139 | if (ht == MAP_FAILED) { 140 | PyErr_Format(shmht_error, "mmap failed, mem_size=%lu: [%d] %s", 141 | mem_size, errno, strerror(errno)); 142 | goto create_failed; 143 | } 144 | 145 | ht_init(ht, capacity, force_init); 146 | int count; 147 | for (count = 0; count < max_ht_map_entries; count++) 148 | { 149 | ht_idx = (ht_idx + 1) % max_ht_map_entries; 150 | /* note: count is advanced by the for-loop header; an extra increment here would skip half the slots */ 151 | if 
(ht_map[ht_idx].ht == NULL) 152 | break; 153 | } 154 | if (count >= max_ht_map_entries) { 155 | PyErr_Format(shmht_error, "exceeded max_ht_map_entries(%d) in one process", max_ht_map_entries); 156 | goto create_failed; 157 | } 158 | ht_map[ht_idx].fd = fd; 159 | ht_map[ht_idx].mem_size = mem_size; 160 | ht_map[ht_idx].ht = ht; 161 | 162 | myunlock(fd); 163 | return PyInt_FromLong(ht_idx); 164 | 165 | create_failed: 166 | if (fd >= 0) { 167 | myunlock(fd); 168 | close(fd); 169 | } 170 | if (ht != NULL) 171 | munmap(ht, mem_size); 172 | return NULL; 173 | } 174 | 175 | static PyObject * shmht_close(PyObject *self, PyObject *args) 176 | { 177 | int idx; 178 | if (!PyArg_ParseTuple(args, "i:shmht.create", &idx)) 179 | return NULL; 180 | 181 | if (idx < 0 || idx >= max_ht_map_entries || ht_map[idx].ht == NULL) { 182 | PyErr_Format(shmht_error, "invalid ht id: (%d)", idx); 183 | return NULL; 184 | } 185 | 186 | hashtable *ht = ht_map[idx].ht; 187 | 188 | size_t ref_cnt = ht_destroy(ht); 189 | 190 | if (munmap(ht, ht_map[idx].mem_size) != 0) { 191 | PyErr_Format(shmht_error, "munmap failed: [%d] %s", errno, strerror(errno)); 192 | //return NULL; 193 | } 194 | 195 | // Do not delete the mapping file - somebody else might still 196 | // want it. If the application knows that the shared memory 197 | // should not persist, it can delete the file. 
198 | 199 | close(ht_map[idx].fd); 200 | 201 | memset(&ht_map[idx], 0, sizeof(struct mapnode)); 202 | 203 | Py_RETURN_TRUE; 204 | } 205 | 206 | static PyObject * shmht_getval(PyObject *self, PyObject *args) 207 | { 208 | int idx, key_size; 209 | const char *key; 210 | PyObject * return_value; 211 | 212 | if (!PyArg_ParseTuple(args, "is#:shmht.getval", &idx, &key, &key_size)) 213 | return NULL; 214 | 215 | if (idx < 0 || idx >= max_ht_map_entries || ht_map[idx].ht == NULL) { 216 | PyErr_Format(shmht_error, "invalid ht id: (%d)", idx); 217 | return NULL; 218 | } 219 | 220 | mylock(ht_map[idx].fd); 221 | 222 | hashtable *ht = ht_map[idx].ht; 223 | 224 | ht_str* value = ht_get(ht, key, key_size); 225 | if (value == NULL) { 226 | myunlock(ht_map[idx].fd); 227 | Py_RETURN_NONE; 228 | } 229 | 230 | myunlock(ht_map[idx].fd); 231 | return PyString_FromStringAndSize(value->str, value->size); 232 | } 233 | 234 | static PyObject * shmht_setval(PyObject *self, PyObject *args) 235 | { 236 | int idx, key_size, value_size; 237 | const char *key, *value; 238 | if (!PyArg_ParseTuple(args, "is#s#:shmht.setval", &idx, &key, &key_size, &value, &value_size)) { 239 | return NULL; 240 | } 241 | 242 | if (idx < 0 || idx >= max_ht_map_entries || ht_map[idx].ht == NULL) { 243 | PyErr_Format(shmht_error, "invalid ht id: (%d)", idx); 244 | return NULL; 245 | } 246 | 247 | hashtable *ht = ht_map[idx].ht; 248 | 249 | mylock(ht_map[idx].fd); 250 | 251 | int result = ht_set(ht, key, key_size, value, value_size); 252 | 253 | myunlock(ht_map[idx].fd); 254 | 255 | if (result == False ) { 256 | PyErr_Format(shmht_error, "insert failed for key(%s)", key); 257 | return NULL; 258 | } 259 | 260 | Py_RETURN_TRUE; 261 | } 262 | 263 | static PyObject * shmht_remove(PyObject *self, PyObject *args) 264 | { 265 | int idx, key_size; 266 | const char *key; 267 | if (!PyArg_ParseTuple(args, "is#:shmht.remove", &idx, &key, &key_size)) 268 | return NULL; 269 | 270 | if (idx < 0 || idx >= max_ht_map_entries || 
ht_map[idx].ht == NULL) { 271 | PyErr_Format(shmht_error, "invalid ht id: (%d)", idx); 272 | return NULL; 273 | } 274 | 275 | hashtable *ht = ht_map[idx].ht; 276 | mylock(ht_map[idx].fd); 277 | 278 | int result = ht_remove(ht, key, key_size); 279 | 280 | myunlock(ht_map[idx].fd); 281 | 282 | if ( result == False) 283 | Py_RETURN_FALSE; 284 | else 285 | Py_RETURN_TRUE; 286 | } 287 | 288 | static PyObject * shmht_foreach(PyObject *self, PyObject *args) 289 | { 290 | int idx; 291 | static PyObject *cb = NULL; 292 | 293 | if (!PyArg_ParseTuple(args, "iO:shmht.foreach", &idx, &cb)) 294 | return NULL; 295 | 296 | if (idx < 0 || idx >= max_ht_map_entries || ht_map[idx].ht == NULL) { 297 | PyErr_Format(shmht_error, "invalid ht id: (%d)", idx); 298 | return NULL; 299 | } 300 | 301 | if (!PyCallable_Check(cb)) { 302 | PyErr_SetString(PyExc_TypeError, "parameter must be callable"); 303 | return NULL; 304 | } 305 | 306 | 307 | hashtable *ht = ht_map[idx].ht; 308 | ht_iter *iter = ht_get_iterator(ht); 309 | 310 | mylock(ht_map[idx].fd); 311 | while (ht_iter_next(iter)) { 312 | ht_str *key = iter->key, *value = iter->value; 313 | PyObject *arglist = Py_BuildValue("(s#s#)", key->str, key->size, value->str, value->size); 314 | PyEval_CallObject(cb, arglist); 315 | Py_DECREF(arglist); 316 | } 317 | myunlock(ht_map[idx].fd); 318 | 319 | free(iter); 320 | 321 | Py_RETURN_NONE; 322 | } 323 | 324 | 325 | // TODO: add an msync() operation. 
see https://docs.python.org/2/c-api/init.html#thread-state-and-the-global-interpreter-lock for releasing the GIL during blocking I/O 326 | // TODO: add a find_slot() / put_slot_data() operation, so you don't need to hash the key again when you use the same key repeatedly 327 | -------------------------------------------------------------------------------- /hashtable.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | #include "hashtable.h" 15 | 16 | #ifdef __cplusplus 17 | } 18 | #endif 19 | 20 | #define ht_flag_base(ht) ((char *)(ht) + (ht)->flag_offset) 21 | #define ht_bucket_base(ht) ((char *)(ht) + (ht)->bucket_offset) 22 | 23 | static const unsigned ht_magic = 0xBFBF; 24 | 25 | enum bucket_flag { 26 | empty = 0, used = 1, removed = 2 27 | }; 28 | 29 | size_t header_size = 1024; 30 | 31 | #define bucket_size 1280 32 | #define max_key_size 256 33 | #define max_value_size (bucket_size - max_key_size) 34 | 35 | const float max_load_factor = 0.65; 36 | 37 | static const unsigned int primes[] = { 38 | 53, 97, 193, 389, 39 | 769, 1543, 3079, 6151, 40 | 12289, 24593, 49157, 98317, 41 | 196613, 393241, 786433, 1572869, 42 | 3145739, 6291469, 12582917, 25165843, 43 | 50331653, 100663319, 201326611, 402653189, 44 | 805306457, 1610612741 45 | }; 46 | static const unsigned int prime_table_length = sizeof (primes) / sizeof (primes[0]); 47 | 48 | static inline void fill_ht_str(ht_str *s, const char *str, const u_int32 size) { 49 | s->size = size; 50 | memcpy(s->str, str, size); 51 | } 52 | 53 | static unsigned int ht_get_prime_by(size_t capacity) { 54 | unsigned i = 0; 55 | capacity *= 2; 56 | for (i = 0; i < prime_table_length; i++) { 57 | if (primes[i] > capacity) 58 | return primes[i]; 59 | } 60 | return 0; 61 | } 62 | 63 | size_t ht_memory_size(size_t capacity) { 64 | 
const int flag_size = 1; //char 65 | size_t aligned_capacity = (ht_get_prime_by(capacity) / 4 + 1) * 4; //round up to 4-byte alignment 66 | return header_size //header 67 | + flag_size * aligned_capacity //flag 68 | + bucket_size * aligned_capacity; //bucket 69 | } 70 | 71 | /*dbj2_hash function (copied from libshmht)*/ 72 | static unsigned int dbj2_hash (const char *str, size_t size) { 73 | unsigned long hash = 5381; 74 | while (size--) { 75 | char c = *str++; 76 | hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ 77 | } 78 | return (unsigned int) hash; 79 | } 80 | 81 | BOOL is_equal(const char *a, size_t asize, const char *b, size_t bsize) { 82 | if (asize != bsize) 83 | return False; 84 | return strncmp(a, b, asize) ? False : True; 85 | } 86 | 87 | int ht_is_valid(hashtable *ht) { 88 | return (ht->magic == ht_magic); 89 | } 90 | 91 | /* 92 | * The caller is responsible for the 4-byte alignment of base_addr 93 | * and the size of base_addr should be no less than ht_get_prime_by(capacity) 94 | */ 95 | hashtable* ht_init(void *base_addr, size_t capacity, int force_init) { 96 | hashtable* ht = (hashtable *)base_addr; 97 | if (force_init || !ht_is_valid(ht)) { 98 | ht->magic = ht_magic; 99 | ht->ref_cnt = 0; 100 | 101 | ht->orig_capacity = capacity; 102 | ht->capacity = ht_get_prime_by(capacity); 103 | ht->size = 0; 104 | 105 | ht->flag_offset = header_size; 106 | ht->bucket_offset = ht->flag_offset + (ht->capacity / 4 + 1) * 4; //alignment 107 | 108 | bzero(ht_flag_base(ht), ht->capacity); 109 | } 110 | ht->ref_cnt += 1; 111 | return ht; 112 | } 113 | 114 | static size_t ht_position(hashtable *ht, const char *key, u_int32 key_size, BOOL treat_removed_as_empty) { 115 | char *flag_base = ht_flag_base(ht); 116 | char *bucket_base = ht_bucket_base(ht); 117 | size_t capacity = ht->capacity; 118 | unsigned long hval = dbj2_hash(key, key_size) % capacity; 119 | 120 | size_t i = hval, di = 1; 121 | while (True) { 122 | if (flag_base[i] == empty) 123 | break; 124 | if 
(flag_base[i] == removed && treat_removed_as_empty) 125 | break; 126 | if (flag_base[i] == used) 127 | { 128 | char *bucket = bucket_base + i * bucket_size; 129 | ht_str* bucket_key = (ht_str *)bucket; 130 | if (is_equal(key, key_size, bucket_key->str, bucket_key->size)) { 131 | break; 132 | } 133 | } 134 | i = (i + di) % capacity; 135 | di++; 136 | if (i == hval) { 137 | //extreme condition: when all flags are 'removed' 138 | bzero(flag_base, capacity); 139 | break; 140 | } 141 | } 142 | return i; 143 | } 144 | 145 | ht_str* ht_get(hashtable *ht, const char *key, u_int32 key_size) { 146 | size_t i = ht_position(ht, key, key_size, False); //'removed' bucket is not 'empty' when searching a chain. 147 | if (ht_flag_base(ht)[i] != used) { 148 | return NULL; 149 | } 150 | char *bucket = ht_bucket_base(ht) + i * bucket_size; 151 | return (ht_str*)(bucket + max_key_size); 152 | } 153 | 154 | int ht_set(hashtable *ht, const char *key, u_int32 key_size, const char *value, u_int32 value_size) { 155 | if (sizeof(u_int32) + key_size >= max_key_size || sizeof(u_int32) + value_size >= max_value_size) { 156 | //the item is too large 157 | fprintf(stderr, "the item is too large: key_size(%u), value(%u)\n", key_size, value_size); 158 | return False; 159 | } 160 | 161 | char *flag_base = ht_flag_base(ht); 162 | char *bucket_base = ht_bucket_base(ht); 163 | 164 | ht_str *bucket_key = NULL, *bucket_value = NULL; 165 | 166 | //if it exists: just find and modify it's value 167 | bucket_value = ht_get(ht, key, key_size); 168 | if (bucket_value) { 169 | fill_ht_str(bucket_value, value, value_size); 170 | return True; 171 | } 172 | 173 | //else: find an available bucket, which can be both 'empty' or 'removed' 174 | size_t i = ht_position(ht, key, key_size, True); 175 | 176 | if (ht->capacity * max_load_factor < ht->size) { 177 | //hash table is over loaded 178 | fprintf(stderr, "hash table is over loaded, capacity=%lu, size=%lu\n", ht->capacity, ht->size); 179 | return False; 180 | } 181 
| 182 | ht->size += 1; 183 | flag_base[i] = used; 184 | 185 | char *bucket = bucket_base + i * bucket_size; 186 | bucket_key = (ht_str*)bucket; 187 | bucket_value = (ht_str*)(bucket + max_key_size); 188 | fill_ht_str(bucket_key, key, key_size); 189 | fill_ht_str(bucket_value, value, value_size); 190 | return True; 191 | } 192 | 193 | int ht_remove(hashtable *ht, const char *key, u_int32 key_size) { 194 | size_t i = ht_position(ht, key, key_size, False); //'removed' bucket is not 'empty' when searching a chain. 195 | if (ht_flag_base(ht)[i] != used) { 196 | return False; 197 | } 198 | ht_flag_base(ht)[i] = removed; 199 | ht->size -= 1; 200 | return True; 201 | } 202 | 203 | //don't forget to free(ht_iter) 204 | ht_iter* ht_get_iterator(hashtable *ht) { 205 | ht_iter* iter = ALLOC(ht_iter, 1); 206 | assert(iter != NULL); 207 | iter->ht = ht; 208 | iter->pos = -1; 209 | return iter; 210 | } 211 | 212 | int ht_iter_next(ht_iter* iter) { 213 | size_t i = 0; 214 | hashtable *ht = iter->ht; 215 | char *flag_base = ht_flag_base(ht); 216 | char *bucket_base = ht_bucket_base(ht); 217 | 218 | for (i = iter->pos + 1; i < ht->capacity; i++) { 219 | if (flag_base[i] == used) { 220 | char *bucket = bucket_base + i * bucket_size; 221 | iter->key = (ht_str*)bucket, iter->value = (ht_str*)(bucket + max_key_size); 222 | iter->pos = i; 223 | return True; 224 | } 225 | } 226 | return False; 227 | } 228 | 229 | int ht_destroy(hashtable *ht) { 230 | ht->ref_cnt -= 1; 231 | return ht->ref_cnt == 0 ? 
True : False; 232 | } 233 | 234 | /* 235 | 236 | //commented out together with 'main' to eliminate compiler's complaint 237 | static void dump_ht_str(ht_str *s) { 238 | if (s) { 239 | printf("%u: %*s\n", s->size, (int)s->size, s->str); 240 | } 241 | else { 242 | printf("(nil)\n"); 243 | } 244 | } 245 | 246 | int main() { 247 | size_t capacity = 500000; 248 | printf("%u\n", ht_get_prime_by(capacity)); 249 | printf("%lu\n", ht_memory_size(capacity)); 250 | void *mem = malloc(ht_memory_size(capacity) + 1); 251 | hashtable *ht = ht_init(mem, capacity, 0); 252 | 253 | ht_set(ht, "hello", 5, "-----", 5); 254 | ht_set(ht, "hello1", 6, "hello1", 6); 255 | ht_set(ht, "hello", 5, "hello", 5); 256 | ht_remove(ht, "hello", 5); 257 | 258 | ht_str* s = NULL; 259 | 260 | s = ht_get(ht, "hello", 5); 261 | dump_ht_str(s); 262 | 263 | s = ht_get(ht, "hello1", 6); 264 | dump_ht_str(s); 265 | 266 | ht_set(ht, "a", 1, "a", 1); 267 | ht_set(ht, "b", 1, "b", 1); 268 | ht_set(ht, "c", 1, "c", 1); 269 | ht_set(ht, "d", 1, "d", 1); 270 | printf("ht->size: %lu\n", ht->size); 271 | 272 | ht_remove(ht, "c", 1); 273 | 274 | hashtable* ht1 = ht_init(mem, capacity, 0); 275 | 276 | ht_iter* iter = ht_get_iterator(ht1); 277 | while (ht_iter_next(iter)) { 278 | ht_str *key = iter->key, *value = iter->value; 279 | printf("%*s => %*s\n", (int)key->size, key->str, (int)value->size, value->str); 280 | } 281 | free(iter); 282 | printf("ht_get_iterator test ok\n"); 283 | 284 | char x[128]; 285 | int i, len; 286 | struct timeval begin, end; 287 | #define ts(tv) (tv.tv_sec + tv.tv_usec / 1000000.0) 288 | 289 | gettimeofday(&begin, NULL); 290 | for (i = 0; i < (int)capacity; i++) { 291 | len = sprintf(x, "%064d", i); 292 | if (ht_set(ht, x, len, x, len) == 0) { 293 | printf("set wrong @ %d\n", i); 294 | return 1; 295 | } 296 | } 297 | gettimeofday(&end, NULL); 298 | printf("set test: %.0lf iops\n", capacity / (ts(end) - ts(begin))); 299 | 300 | gettimeofday(&begin, NULL); 301 | for (i = 0; i < (int)capacity; 
i++) { 302 | len = sprintf(x, "%064d", i); 303 | ht_str* val = ht_get(ht, x, len); 304 | if (val == NULL || !is_equal(x, len, val->str, val->size)) { 305 | printf("(after set)get wrong @ %d\n", i); 306 | return 1; 307 | } 308 | } 309 | gettimeofday(&end, NULL); 310 | printf("get test: %.0lf iops\n", capacity / (ts(end) - ts(begin))); 311 | 312 | for (i = 0; i < (int)capacity; i += 2) { 313 | len = sprintf(x, "%064d", i); 314 | if (ht_remove(ht, x, len) == 0) { 315 | printf("remove wrong @ %d\n", i); 316 | return 1; 317 | } 318 | len = sprintf(x, "%064d", i + 1); 319 | ht_str* val = ht_get(ht, x, len); 320 | if (val == NULL || !is_equal(x, len, val->str, val->size)) { 321 | printf("(after remove)get wrong @ %d\n", i); 322 | return 1; 323 | } 324 | } 325 | printf("remove/get test ok\n"); 326 | 327 | //while(1) sleep(1000); 328 | return 0; 329 | } 330 | // */ 331 | --------------------------------------------------------------------------------