├── .codelite ├── compilation.db ├── cppcheck.list ├── refactoring.db ├── sftp-workspace-settings.conf ├── subversion.conf ├── test_hash.session ├── test_hash.tags ├── test_hash.workspace.christiaan ├── test_hash.workspace.pretoric └── tweaks.conf ├── COPYING ├── Makefile ├── README.txt ├── rabbit.mk ├── rabbit.txt ├── rabbit ├── int_string.h ├── rabbit_map.h ├── rabbit_set.h ├── unordered_map └── unordered_set ├── rabbit_tests ├── main.cpp ├── rabbit_tests.cbp ├── rabbit_tests.depend ├── rabbit_tests.layout └── time_hash_maps.cpp ├── test_hash.mk ├── test_hash.project ├── test_hash.txt ├── test_hash.workspace └── vs ├── vs.sln ├── vs.v11.suo └── vs ├── ReadMe.txt ├── vs.vcxproj └── vs.vcxproj.filters /.codelite/compilation.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tjizep/rabbit/e11c23f3b2de1bf3a387cefedccfa5ec1686a83f/.codelite/compilation.db -------------------------------------------------------------------------------- /.codelite/cppcheck.list: -------------------------------------------------------------------------------- 1 | c:\dev\test_hash\main.cpp 2 | -------------------------------------------------------------------------------- /.codelite/refactoring.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tjizep/rabbit/e11c23f3b2de1bf3a387cefedccfa5ec1686a83f/.codelite/refactoring.db -------------------------------------------------------------------------------- /.codelite/sftp-workspace-settings.conf: -------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /.codelite/subversion.conf: -------------------------------------------------------------------------------- 1 | { 2 | "svn-settings": { 3 | "m_repoPath": "C:\\dev\\cpp_all\\rabbit" 4 | } 5 | } 
-------------------------------------------------------------------------------- /.codelite/test_hash.session: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /.codelite/test_hash.tags: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tjizep/rabbit/e11c23f3b2de1bf3a387cefedccfa5ec1686a83f/.codelite/test_hash.tags -------------------------------------------------------------------------------- /.codelite/test_hash.workspace.christiaan: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /.codelite/test_hash.workspace.pretoric: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /.codelite/tweaks.conf: -------------------------------------------------------------------------------- 1 | { 2 | } -------------------------------------------------------------------------------- /COPYING: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Christiaan Pretorius 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all 
copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean All 2 | 3 | All: 4 | @echo "----------Building project:[ rabbit - Release ]----------" 5 | @$(MAKE) -f "rabbit.mk" 6 | clean: 7 | @echo "----------Cleaning project:[ rabbit - Release ]----------" 8 | @$(MAKE) -f "rabbit.mk" clean 9 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | # rabbit v 1.2 r2 2 | stl compatible hashtable (rabbit::unordered_map or rabbit::sparse_unordered_map) 3 | 4 | Using: 5 | ------------------------------------------------------------------ 6 | 7 | #include 8 | 9 | void rabbits(){ 10 | rabbit::unordered_map int_map; 11 | int_map.insert(0,1); 12 | if(int_map[0] == 1){ 13 | /// xyz 14 | } 15 | ... 16 | rabbit::sparse_unordered_map sparse_int_map; 17 | int_map.insert(0,1); 18 | if(int_map[0] == 1){ 19 | /// xyz 20 | } 21 | } 22 | 23 | Advantages: 24 | ----------- 25 | 26 | 1. Very Fast and sometimes small or just Fast and Very Small when set_min_load_factor(> 0.7),set_logarithmic(>=4) 27 | you can also use rabbit::sparse_unordered_map to get the same effect 28 | 2. Strong guarantees for hash table size in sparse mode 29 | i.e. 
Sparse version of hash table is close to the size of google sparse hash 30 | even though it has a step shaped memory use curve 31 | 3. Std api compatible with stl 32 | 4. sparseness can be dialled in dynamically when the need arises - only effective after rehash (use set_logarithmic(1..32)) 33 | 34 | Disadvantages 35 | ------------- 36 | 37 | If a rehash takes place during iteration (because of inserts during iteration) the iterator becomes 38 | invalid. It won't crash but it might skip previously added elements. 39 | It is best to rehash to the approximate future size before starting an iteration which will cause 40 | inserts. Erases and updates are stable. 41 | 42 | Algorithm Description 43 | --------------------- 44 | 45 | rabbit is both a closed *and* openly addressed hash table. 46 | 47 | Open addressing part 48 | -------------------- 49 | 50 | Keys are located via a truncated linear probe of constant length in case of the dense version. 51 | The linear probe is logarithmically related to the hash size when the sparse flag is set 52 | with the set_logarithmic(>=1) function. 53 | 54 | Rabbit associates each key with two bits, which are maintained separately. 55 | The first bit is for a key's existence and the second bit is a collision indicator. 56 | The collision indicator removes the need to search for non-existing keys, which is a 57 | problem in the standard linear probing algorithm. 58 | 59 | The bits and the key/value pairs are each stored in separate arrays to provide better CPU cache 60 | behaviour. For instance the existence bits will stay in cache longer so that memory access 61 | to these structures is reduced. 62 | 63 | Closed addressing 64 | ----------------- 65 | 66 | At the end of the key array rabbit also maintains a single bucket. If any key is inserted and 67 | an open slot is not found within the current probe length it is added here. This bucket is 68 | accessed like a stack although removing items in the middle will not reduce its height. 69 | Items are added at the back. 
70 | 71 | In the semi-dense variation of the algorithm the size of this bucket is maintained at a constant 72 | factor. In the sparse version the single bucket size is a logarithmically increasing number. 73 | 74 | Once the single bucket is full a rehash is performed on a new table with twice as many keys. 75 | In case of the sparse table a load factor of ~0.75 is maintained. 76 | 77 | Safety 78 | ------ 79 | 80 | A randomization protocol is activated when the table rehashes and a minimum load factor is not 81 | reached. 82 | 83 | Changes to algorithm for 1.2 (Minimum load factor) 84 | -------------------------------------------------- 85 | 86 | The probe length is not constant anymore but changes (increases) when keys are added to the bucket 87 | while the minimum load factor value is not reached. A set_min_load_factor(x) function, with x in (0,1], is added. 88 | This can be used to optimize for speed or memory use. 89 | The minimum load factor defaults to 0.5. 90 | This results in much less variation in memory use while not affecting performance. 91 | Values around 0.25 give high read performance and slightly slower growth while using more memory; 92 | values around 0.7 give much better growth and slightly slower random reads while using about half 93 | the memory. 94 | 95 | Experimentation also revealed that the hash table is much less sensitive to bad hash functions after this 96 | change. 97 | 98 | Note 99 | ---- 100 | 101 | The previous version stored keys and values separately, which reduced memory use when 102 | sizeof(key)+sizeof(value) < sizeof(std::pair<key,value>). This behaviour isn't 103 | available anymore as the penalty for random read access is usually too high. 
104 | -------------------------------------------------------------------------------- /rabbit.mk: -------------------------------------------------------------------------------- 1 | ## 2 | ## Auto Generated makefile by CodeLite IDE 3 | ## any manual changes will be erased 4 | ## 5 | ## Release 6 | ProjectName :=rabbit 7 | ConfigurationName :=Release 8 | WorkspacePath := "C:\dev\cpp_all\rabbit" 9 | ProjectPath := "C:\dev\cpp_all\rabbit" 10 | IntermediateDirectory :=./Release 11 | OutDir := $(IntermediateDirectory) 12 | CurrentFileName := 13 | CurrentFilePath := 14 | CurrentFileFullPath := 15 | User :=christiaan 16 | Date :=06/19/15 17 | CodeLitePath :="C:\Program Files\CodeLite" 18 | LinkerName :=C:/TDM-GCC-64/bin/g++.exe 19 | SharedObjectLinkerName :=C:/TDM-GCC-64/bin/g++.exe -shared -fPIC 20 | ObjectSuffix :=.o 21 | DependSuffix :=.o.d 22 | PreprocessSuffix :=.i 23 | DebugSwitch :=-g 24 | IncludeSwitch :=-I 25 | LibrarySwitch :=-l 26 | OutputSwitch :=-o 27 | LibraryPathSwitch :=-L 28 | PreprocessorSwitch :=-D 29 | SourceSwitch :=-c 30 | OutputFile :=$(IntermediateDirectory)/$(ProjectName) 31 | Preprocessors :=$(PreprocessorSwitch)NDEBUG 32 | ObjectSwitch :=-o 33 | ArchiveOutputSwitch := 34 | PreprocessOnlySwitch :=-E 35 | ObjectsFileList :="rabbit.txt" 36 | PCHCompileFlags := 37 | MakeDirCommand :=makedir 38 | RcCmpOptions := 39 | RcCompilerName :=C:/TDM-GCC-64/bin/windres.exe 40 | LinkOptions := 41 | IncludePath := $(IncludeSwitch). $(IncludeSwitch). $(IncludeSwitch)C:/dev/cpp_all/repo/sparsehash-2.0.2/src/windows $(IncludeSwitch)C:/dev/cpp_all/repo/sparsehash-2.0.2/src 42 | IncludePCH := 43 | RcIncludePath := 44 | Libs := $(LibrarySwitch)psapi 45 | ArLibs := "psapi" 46 | LibPath := $(LibraryPathSwitch). 
47 | 48 | ## 49 | ## Common variables 50 | ## AR, CXX, CC, AS, CXXFLAGS and CFLAGS can be overriden using an environment variables 51 | ## 52 | AR := C:/TDM-GCC-64/bin/ar.exe rcu 53 | CXX := C:/TDM-GCC-64/bin/g++.exe 54 | CC := C:/TDM-GCC-64/bin/gcc.exe 55 | CXXFLAGS := -O3 -fexpensive-optimizations -std=c++11 -Wall $(Preprocessors) 56 | CFLAGS := -O2 -Wall $(Preprocessors) 57 | ASFLAGS := 58 | AS := C:/TDM-GCC-64/bin/as.exe 59 | 60 | 61 | ## 62 | ## User defined environment variables 63 | ## 64 | CodeLiteDir:=C:\Program Files\CodeLite 65 | Objects0=$(IntermediateDirectory)/tests_main.cpp$(ObjectSuffix) $(IntermediateDirectory)/time_hash_maps.cpp$(ObjectSuffix) 66 | 67 | 68 | 69 | Objects=$(Objects0) 70 | 71 | ## 72 | ## Main Build Targets 73 | ## 74 | .PHONY: all clean PreBuild PrePreBuild PostBuild 75 | all: $(OutputFile) 76 | 77 | $(OutputFile): $(IntermediateDirectory)/.d $(Objects) 78 | @$(MakeDirCommand) $(@D) 79 | @echo "" > $(IntermediateDirectory)/.d 80 | @echo $(Objects0) > $(ObjectsFileList) 81 | $(LinkerName) $(OutputSwitch)$(OutputFile) @$(ObjectsFileList) $(LibPath) $(Libs) $(LinkOptions) 82 | 83 | $(IntermediateDirectory)/.d: 84 | @$(MakeDirCommand) "./Release" 85 | 86 | PreBuild: 87 | 88 | 89 | ## 90 | ## Objects 91 | ## 92 | $(IntermediateDirectory)/tests_main.cpp$(ObjectSuffix): tests/main.cpp $(IntermediateDirectory)/tests_main.cpp$(DependSuffix) 93 | $(CXX) $(IncludePCH) $(SourceSwitch) "C:/dev/cpp_all/rabbit/tests/main.cpp" $(CXXFLAGS) $(ObjectSwitch)$(IntermediateDirectory)/tests_main.cpp$(ObjectSuffix) $(IncludePath) 94 | $(IntermediateDirectory)/tests_main.cpp$(DependSuffix): tests/main.cpp 95 | @$(CXX) $(CXXFLAGS) $(IncludePCH) $(IncludePath) -MG -MP -MT$(IntermediateDirectory)/tests_main.cpp$(ObjectSuffix) -MF$(IntermediateDirectory)/tests_main.cpp$(DependSuffix) -MM "tests/main.cpp" 96 | 97 | $(IntermediateDirectory)/tests_main.cpp$(PreprocessSuffix): tests/main.cpp 98 | @$(CXX) $(CXXFLAGS) $(IncludePCH) $(IncludePath) 
$(PreprocessOnlySwitch) $(OutputSwitch) $(IntermediateDirectory)/tests_main.cpp$(PreprocessSuffix) "tests/main.cpp" 99 | 100 | $(IntermediateDirectory)/time_hash_maps.cpp$(ObjectSuffix): time_hash_maps.cpp $(IntermediateDirectory)/time_hash_maps.cpp$(DependSuffix) 101 | $(CXX) $(IncludePCH) $(SourceSwitch) "C:/dev/cpp_all/rabbit/time_hash_maps.cpp" $(CXXFLAGS) $(ObjectSwitch)$(IntermediateDirectory)/time_hash_maps.cpp$(ObjectSuffix) $(IncludePath) 102 | $(IntermediateDirectory)/time_hash_maps.cpp$(DependSuffix): time_hash_maps.cpp 103 | @$(CXX) $(CXXFLAGS) $(IncludePCH) $(IncludePath) -MG -MP -MT$(IntermediateDirectory)/time_hash_maps.cpp$(ObjectSuffix) -MF$(IntermediateDirectory)/time_hash_maps.cpp$(DependSuffix) -MM "time_hash_maps.cpp" 104 | 105 | $(IntermediateDirectory)/time_hash_maps.cpp$(PreprocessSuffix): time_hash_maps.cpp 106 | @$(CXX) $(CXXFLAGS) $(IncludePCH) $(IncludePath) $(PreprocessOnlySwitch) $(OutputSwitch) $(IntermediateDirectory)/time_hash_maps.cpp$(PreprocessSuffix) "time_hash_maps.cpp" 107 | 108 | 109 | -include $(IntermediateDirectory)/*$(DependSuffix) 110 | ## 111 | ## Clean 112 | ## 113 | clean: 114 | $(RM) -r ./Release/ 115 | 116 | 117 | -------------------------------------------------------------------------------- /rabbit.txt: -------------------------------------------------------------------------------- 1 | ./Release/tests_main.cpp.o ./Release/time_hash_maps.cpp.o 2 | -------------------------------------------------------------------------------- /rabbit/int_string.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | namespace rabbit { 8 | 9 | class int_string { 10 | public: 11 | typedef unsigned char size_type; 12 | typedef unsigned long long string_int; 13 | private: 14 | static const size_t D = 3; 15 | size_type size; 16 | size_t hash_val; 17 | string_int ints[D]; 18 | const string_int * end() const { 19 | return 
(&ints[D]); 20 | } 21 | /* begin(): address of the first word of the ints array. */ const string_int * begin() const { 22 | return (&ints[0]); 23 | } 24 | /* copy_chars: clears the destination word, then copies at most sizeof(string_int) bytes from src into it, one byte at a time. NOTE(review): len and sizeof(string_int) differ in type, so std::min presumably carried an explicit template argument that is missing from this copy; confirm against the upstream header. */ void copy_chars(string_int* dest, const char * src, size_type len) { 25 | size_type lm = std::min(len, sizeof(string_int)); 26 | char * data = (char *)(dest); 27 | *dest = string_int(); 28 | for (size_type i = size_type(); i < lm; ++i) { 29 | data[i] = src[i]; 30 | } 31 | } 32 | 33 | /* copy_string(source, l): keeps at most D words minus one byte of the input (anything longer is silently truncated), fills every one of the D words so unused bytes end up zero, then stores the FNV-1a hash of the kept bytes in hash_val. */ void copy_string(const char * source, size_t l) { 34 | size_type isize = sizeof(string_int); 35 | size = (size_type)std::min(D*isize-1, l); 36 | size_type remaining = size; 37 | string_int * d = ints; 38 | const char * s = source; 39 | while (d < end()) { 40 | size_type todo = std::min(remaining, isize); 41 | copy_chars(d, s, todo); 42 | s += todo; 43 | ++d; 44 | remaining -= todo; 45 | } 46 | hash_val = fnv_1a_bytes(); 47 | } 48 | 49 | /* copy_string(std::string): forwards the string's character data and length to the overload above. */ void copy_string(const std::string& s) { 50 | copy_string(s.c_str(), s.size()); 51 | } 52 | 53 | public: 54 | /* Default constructor: size 0, hash_val 0 and all D words zeroed. */ int_string() 55 | : size(size_type()) 56 | , hash_val(size_t()){ 57 | for (int i = 0; i < D; ++i) 58 | ints[i] = string_int(); 59 | } 60 | /* Converting constructor from std::string; initialises through copy_string. */ int_string(const std::string& s) { 61 | copy_string(s); 62 | } 63 | /* Converting constructor from a C string; std::strlen is called on s, so s must be a non-null, NUL-terminated string. */ int_string(const char * s) { 64 | copy_string(s, std::strlen(s)); 65 | } 66 | /* Copy constructor: delegates to the copy assignment operator. */ int_string(const int_string& s) { 67 | *this = s; 68 | 69 | } 70 | /* Assignment from std::string: re-initialises through copy_string, then asserts that the stored size is below the D-word capacity. */ int_string& operator=(const std::string& s) { 71 | copy_string(s); 72 | assert(size < sizeof(string_int) * D); 73 | return *this; 74 | } 75 | /* Copy assignment: copies the D words, the size and the cached hash_val. */ int_string& operator=(const int_string& s) { 76 | string_int * d = ints; 77 | const string_int * rd = s.ints; 78 | while (d < end()) { 79 | *d = *rd; 80 | ++d; 81 | ++rd; 82 | } 83 | size = s.size; 84 | hash_val = s.hash_val; 85 | return *this; 86 | } 87 | /* Equality: sizes must match and all D words must compare equal. */ bool operator==(const int_string& s) const { 88 | if (size != s.size) return false; 89 | const string_int * d = ints; 90 | const string_int * rd = s.ints; 91 | while (d < end()) { 92 | if (*d != *rd) 93 | return false; 94 | ++d; 95 | ++rd; 96 | } 97 | return true; 98 | } 99 | /* Inequality: negation of operator==. */ bool operator!=(const int_string& s) 
const { 100 | return !(*this == s); 101 | } 102 | /* Ordering: compares the D words in order as integers and decides on the first differing word; when all words match the smaller size sorts first. NOTE(review): words are compared by integer value, so the order presumably differs from byte-wise lexicographic order on little-endian targets; confirm before relying on the sort order. */ bool operator<(const int_string& s) const { 103 | const string_int * d = ints; 104 | const string_int * rd = s.ints; 105 | while (d < end()) { 106 | if (*d != *rd) 107 | return *d < *rd; 108 | ++d; 109 | ++rd; 110 | } 111 | return size < s.size; 112 | } 113 | /* c_str(): the ints storage viewed as a char pointer. */ const char * c_str() const { 114 | return (const char *)&ints[0]; 115 | } 116 | /* to_string(): std::string built from the first size bytes of the storage. */ std::string to_string() const { 117 | return std::string(this->c_str(), size); 118 | } 119 | 120 | /* fnv_1a_bytes(bytes, count): FNV-1a over count bytes, using the 64-bit basis and prime constants and accumulating in a size_t. */ size_t fnv_1a_bytes(const unsigned char *bytes, size_t count) const { 121 | const unsigned long long FNV64prime = 0x00000100000001B3ull; 122 | const unsigned long long FNV64basis = 0xCBF29CE484222325ull; 123 | size_t r = FNV64basis; 124 | for (size_t a = 0; a < count; ++a){ 125 | r ^= (size_t)bytes[a]; // folding of one byte at a time 126 | r *= FNV64prime; 127 | } 128 | return r; 129 | } 130 | 131 | /* fnv_1a_bytes(): FNV-1a over the first size bytes of this string. */ size_t fnv_1a_bytes()const{ 132 | size_t r = size; 133 | const char * s = c_str(); 134 | return fnv_1a_bytes((const unsigned char *)s,r); 135 | } 136 | 137 | /* hash(): returns the cached hash_val when it is non-zero, otherwise recomputes the FNV-1a hash. */ size_t hash() const { 138 | if(hash_val) return hash_val; 139 | //return fnv_1a(); 140 | return fnv_1a_bytes(); 141 | } 142 | /* bad_hash(): starts at 31 and adds 31 times each of the D words. */ size_t bad_hash() const { 143 | size_t r = 31; 144 | const string_int * i = begin(); 145 | while (i < end()) { 146 | r += 31*(*i); 147 | ++i; 148 | } 149 | return r; 150 | } 151 | }; 152 | /* rabbit_hash specialisation taking int_string keys: returns k.hash(). */ template<> 153 | struct rabbit_hash { 154 | size_t operator()(const int_string& k) const { 155 | return k.hash(); 156 | }; 157 | }; 158 | }; 159 | namespace std { 160 | /* std::hash specialisation taking rabbit::int_string keys: returns k.hash(). */ template<> 161 | struct hash { 162 | size_t operator()(const rabbit::int_string& k) const { 163 | return k.hash(); 164 | }; 165 | }; 166 | } 167 | -------------------------------------------------------------------------------- /rabbit/rabbit_map.h: -------------------------------------------------------------------------------- 1 | #ifndef _RABBIT_H_CEP_20150303_ 2 | #define _RABBIT_H_CEP_20150303_ 3 | /** 4 | The MIT License (MIT) 5 | Copyright (c) 
2015,2016,2017 Christiaan Pretorius 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
21 | **/ 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | /// the rab-bit hash 35 | /// probably the world's simplest working hashtable - only kidding 36 | /// it uses linear probing for the first level of fallback and then an overflow area or secondary hash 37 | 38 | #ifdef _MSC_VER 39 | #define RABBIT_NOINLINE_PRE _declspec(noinline) 40 | #define RABBIT_NOINLINE_ 41 | #else 42 | #define RABBIT_NOINLINE_PRE 43 | #define RABBIT_NOINLINE_ __attribute__((noinline)) 44 | #endif 45 | namespace rabbit{ 46 | 47 | /* _BinMapper: turns a hash value into a slot index by masking it with extent1 (extent - 1), where extent is 1 shifted left by config.log2 of the requested size. */ template 48 | struct _BinMapper{ 49 | typedef typename _Config::size_type size_type; 50 | typedef _Config config_type; 51 | size_type extent; 52 | size_type extent1; 53 | size_type extent2; 54 | size_type primary_bits; 55 | size_type random_val; 56 | unsigned long long gate_bits; 57 | _Config config; 58 | /* Default constructor: has an empty body, so the scalar members above are not given values here. */ _BinMapper(){ 59 | } 60 | /* Sizing constructor: stores the config, derives extent, extent1, extent2 and primary_bits from config.log2(new_extent), fixes random_val at 0, and sets gate_bits to 2^32 - 1 when new_extent is below 2^32 and to 2^62 - 1 otherwise. */ _BinMapper(size_type new_extent,const _Config& config){ 61 | this->config = config; 62 | this->extent = ((size_type)1) << this->config.log2(new_extent); 63 | this->extent1 = this->extent-1; 64 | this->extent2 = this->config.log2(new_extent); 65 | this->primary_bits = extent2; 66 | //std::minstd_rand rd; 67 | //std::mt19937 gen(rd()); 68 | //std::uniform_int_distribution dis(1ll<<4, std::numeric_limits::max()); 69 | this->random_val = 0; //(size_type)dis(gen); 70 | if(new_extent < (1ll<<32ll)){ 71 | this->gate_bits = (1ll<<32ll) - 1ll; 72 | }else{ 73 | this->gate_bits = (1ll<<62ll) - 1ll; 74 | } 75 | } 76 | /* nearest_larger: returns 2 shifted left by config.log2(any). */ inline size_type nearest_larger(size_type any){ 77 | size_type l2 = this->config.log2(any); 78 | return (size_type)(2ll << l2); 79 | } 80 | 81 | // FNV-1a hash function for bytes 82 | // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function 83 | // https://tools.ietf.org/html/draft-eastlake-fnv-13#section-6 84 | // used to attempt to fix bad hashes from code 85 | size_type fnv_1a_bytes(const 
unsigned char *bytes, size_type count) const { 86 | const unsigned long long FNV64prime = 0x00000100000001B3ull; 87 | const unsigned long long FNV64basis = 0xCBF29CE484222325ull; 88 | size_t r = FNV64basis; 89 | for (size_t a = 0; a < count; ++a){ 90 | r ^= (size_t)bytes[a]; // folding of one byte at a time 91 | r *= FNV64prime; 92 | } 93 | return r; 94 | } 95 | /* fnv_1a_: FNV-1a over the object bytes of the argument. */ size_type fnv_1a_(size_type other) const { 96 | return fnv_1a_bytes((const unsigned char *)&other,sizeof(other)); 97 | } 98 | /* randomize: with r = other >> primary_bits, returns other + ((r*r) >> 2); the FNV-based variants are commented out. */ size_type randomize(size_type other) const { 99 | size_type rand_other = other;// fnv_1a_(); 100 | size_type r = rand_other >> this->primary_bits; 101 | return rand_other + ((r*r) >> 2); // fnv_1a_(other + ((r*r) >> 2)); //(other ^ random_val) & this->extent1; 102 | } 103 | /* operator(): masks the hash with extent1 to produce the slot index. */ size_type operator()(size_type h_n) const { 104 | return h_n & this->extent1 ; 105 | } 106 | /* resize_factor: constant 2. */ double resize_factor() const { 107 | return 2; 108 | } 109 | /* recalc_growth_factor: ignores its argument and returns 2. */ double recalc_growth_factor(size_type elements) { 110 | return 2; 111 | } 112 | 113 | /* next_size: extent multiplied by the growth factor; asserts that the result is larger than extent. */ inline size_type next_size(){ 114 | 115 | double r = recalc_growth_factor(this->extent) * this->extent; 116 | assert(r > (double)extent); 117 | return (size_type)r; 118 | } 119 | }; 120 | /* rabbit_hash primary template: forwards to std::hash of the key type. */ template 121 | struct rabbit_hash{ 122 | size_t operator()(const _Ht& k) const{ 123 | return (size_t) std::hash<_Ht>()(k); /// 124 | }; 125 | }; 126 | /* Specialisation taking long keys: the key value cast to unsigned long. */ template<> 127 | struct rabbit_hash{ 128 | unsigned long operator()(const long& k) const{ 129 | return (unsigned long)k; 130 | }; 131 | }; 132 | /* Specialisation taking unsigned long keys: returns the key unchanged. */ template<> 133 | struct rabbit_hash{ 134 | inline unsigned long operator()(const unsigned long& k) const{ 135 | return k; 136 | }; 137 | }; 138 | /* Specialisation taking unsigned int keys: returns the key unchanged. */ template<> 139 | struct rabbit_hash{ 140 | inline unsigned int operator()(const unsigned int& k) const{ 141 | return k; 142 | }; 143 | }; 144 | /* Specialisation taking int keys: the key converted to unsigned int. */ template<> 145 | struct rabbit_hash{ 146 | inline unsigned int operator()(const int& k) const{ 147 | return k; 148 | }; 149 | }; 150 | /* Specialisation taking unsigned long long keys: returns the key unchanged. */ template<> 151 | struct rabbit_hash{ 152 | inline unsigned long long 
operator()(const unsigned long long& k) const{ 153 | return k; 154 | }; 155 | }; 156 | /* Specialisation taking long long keys: identity hash. The cast goes to unsigned long long, matching the declared return type, so the upper 32 bits of the key are kept on targets where unsigned long is only 32 bits wide. */ template<> 157 | struct rabbit_hash{ 158 | inline unsigned long long operator()(const long long& k) const{ 159 | return (unsigned long long)k; 160 | }; 161 | }; 162 | 163 | /* basic_config: tunable constants for the table (probe counts, overflow sizing, default minimum load factor) together with an integer log2 helper. */ template 164 | class basic_config{ 165 | public: 166 | typedef unsigned long long int _Bt; /// exists ebucket type - not using vector - interface does not support bit bucketing 167 | /// if even more speed is desired but you're willing to live with a 4 billion key limit then 168 | //typedef unsigned long size_type; 169 | typedef std::size_t size_type; 170 | 171 | /* log2: counts how many times n can be shifted right by one before it reaches zero; returns 0 for n equal to 0 or 1. */ size_type log2(size_type n){ 172 | size_type r = 0; 173 | while (n >>= 1) 174 | { 175 | r++; 176 | } 177 | return r; 178 | } 179 | _Bt BUCKET_COUNT ; 180 | _Bt CHAR_BITS ; 181 | _Bt BITS_SIZE ; 182 | _Bt BITS_SIZE1 ; 183 | _Bt ALL_BITS_SET ; 184 | _Bt LOGARITHMIC ; 185 | /// maximum probes per access 186 | size_type MIN_PROBES; /// the minimum starting value of probes which is increased if the bucket is used at a load factor < min load factor 187 | size_type DEFAULT_PROBES; /// if the min load factor is set to an unusable value 188 | size_type PROBE_INCR; 189 | size_type SAFETY_PROBES_FACTOR; /// probes when bad hashes or attacks are detected 190 | size_type BITS_LOG2_SIZE; 191 | /// this distributes the h values which are powers of 2 a little to avoid primary clustering when there is no 192 | /// hash randomizer available 193 | size_type MIN_OVERFLOW; 194 | size_type MIN_EXTENT; 195 | size_type MAX_OVERFLOW_FACTOR ; 196 | size_type SAFETY_OVERFLOW_FACTOR; /// overflow factor when bad hashes or attacks are detected 197 | float DEFAULT_MIN_LOAD_FACTOR; 198 | /* Copy constructor: delegates to the copy assignment operator. */ basic_config(const basic_config& right){ 199 | *this = right; 200 | } 201 | 202 | /* Copy assignment: copies every configuration field from right. */ basic_config& operator=(const basic_config& right){ 203 | BUCKET_COUNT = right.BUCKET_COUNT; 204 | CHAR_BITS = right.CHAR_BITS; 205 | BITS_SIZE = right.BITS_SIZE; 206 | BITS_SIZE1 = right.BITS_SIZE1; 207 | BITS_LOG2_SIZE = 
right.BITS_LOG2_SIZE; 208 | ALL_BITS_SET = right.ALL_BITS_SET; 209 | MIN_PROBES = right.MIN_PROBES; 210 | DEFAULT_PROBES = right.DEFAULT_PROBES; 211 | PROBE_INCR = right.PROBE_INCR; 212 | SAFETY_PROBES_FACTOR = right.SAFETY_PROBES_FACTOR; 213 | MIN_EXTENT = right.MIN_EXTENT; 214 | MIN_OVERFLOW = right.MIN_OVERFLOW; 215 | MAX_OVERFLOW_FACTOR = right.MAX_OVERFLOW_FACTOR; 216 | SAFETY_OVERFLOW_FACTOR = right.SAFETY_OVERFLOW_FACTOR; 217 | LOGARITHMIC = right.LOGARITHMIC; 218 | DEFAULT_MIN_LOAD_FACTOR = right.DEFAULT_MIN_LOAD_FACTOR; 219 | return *this; 220 | } 221 | 222 | /* Default constructor: assigns the defaults listed below. BITS_SIZE is the bit width of _Bt, BITS_SIZE1 is one less, and BITS_LOG2_SIZE is log2 of BITS_SIZE. LOGARITHMIC is taken from the name logarithmic, presumably the class template parameter (its declaration is not visible in this copy; confirm). */ basic_config(){ 223 | BUCKET_COUNT = 1; 224 | CHAR_BITS = 8; 225 | BITS_SIZE = (sizeof(_Bt) * CHAR_BITS); 226 | BITS_SIZE1 = BITS_SIZE-1; 227 | BITS_LOG2_SIZE = (size_type) log2((size_type)BITS_SIZE); 228 | ALL_BITS_SET = ~(_Bt)0; 229 | MIN_PROBES = 1; 230 | PROBE_INCR = 1; 231 | DEFAULT_PROBES = 16; 232 | SAFETY_PROBES_FACTOR = 32; 233 | MIN_EXTENT = 4; /// start size of the hash table 234 | MIN_OVERFLOW = 8; 235 | MAX_OVERFLOW_FACTOR = 1<<16; //BITS_SIZE*8/sizeof(_Bt); 236 | SAFETY_OVERFLOW_FACTOR = 500; 237 | LOGARITHMIC = logarithmic; 238 | DEFAULT_MIN_LOAD_FACTOR = 0.25; 239 | } 240 | }; 241 | /* basic_traits: gathers the config type, bit-bucket type, size type, difference type and mapper type of a mapper into one traits bundle. */ template 242 | struct basic_traits{ 243 | typedef typename _InMapper::config_type rabbit_config; 244 | typedef typename rabbit_config::_Bt _Bt; 245 | typedef typename rabbit_config::size_type size_type; 246 | typedef ptrdiff_t difference_type; 247 | typedef _InMapper _Mapper; 248 | }; 249 | /* NOTE(review): the template arguments of the next two typedefs are not visible in this copy; confirm how default_traits and sparse_traits differ. */ typedef basic_traits<_BinMapper > > default_traits; 250 | typedef basic_traits<_BinMapper > > sparse_traits; 251 | 252 | 253 | /* basic_unordered_map: hash map from _K to _V; the hasher, key equality, allocator and traits are template parameters with the defaults shown. */ template 254 | < class _K 255 | , class _V 256 | , class _H = rabbit_hash<_K> 257 | , class _E = std::equal_to<_K> 258 | , class _Allocator = std::allocator<_K> 259 | , class _Traits = default_traits 260 | > 261 | class basic_unordered_map { 262 | public: 263 | typedef _K key_type; 264 | 265 | typedef _V mapped_type; 266 | 267 | typedef std::pair<_K,_V> _ElPair; 268 | typedef std::pair 
_ConstElPair; 269 | typedef _ElPair value_type; 270 | typedef _ConstElPair const_value_type; 271 | typedef typename _Traits::_Bt _Bt; /// exists ebucket type - not using vector - interface does not support bit bucketing 272 | typedef typename _Traits::size_type size_type; 273 | typedef typename _Traits::rabbit_config rabbit_config; 274 | typedef typename _Traits::_Mapper _Mapper; 275 | typedef typename _Traits::difference_type difference_type; 276 | 277 | typedef _Allocator allocator_type; 278 | typedef _ElPair* pointer; 279 | typedef const _ElPair* const_pointer; 280 | typedef _ElPair& reference; 281 | typedef const _ElPair& const_reference; 282 | // typedef typename _Base::reverse_iterator reverse_iterator; 283 | // typedef typename _Base::const_reverse_iterator 284 | // const_reverse_iterator; 285 | 286 | typedef _E key_equal; 287 | typedef _E key_compare; 288 | typedef _H hasher; 289 | 290 | protected: 291 | struct overflow_stats{ 292 | size_type start_elements; 293 | size_type end_elements; 294 | overflow_stats() : start_elements(0),end_elements(0){} 295 | }; 296 | 297 | struct _KeySegment{ 298 | 299 | public: 300 | _Bt overflows; 301 | _Bt exists; 302 | private: 303 | void set_bit(_Bt& w, _Bt index, bool f){ 304 | 305 | #ifdef _MSC_VER 306 | #pragma warning(disable:4804) 307 | #endif 308 | _Bt m = (_Bt)1ul << index;// the bit mask 309 | w ^= (-f ^ w) & m; 310 | ///w = (w & ~m) | (-f & m); 311 | } 312 | 313 | public: 314 | //_ElPair keys[sizeof(_Bt) * 8]; 315 | 316 | inline bool all_exists() const { 317 | return (exists == ~(_Bt)0); 318 | } 319 | 320 | inline bool none_exists() const { 321 | return (exists == (_Bt)0); 322 | } 323 | 324 | inline bool is_exists(_Bt bit) const { 325 | return ((exists >> bit) & (_Bt)1ul); 326 | } 327 | 328 | inline bool is_overflows(_Bt bit) const { 329 | return ((overflows >> bit) & (_Bt)1ul); 330 | } 331 | 332 | inline void set_exists(_Bt index, bool f){ 333 | set_bit(exists,index,f); 334 | } 335 | 336 | inline void 
toggle_exists(_Bt index){ 337 | exists ^= ((_Bt)1 << index); 338 | } 339 | 340 | void set_overflows(_Bt index, bool f){ 341 | set_bit(overflows,index,f); 342 | } 343 | 344 | void clear(){ 345 | exists = 0; 346 | overflows = 0; 347 | } 348 | _KeySegment(){ 349 | exists = 0; 350 | overflows = 0; 351 | } 352 | }; 353 | public: 354 | typedef _KeySegment _Segment; 355 | /// the vector that will contain the segmented mapping pairs and flags 356 | typedef std::vector<_Segment, _Allocator> _Segments; 357 | typedef std::vector<_ElPair, _Allocator> _Keys; 358 | 359 | struct hash_kernel{ 360 | /// settings configuration 361 | rabbit_config config; 362 | size_type elements; 363 | size_type initial_probes; 364 | size_type probes; 365 | size_type rand_probes; /// used when there might be an attack 366 | size_type last_modified; 367 | /// the existence bit set is a factor of BITS_SIZE+1 less than the extent 368 | _Segment* clusters;///a.k.a. pages 369 | _ElPair* keys; 370 | 371 | size_type overflow; 372 | size_type overflow_elements; 373 | overflow_stats stats; 374 | _Mapper key_mapper; 375 | _H hf; 376 | _E eq_f; 377 | float mf; 378 | float min_lf; 379 | size_type buckets; 380 | size_type removed; 381 | _Allocator allocator; 382 | _K empty_key; 383 | size_type logarithmic; 384 | size_type collisions; 385 | typename _Allocator::template rebind<_Segment>::other get_segment_allocator() { 386 | return typename _Allocator::template rebind<_Segment>::other(allocator) ; 387 | } 388 | typename _Allocator::template rebind<_ElPair>::other get_el_allocator(){ 389 | return typename _Allocator::template rebind<_ElPair>::other(allocator) ; 390 | } 391 | typename _Allocator::template rebind<_V>::other get_value_allocator(){ 392 | return typename _Allocator::template rebind<_V>::other(allocator) ; 393 | } 394 | size_type capacity() const { 395 | return get_data_size(); 396 | } 397 | 398 | /// the minimum load factor 399 | float load_factor() const{ 400 | if(!elements) return 0; 401 | return 
(float)((double)elements/(double)bucket_count()); 402 | } 403 | float collision_factor() const{ 404 | return (float)((double)collisions/(double)bucket_count()); 405 | } 406 | 407 | /// there are a variable ammount of buckets there are at most this much 408 | /// 409 | size_type bucket_count() const { 410 | if(!elements) return 0; 411 | return get_data_size(); 412 | } 413 | /// the size of a bucket can be calculated based on the 414 | /// hash value of its first occupant 415 | /// mainly to satisfy stl conventions 416 | size_type bucket_size ( size_type n ) const{ 417 | size_type pos = n; 418 | if (!overflows_(pos)) { 419 | if (exists_(pos) && map_key(get_key(pos)) == n) 420 | return 1; 421 | else return 0; 422 | } 423 | size_type m = pos + probes; 424 | size_type r = 0; 425 | for(; pos < m;++pos){ 426 | if(!exists_(pos)){ 427 | }else if(map_key(get_key(pos)) == n){ 428 | ++r; 429 | } 430 | } 431 | size_type e = end(); 432 | for(pos=get_o_start(); pos < e; ){ 433 | if(!exists_(pos)){ 434 | }else if(map_key(get_key(pos)) == n){ 435 | ++r; 436 | } 437 | ++pos; 438 | } 439 | return r; 440 | } 441 | 442 | float max_load_factor() const { 443 | return mf; 444 | } 445 | 446 | void max_load_factor ( float z ){ 447 | mf = z; 448 | } 449 | 450 | void min_load_factor( float z ){ 451 | this->min_lf = z; 452 | } 453 | float min_load_factor() const { 454 | return this->min_lf; 455 | } 456 | /// total data size, never less than than size() 457 | size_type get_data_size() const { 458 | return get_extent()+initial_probes+overflow; 459 | } 460 | 461 | /// the overflow start 462 | size_type get_o_start() const { 463 | return get_extent()+initial_probes; 464 | } 465 | 466 | size_type get_segment_number(size_type pos) const { 467 | return (pos >> config.BITS_LOG2_SIZE); 468 | } 469 | 470 | _Bt get_segment_index(size_type pos) const { 471 | return (_Bt)(pos & (config.BITS_SIZE1)); 472 | } 473 | 474 | _Segment &get_segment(size_type pos) { 475 | return clusters[pos >> 
config.BITS_LOG2_SIZE]; 476 | } 477 | 478 | const _Segment &get_segment(size_type pos) const { 479 | return clusters[get_segment_number(pos)]; 480 | } 481 | 482 | inline const _ElPair &get_pair(size_type pos) const { 483 | return keys[pos]; 484 | //return get_segment(pos).keys[get_segment_index(pos)]; 485 | } 486 | 487 | inline _ElPair& get_pair(size_type pos) { 488 | //return get_segment(pos).keys[get_segment_index(pos)]; 489 | return keys[pos]; 490 | } 491 | 492 | const _K & get_key(size_type pos) const { 493 | return get_pair(pos).first; 494 | } 495 | 496 | const _V & get_value(size_type pos) const { 497 | return get_pair(pos).second; 498 | } 499 | 500 | 501 | _K & get_key(size_type pos) { 502 | return get_pair(pos).first; 503 | } 504 | 505 | _V & get_value(size_type pos) { 506 | return get_pair(pos).second; 507 | } 508 | 509 | void set_segment_key(size_type pos, const _K &k) { 510 | get_pair(pos).first = k; 511 | } 512 | 513 | void destroy_segment_value(size_type pos){ 514 | get_pair(pos).second = _V(); 515 | } 516 | 517 | _V* create_segment_value(size_type pos) { 518 | _V* r = &(get_pair(pos).second); 519 | return r; 520 | } 521 | 522 | void set_segment_value(size_type pos, const _V &v) { 523 | get_pair(pos).second = v; 524 | } 525 | 526 | void set_exists(size_type pos, bool f){ 527 | last_modified = pos; 528 | get_segment(pos).set_exists(get_segment_index(pos),f); 529 | } 530 | 531 | void set_overflows(size_type pos, bool f){ 532 | get_segment(pos).set_overflows(get_segment_index(pos),f); 533 | } 534 | 535 | inline bool exists_(size_type pos) const { 536 | return get_segment(pos).is_exists(get_segment_index(pos)); 537 | } 538 | 539 | inline bool overflows_(size_type pos) const { 540 | return get_segment(pos).is_overflows(get_segment_index(pos)); 541 | } 542 | 543 | inline size_type hash_key(const _K& k) const { 544 | return (size_type)_H()(k); 545 | } 546 | 547 | inline size_type map_key(const _K& k) const { 548 | return map_hash(hash_key(k)); 549 | } 550 | 
551 | inline size_type map_hash(size_type h) const { 552 | if (this->rand_probes) 553 | return key_mapper(randomize(h)); 554 | return key_mapper(h); 555 | } 556 | 557 | inline size_type map_rand_key(const _K& k) const { 558 | size_type h = (size_type)_H()(k); 559 | return map_hash(h); 560 | } 561 | 562 | inline size_type map_rand_key(const _K& k, size_type origin) const { 563 | return origin; 564 | } 565 | 566 | size_type get_e_size() const { 567 | return (size_type) (get_data_size()/config.BITS_SIZE)+1; 568 | } 569 | 570 | void free_data(){ 571 | 572 | if(clusters) { 573 | size_type esize = get_e_size(); 574 | for(size_type e = 0; e < get_data_size(); ++e){ 575 | get_el_allocator().destroy(&keys[e]); 576 | } 577 | get_el_allocator().deallocate(keys,get_data_size()); 578 | for(size_type c = 0; c < esize; ++c){ 579 | get_segment_allocator().destroy(&clusters[c]); 580 | } 581 | get_segment_allocator().deallocate(clusters,get_e_size()); 582 | } 583 | 584 | clusters = nullptr; 585 | } 586 | 587 | double get_resize_factor() const { 588 | return key_mapper.resize_factor(); 589 | } 590 | 591 | size_type get_probes() const { 592 | return this->probes; 593 | } 594 | size_type get_rand_probes() const { 595 | return this->rand_probes; 596 | } 597 | void set_rand_probes(){ 598 | this->rand_probes = this->probes; 599 | } 600 | 601 | void set_rand_probes(size_type rand_probes){ 602 | this->rand_probes = rand_probes; 603 | } 604 | 605 | /// clears all data and resize the new data vector to the parameter 606 | void resize_clear(size_type new_extent){ 607 | /// inverse of factor used to determine overflow list 608 | /// when overflow list is full rehash starts 609 | free_data(); 610 | 611 | key_mapper = _Mapper(new_extent,config); 612 | 613 | mf = 1.0; 614 | assert(config.MAX_OVERFLOW_FACTOR > 0); 615 | if(is_logarithmic()){ 616 | probes = config.log2(new_extent)*logarithmic; 617 | overflow = config.log2(new_extent)*logarithmic; 618 | }else{ 619 | if(min_load_factor() < 0.01){ 620 
| probes = config.DEFAULT_PROBES; 621 | }else{ 622 | probes = config.MIN_PROBES; 623 | } 624 | //std::cout << "rehash " << std::endl; 625 | 626 | overflow = std::max(config.MIN_OVERFLOW, new_extent / (config.MAX_OVERFLOW_FACTOR)); 627 | 628 | } 629 | if(rand_probes ){ 630 | //std::cout << "setting safety values " << std::endl; 631 | //overflow = std::max(new_extent / config.SAFETY_OVERFLOW_FACTOR,overflow); 632 | //probes *= config.SAFETY_PROBES_FACTOR; 633 | } 634 | initial_probes = probes; 635 | //std::cout << "rehash with overflow:" << overflow << std::endl; 636 | elements = 0; 637 | removed = 0; 638 | collisions = 0; 639 | empty_key = _K(); 640 | overflow_elements = get_o_start(); 641 | size_type esize = get_e_size(); 642 | keys = get_el_allocator().allocate(get_data_size()); 643 | clusters = get_segment_allocator().allocate(esize); 644 | _KeySegment ks; 645 | _ElPair element; 646 | for(size_type e = 0; e < get_data_size(); ++e){ 647 | get_el_allocator().construct(&keys[e],element); 648 | } 649 | for(size_type c = 0; c < esize; ++c){ 650 | get_segment_allocator().construct(&clusters[c],ks); 651 | } 652 | set_exists(get_data_size(),true); 653 | buckets = 0; 654 | 655 | }; 656 | 657 | void clear(){ 658 | size_type esize = get_e_size(); 659 | for(size_type c = 0; c < esize; ++c){ 660 | clusters[c].clear(); 661 | } 662 | _ElPair element; 663 | for(size_type e = 0; e < get_data_size(); ++e){ 664 | keys[e] = element; 665 | } 666 | set_exists(get_data_size(),true); 667 | collisions = 0; 668 | elements = 0; 669 | removed = 0; 670 | rand_probes = 0; 671 | } 672 | 673 | hash_kernel(const key_compare& compare,const allocator_type& allocator) 674 | : clusters(nullptr), eq_f(compare), mf(1.0f), min_lf(config.DEFAULT_MIN_LOAD_FACTOR), allocator(allocator),logarithmic(config.LOGARITHMIC) 675 | { 676 | resize_clear(config.MIN_EXTENT); 677 | } 678 | 679 | hash_kernel() : clusters(nullptr), mf(1.0f), min_lf(config.DEFAULT_MIN_LOAD_FACTOR),logarithmic(config.LOGARITHMIC) 680 | { 
681 | resize_clear(config.MIN_EXTENT); 682 | } 683 | 684 | hash_kernel(const hash_kernel& right) : clusters(nullptr), mf(1.0f), min_lf(config.DEFAULT_MIN_LOAD_FACTOR),logarithmic(config.LOGARITHMIC) 685 | { 686 | *this = right; 687 | } 688 | 689 | ~hash_kernel(){ 690 | free_data(); 691 | } 692 | inline size_type get_extent() const { 693 | return key_mapper.extent; 694 | } 695 | void set_logarithmic(size_type loga){ 696 | logarithmic = loga; 697 | } 698 | size_type get_logarithmic() const { 699 | return this->logarithmic; 700 | } 701 | bool is_logarithmic() const { 702 | return this->logarithmic > 0; 703 | } 704 | hash_kernel& operator=(const hash_kernel& right){ 705 | config = right.config; 706 | key_mapper = right.key_mapper; 707 | free_data(); 708 | buckets = right.buckets; 709 | removed = right.removed; 710 | mf = right.mf; 711 | min_lf = right.min_lf; 712 | elements = right.elements; 713 | collisions = right.collisions; 714 | size_type esize = get_e_size(); 715 | clusters = get_segment_allocator().allocate(esize); 716 | keys = right.keys; 717 | std::copy(clusters, right.clusters, right.clusters+esize); 718 | return *this; 719 | } 720 | inline bool raw_equal_key(size_type pos,const _K& k) const { 721 | const _K& l = get_key(pos); ///.key(get_segment_index(pos)); 722 | return eq_f(l, k) ; 723 | } 724 | inline bool segment_equal_key_exists(size_type pos,const _K& k) const { 725 | _Bt index = get_segment_index(pos); 726 | const _Segment& s = get_segment(pos); 727 | return eq_f(get_key(pos), k) && s.is_exists(index) ; 728 | 729 | } 730 | 731 | inline bool equal_key(size_type pos,const _K& k) const { 732 | const _K& l = get_key(pos); 733 | return eq_f(l, k) ; 734 | } 735 | 736 | inline size_type randomize(size_type v) const { 737 | return key_mapper.randomize(v); 738 | } 739 | 740 | inline size_type hash_probe_incr(size_type base, unsigned int i) const { 741 | //if(sizeof(_K) > sizeof(unsigned long long)){ 742 | //if (this->rand_probes){ 743 | //return base + i*i + 
1; 744 | //}else{ 745 | return base + i + 1; 746 | //} 747 | } 748 | size_type find_in_bucket(const _K& k, size_type origin) const { 749 | size_type locate = get_o_start(); 750 | size_type pos = locate; 751 | for(;pos < end();++pos){ 752 | if(segment_equal_key_exists(pos,k)){ 753 | return pos; 754 | } 755 | } 756 | 757 | return end(); 758 | } 759 | _V* subscript_bucket(const _K& k, size_type origin){ 760 | size_type locate = overflow_elements; 761 | size_type pos = locate; 762 | for(;pos < end();++pos){ 763 | if(!exists_(pos)){ 764 | break; 765 | } 766 | } 767 | 768 | if(pos != end()){ 769 | overflow_elements++; 770 | if (!this->is_logarithmic()) { 771 | if (this->load_factor() < min_load_factor()) { 772 | if (this->rand_probes) { 773 | this->probes <<= config.PROBE_INCR; 774 | } 775 | else{ 776 | this->probes += config.PROBE_INCR; 777 | } 778 | //std::cout << "increased probes to " << this->probes << " o f bucket " << this->overflow << " min lf " << min_load_factor() << " actual lf " << this->load_factor() << std::endl; 779 | } 780 | } 781 | set_overflows(origin, true); 782 | set_exists(pos, true); 783 | set_segment_key(pos, k); 784 | size_type os = 0; // (overflow_elements - (get_extent() + initial_probes)); 785 | if(os == 1){ 786 | stats.start_elements = elements; 787 | //std::cout << "overflow start: hash table size " << elements << " elements in over flow:" << os << std::endl; 788 | } 789 | 790 | if(overflow_elements == end() && stats.start_elements){ 791 | stats.end_elements = elements; 792 | size_type saved = stats.end_elements - stats.start_elements - os; 793 | double percent_saved = (100.0*((double)saved/(double)elements)); 794 | 795 | // std::cout << "overflow end: hash table size " << elements << " elements in over flow:" << os << " saved : " << saved << 796 | // std::endl << " percent saved " << std::setprecision(4) << percent_saved << 797 | // std::endl; 798 | } 799 | 800 | ++elements; 801 | return create_segment_value(pos); 802 | 803 | } 804 | return 
nullptr; 805 | } 806 | _V* subscript_rest(const _K& k, size_type origin) 807 | RABBIT_NOINLINE_ { 808 | size_type pos = map_rand_key(k); 809 | size_type base = pos; 810 | for(unsigned int i =0; i < probes && pos < get_extent();++i){ 811 | _Bt si = get_segment_index(pos); 812 | _Segment& s = get_segment(pos); 813 | if(!s.is_exists(si)){ 814 | s.toggle_exists(si); 815 | set_segment_key(pos,k); 816 | 817 | ++collisions; 818 | ++elements; 819 | set_overflows(origin, true); 820 | return create_segment_value(pos); 821 | } 822 | pos = hash_probe_incr(base,i); 823 | } 824 | 825 | 826 | return subscript_bucket(k,origin); 827 | } 828 | _V* subscript(const _K& k){ 829 | size_type pos = map_key(k); 830 | _Bt si = get_segment_index(pos); 831 | _Segment& s = get_segment(pos); 832 | 833 | bool key_exists = s.is_exists(si); 834 | bool key_overflows = s.is_overflows(si); 835 | if(!key_exists && !key_overflows){ 836 | s.toggle_exists(si); 837 | set_segment_key(pos,k); 838 | ++elements; 839 | return create_segment_value(pos); 840 | }else if(key_exists && equal_key(pos,k)){ 841 | return &(get_value(pos)); 842 | } 843 | size_type h = pos; 844 | if(key_overflows){ 845 | pos = find_rest(k,h); 846 | if(pos != end()){ 847 | return &(get_value(pos)); 848 | } 849 | } 850 | return subscript_rest(k,h); 851 | } 852 | size_type erase_rest(const _K& k, size_type origin) 853 | RABBIT_NOINLINE_ /// this function must never be inlined 854 | { 855 | size_type pos = find_rest(k,origin); 856 | 857 | if(pos != (*this).end()){ 858 | set_exists(pos, false); 859 | ++removed; 860 | set_segment_key(pos, empty_key); 861 | destroy_segment_value(pos); 862 | --elements; 863 | return 1; 864 | } 865 | return 0; 866 | } 867 | size_type erase(const _K& k){ 868 | 869 | size_type pos = map_key(k); 870 | 871 | _Bt si = get_segment_index(pos); 872 | _Segment& s = get_segment(pos); 873 | if(s.is_exists(si) && equal_key(pos,k)){ ///get_segment(pos).exists == ALL_BITS_SET || 874 | set_segment_key(pos, empty_key); 875 | 
s.toggle_exists(si); 876 | destroy_segment_value(pos); 877 | --elements; 878 | ++removed; 879 | return 1; 880 | } 881 | if(!s.is_overflows(si)){ 882 | return 0; 883 | }else 884 | return erase_rest(k, pos); 885 | 886 | } 887 | /// not used (could be used where hash table must actually shrink too) 888 | bool is_small() const { 889 | return (get_extent() > (config.MIN_EXTENT << 3)) && (elements < get_extent()/8); 890 | } 891 | 892 | size_type count(const _K& k) const { 893 | size_type pos =(*this).find(k); 894 | if(pos == (*this).end()){ 895 | return 0; 896 | }else return 1; 897 | } 898 | const _V& at(const _K& k) const { 899 | size_type pos = find(k); 900 | if(pos != (*this).end()){ 901 | return get_value(pos); 902 | } 903 | throw std::exception(); 904 | } 905 | _V& direct(size_type pos) { 906 | return get_value(pos); 907 | } 908 | _V& at(const _K& k) { 909 | size_type pos = find(k); 910 | if(pos != (*this).end()){ 911 | return get_value(pos); 912 | } 913 | throw std::exception(); 914 | } 915 | 916 | bool get(const _K& k, _V& v) const { 917 | size_type pos = find(k); 918 | if(pos != (*this).end()){ 919 | v = get_value(pos); 920 | return true; 921 | } 922 | return false; 923 | } 924 | 925 | /// probabilistic check if key with given hash exists 926 | /// false indicates the key definitely will not be found 927 | /// else well have to do a full find 928 | 929 | bool could_have(size_type origin){ 930 | size_type pos = map_hash(origin); 931 | _Bt index = get_segment_index(origin); 932 | const _Segment& s = get_segment(origin); 933 | return (s.is_exists(index) || s.is_overflows(index)); 934 | } 935 | 936 | size_type find_rest_not_empty(const _K& k, size_type origin) const 937 | RABBIT_NOINLINE_ 938 | { 939 | 940 | /// randomization step for attack mitigation 941 | size_type pos = map_rand_key(k,origin); 942 | size_type base = pos; 943 | for(unsigned int i = 0; i < probes && pos < get_extent();){ 944 | if(equal_key(pos,k)) return pos; 945 | pos = hash_probe_incr(base,i); 
946 | ++i; 947 | } 948 | _Bt index = get_segment_index(origin); 949 | const _Segment& s = get_segment(origin); 950 | if(!s.is_overflows(index)){ 951 | return end(); 952 | 953 | } 954 | return find_in_bucket(k,origin); 955 | 956 | } 957 | size_type find_rest(const _K& k, size_type origin) const 958 | RABBIT_NOINLINE_ 959 | { 960 | /// randomization step for attack mitigation 961 | size_type pos = map_rand_key(k); 962 | size_type base = pos; 963 | for(unsigned int i =0; i < probes && pos < get_extent();){// 964 | _Bt si = get_segment_index(pos); 965 | if(segment_equal_key_exists(pos,k)){ 966 | return pos; 967 | } 968 | pos = hash_probe_incr(base,i); 969 | ++i; 970 | } 971 | return find_in_bucket(k,origin); 972 | } 973 | size_type find_empty(const _K& k, const size_type& unmapped) const 974 | RABBIT_NOINLINE_ 975 | { 976 | size_type pos = map_hash(unmapped); 977 | _Bt index = get_segment_index(pos); 978 | const _Segment& s = get_segment(pos); 979 | if(s.is_exists(index) && equal_key(pos,k) ){ ///get_segment(pos).exists == ALL_BITS_SET || 980 | return pos; 981 | } 982 | if(!s.is_overflows(index)){ 983 | return end(); 984 | } 985 | return find_rest(k, pos); 986 | } 987 | 988 | inline size_type find_non_empty(const _K& k,const size_type& unmapped) const { 989 | size_type pos = map_hash(unmapped); 990 | if(equal_key(pos,k)) return pos; 991 | return find_rest_not_empty(k, pos); 992 | } 993 | 994 | inline size_type find(const _K& k,const size_type& unmapped) const { 995 | 996 | bool is_empty = eq_f(empty_key,k); // && sizeof(_K) <= sizeof(size_type); 997 | if(is_empty){ 998 | return find_empty(k, unmapped); 999 | }else{ 1000 | return find_non_empty(k,unmapped); 1001 | } 1002 | } 1003 | 1004 | size_type find(const _K& k) const { 1005 | 1006 | size_type pos = hash_key(k); 1007 | return find(k,pos); 1008 | } 1009 | 1010 | size_type begin() const { 1011 | if(!elements) 1012 | return end(); 1013 | 1014 | size_type pos = 0; 1015 | _Bt index = 0; 1016 | 1017 | const _Bt bits_size 
= config.BITS_SIZE; 1018 | size_type e = end(); 1019 | while(pos < e){ 1020 | const _Segment &seg = get_segment(pos); 1021 | index = get_segment_index(pos); 1022 | if(seg.exists == 0){ 1023 | pos += bits_size; 1024 | }else{ 1025 | if(seg.is_exists(index)) 1026 | break; 1027 | ++pos; 1028 | } 1029 | } 1030 | return pos ; 1031 | } 1032 | size_type end() const { 1033 | 1034 | return get_data_size(); 1035 | } 1036 | size_type size() const { 1037 | return elements; 1038 | } 1039 | size_type get_collisions() const { 1040 | return collisions; 1041 | } 1042 | typedef std::shared_ptr ptr; 1043 | }; /// hash_kernel 1044 | public: 1045 | struct const_iterator; 1046 | struct iterator { 1047 | typedef hash_kernel* kernel_ptr; 1048 | const basic_unordered_map* h; 1049 | size_type pos; 1050 | friend struct const_iterator; 1051 | protected: 1052 | _Bt index; 1053 | _Bt exists; 1054 | _Bt bsize; 1055 | kernel_ptr get_kernel() const { 1056 | return h->current.get(); 1057 | } 1058 | kernel_ptr get_kernel() { 1059 | return h->current.get(); 1060 | } 1061 | 1062 | void increment() { 1063 | ++pos; 1064 | ++index; 1065 | if (index == bsize) { 1066 | auto k = get_kernel(); 1067 | const _Segment& s = k->get_segment(pos); 1068 | exists = s.exists; 1069 | index = k->get_segment_index(pos); 1070 | } 1071 | 1072 | } 1073 | public: 1074 | iterator() : h(nullptr), pos(0) { 1075 | } 1076 | 1077 | iterator(const basic_unordered_map* h, size_type pos, _Bt exists, _Bt index, _Bt bsize) 1078 | : pos(pos), h(h), exists(exists), index(index), bsize(bsize) { 1079 | 1080 | } 1081 | 1082 | iterator(const iterator& r) { 1083 | (*this) = r; 1084 | } 1085 | 1086 | //~iterator() { 1087 | //} 1088 | 1089 | iterator& operator=(const iterator& r) { 1090 | pos = r.pos; 1091 | h = r.h; 1092 | exists = r.exists; 1093 | index = r.index; 1094 | bsize = r.bsize; 1095 | return (*this); 1096 | } 1097 | inline iterator& operator++() { 1098 | do { 1099 | increment(); 1100 | } while ((exists & (((_Bt)1) << index)) == 
(_Bt)0); 1101 | return (*this); 1102 | } 1103 | iterator operator++(int) { 1104 | iterator t = (*this); 1105 | ++(*this); 1106 | return t; 1107 | } 1108 | const _ElPair& operator*() const { 1109 | return get_kernel()->get_pair((*this).pos); 1110 | } 1111 | inline _ElPair& operator*() { 1112 | return get_kernel()->get_pair((*this).pos); 1113 | } 1114 | inline _ElPair* operator->() const { 1115 | _ElPair* ret = &(get_kernel()->get_pair(pos)); 1116 | return ret; 1117 | } 1118 | inline const _ElPair *operator->() { 1119 | _ElPair* ret = &(get_kernel()->get_pair(pos)); 1120 | return ret; 1121 | } 1122 | inline bool operator==(const iterator& r) const { 1123 | 1124 | return (pos == r.pos); 1125 | } 1126 | bool operator!=(const iterator& r) const { 1127 | 1128 | return (pos != r.pos); 1129 | } 1130 | 1131 | size_type get_pos() const { 1132 | return pos; 1133 | } 1134 | 1135 | }; 1136 | 1137 | struct const_iterator { 1138 | private: 1139 | typedef hash_kernel* kernel_ptr; 1140 | const basic_unordered_map* h; 1141 | _Bt index; 1142 | _Bt exists; 1143 | void check_pointer() const { 1144 | return; 1145 | if(h!=nullptr && h->pcurrent != h->current.get()){ 1146 | std::cout << "invalid cache pointer: not equal to actual" << std::endl; 1147 | } 1148 | } 1149 | inline kernel_ptr get_kernel() const { 1150 | check_pointer(); 1151 | return h->pcurrent; 1152 | } 1153 | inline kernel_ptr get_kernel() { 1154 | check_pointer(); 1155 | return const_cast(h)->pcurrent; // current.get(); 1156 | } 1157 | 1158 | void increment() { 1159 | ++pos; 1160 | ++index; 1161 | auto k = get_kernel(); 1162 | if (index == k->config.BITS_SIZE) { 1163 | const _Segment& s = k->get_segment(pos); 1164 | exists = s.exists; 1165 | index = k->get_segment_index(pos); 1166 | } 1167 | 1168 | } 1169 | public: 1170 | size_type pos; 1171 | 1172 | const_iterator() : h(nullptr){ 1173 | 1174 | } 1175 | //~const_iterator() { 1176 | 1177 | //} 1178 | const_iterator 1179 | ( const basic_unordered_map* h, size_type pos, _Bt 
exists, _Bt index) 1180 | : pos(pos), h(h), exists(exists), index(index){ 1181 | 1182 | } 1183 | const_iterator(const iterator& r) : h(nullptr){ 1184 | (*this) = r; 1185 | } 1186 | 1187 | const_iterator& operator=(const iterator& r) { 1188 | pos = r.pos; 1189 | h = r.h; 1190 | index = r.index; 1191 | exists = r.exists; 1192 | return (*this); 1193 | } 1194 | 1195 | const_iterator& operator=(const const_iterator& r) { 1196 | pos = r.pos; 1197 | h = r.h; 1198 | index = r.index; 1199 | return (*this); 1200 | } 1201 | 1202 | const_iterator& operator++() { 1203 | do { 1204 | increment(); 1205 | } while ((exists & (((_Bt)1) << index)) == (_Bt)0); 1206 | return (*this); 1207 | } 1208 | const_iterator operator++(int) { 1209 | const_iterator t = (*this); 1210 | ++(*this); 1211 | return t; 1212 | } 1213 | const _ElPair& operator*() const { 1214 | return get_kernel()->get_pair(pos); 1215 | } 1216 | const _ElPair *operator->() const { 1217 | _ElPair* ret = &(get_kernel()->get_pair(pos)); 1218 | return ret; 1219 | } 1220 | 1221 | inline bool operator==(const const_iterator& r) const { 1222 | 1223 | return (pos == r.pos); 1224 | } 1225 | bool operator!=(const const_iterator& r) const { 1226 | 1227 | return (pos != r.pos); 1228 | } 1229 | 1230 | size_type get_pos() const { 1231 | return pos; 1232 | } 1233 | 1234 | }; 1235 | 1236 | protected: 1237 | /// the default config for each hash instance 1238 | rabbit_config default_config; 1239 | key_compare key_c; 1240 | allocator_type alloc; 1241 | 1242 | void rehash(){ 1243 | size_type to = current->key_mapper.next_size(); 1244 | rehash(to); 1245 | } 1246 | void set_current(typename hash_kernel::ptr c){ 1247 | current = c; 1248 | pcurrent = c.get(); 1249 | } 1250 | 1251 | 1252 | typename hash_kernel::ptr current; 1253 | typedef std::vector kernel_stack; 1254 | hash_kernel* pcurrent; 1255 | inline void create_current(){ 1256 | if(current==nullptr) 1257 | set_current(std::allocate_shared(alloc,key_c,alloc)); 1258 | } 1259 | 1260 | iterator 
from_pos_empty(size_type pos) const { 1261 | return iterator(this, pos, 0, 0, 0); 1262 | } 1263 | 1264 | iterator from_pos(size_type pos) const { 1265 | const _Segment& s = pcurrent->get_segment(pos); 1266 | _Bt index = pcurrent->get_segment_index(pos); 1267 | _Bt bsize = pcurrent->config.BITS_SIZE; 1268 | return iterator(this,pos,s.exists,index,bsize); 1269 | } 1270 | 1271 | public: 1272 | float load_factor() const{ 1273 | if(current==nullptr) return 0; 1274 | return current->load_factor(); 1275 | } 1276 | size_type bucket_count() const { 1277 | if(current==nullptr) return 0; 1278 | return current->bucket_count(); 1279 | } 1280 | size_type bucket_size ( size_type n ) const{ 1281 | if(current==nullptr) return 0; 1282 | return current->bucket_size ( n ); 1283 | } 1284 | float max_load_factor() const { 1285 | if(current==nullptr) return 1; 1286 | return current->max_load_factor(); 1287 | } 1288 | 1289 | void max_load_factor ( float z ){ 1290 | create_current(); 1291 | current->max_load_factor(z); 1292 | } 1293 | bool empty() const { 1294 | if(current==nullptr) return true; 1295 | return current->size() == 0; 1296 | } 1297 | void reserve(size_type atleast){ 1298 | create_current(); 1299 | rehash((size_type)((double)atleast*current->get_resize_factor())); 1300 | } 1301 | void resize(size_type atleast) { 1302 | create_current(); 1303 | size_type calc = current->key_mapper.nearest_larger(atleast); 1304 | rehash(calc,false); // avoid randomization 1305 | } 1306 | /// called when we dont want pure stl semantics 1307 | void rehash(size_type to_, bool check_lf = true) { 1308 | create_current(); 1309 | rabbit_config config; 1310 | size_type to = std::max(to_, config.MIN_EXTENT); 1311 | /// can cause oom e because of recursive rehash'es 1312 | 1313 | typename hash_kernel::ptr rehashed = std::allocate_shared(alloc); 1314 | size_type extent = current->get_extent(); 1315 | size_type new_extent = to; 1316 | size_type nrand_probes = current->get_rand_probes(); 1317 | hash_kernel * 
reh = rehashed.get(); 1318 | hash_kernel * cur = current.get(); 1319 | try{ 1320 | //std::cout << " load factor " << current->load_factor() << " for " << current->size() << " elements and collision factor " << current->collision_factor() << std::endl; 1321 | //std::cout << " capacity " << current->capacity() << std::endl; 1322 | if(check_lf && current->load_factor() < 0.15){ 1323 | //std::cout << "possible attack/bad hash detected : using random probes : " << current->get_probes() << " : " << extent << " : " << current->get_logarithmic() << std::endl; 1324 | nrand_probes = 1; 1325 | } 1326 | rehashed->set_logarithmic(current->get_logarithmic()); 1327 | rehashed->mf = (*this).current->mf; 1328 | rehashed->min_load_factor(this->current->min_load_factor()); 1329 | rehashed->set_rand_probes(nrand_probes); 1330 | rehashed->resize_clear(new_extent); 1331 | using namespace std; 1332 | while(true){ 1333 | iterator e = end(); 1334 | size_type ctr = 0; 1335 | bool rerehashed = false; 1336 | //_K k; 1337 | for(iterator i = begin();i != e;++i){ 1338 | //std::swap(k,(*i).first); 1339 | _V* v = reh->subscript((*i).first); 1340 | if(v != nullptr){ 1341 | *v = i->second; 1342 | /// a cheap check to illuminate subtle bugs during development 1343 | if(++ctr != rehashed->elements){ 1344 | cout << "iterations " << ctr << " elements " << rehashed->elements << " extent " << rehashed->get_extent() << endl; 1345 | cout << "inside rehash " << rehashed->get_extent() << endl; 1346 | cout << "new " << rehashed->elements << " current size:" << current->elements << endl; 1347 | throw bad_alloc(); 1348 | } 1349 | }else{ 1350 | //std::cout << "rehashing in rehash " << ctr << " of " << current->elements << std::endl; 1351 | rerehashed = true; 1352 | new_extent = rehashed->key_mapper.next_size(); 1353 | rehashed = std::allocate_shared(alloc); 1354 | rehashed->resize_clear(new_extent); 1355 | rehashed->mf = (*this).current->mf; 1356 | rehashed->min_load_factor(this->current->min_load_factor()); 1357 
| rehashed->set_rand_probes(nrand_probes); 1358 | rehashed->set_logarithmic(current->get_logarithmic()); 1359 | reh = rehashed.get(); 1360 | 1361 | // i = begin(); // start over 1362 | //ctr = 0; 1363 | break; 1364 | 1365 | } 1366 | } 1367 | if(rehashed->elements == current->elements){ 1368 | break; 1369 | }else if(!rerehashed){ 1370 | cout << "hash error: unequal key count - retry rehash " << endl; 1371 | cout << "iterations " << ctr << " elements " << rehashed->elements << " extent " << rehashed->get_extent() << endl; 1372 | cout << "new " << rehashed->elements << " current size:" << current->elements << endl; 1373 | throw bad_alloc(); 1374 | }else{ 1375 | //cout << "re-rehash iterations " << ctr << " elements " << rehashed->elements << " extent " << rehashed->get_extent() << endl; 1376 | 1377 | //rehashed->resize_clear(rehashed->get_extent()); 1378 | //break; 1379 | } 1380 | 1381 | } 1382 | 1383 | }catch(std::bad_alloc &e){ 1384 | std::cout << "bad allocation: rehash failed in temp phase :" << new_extent << std::endl; 1385 | size_t t = 0; 1386 | std::cin >> t; 1387 | throw e; 1388 | } 1389 | set_current(rehashed); 1390 | 1391 | } 1392 | void clear(){ 1393 | if(current!=nullptr) 1394 | current->clear(); 1395 | pcurrent = nullptr; 1396 | current = nullptr; 1397 | ///set_current(std::allocate_shared(alloc)); 1398 | } 1399 | 1400 | void clear(const key_compare& compare,const allocator_type& allocator){ 1401 | set_current(std::allocate_shared(allocator,compare, allocator)); 1402 | } 1403 | 1404 | basic_unordered_map() :current(nullptr),pcurrent(nullptr){ 1405 | // 1406 | } 1407 | 1408 | basic_unordered_map(const key_compare& compare,const allocator_type& allocator) : key_c(compare),alloc(allocator),pcurrent(nullptr){ 1409 | 1410 | } 1411 | 1412 | basic_unordered_map(const basic_unordered_map& right) { 1413 | *this = right; 1414 | } 1415 | 1416 | ~basic_unordered_map(){ 1417 | 1418 | } 1419 | 1420 | void swap(basic_unordered_map& with){ 1421 | typename 
hash_kernel::ptr t = with.current; 1422 | with.set_current(this->current); 1423 | this->set_current(t); 1424 | } 1425 | 1426 | void move(basic_unordered_map& from){ 1427 | (*this).set_current(from.current); 1428 | from.set_current(nullptr); 1429 | } 1430 | 1431 | basic_unordered_map& operator=(const basic_unordered_map& right){ 1432 | (*this).set_current(std::allocate_shared(alloc)); 1433 | (*this).reserve(right.size()); 1434 | const_iterator e = right.end(); 1435 | for(const_iterator c = right.begin(); c!=e;++c){ 1436 | (*this)[(*c).first] = (*c).second; 1437 | } 1438 | 1439 | return *this; 1440 | } 1441 | 1442 | hasher hash_function() const { 1443 | return (this->current->hf); 1444 | } 1445 | 1446 | key_equal key_eq() const { 1447 | if(current!=nullptr) 1448 | return (this->current->eq_f); 1449 | return key_equal(); 1450 | } 1451 | 1452 | void set_min_load_factor(float x){ 1453 | create_current(); 1454 | current->min_load_factor(x); 1455 | } 1456 | iterator insert(const _K& k,const _V& v){ 1457 | create_current(); 1458 | (*this)[k] = v; 1459 | return from_pos(current->last_modified); 1460 | } 1461 | 1462 | iterator insert(const std::pair<_K,_V>& p){ 1463 | 1464 | return iterator(this, insert(p.first, p.second)); 1465 | } 1466 | /// generic template copy 1467 | template 1468 | iterator insert(_Iter start, _Iter _afterLast){ 1469 | create_current(); 1470 | for(_Iter i = start; i != _afterLast; ++i){ 1471 | insert((*i).first, (*i).second); 1472 | } 1473 | return from_pos(current->last_modified); 1474 | } 1475 | /// fast getter that doesnt use iterators and doesnt change the table without letting you know 1476 | bool get(const _K& k, _V& v) const { 1477 | if(current!=nullptr) 1478 | return (*this).current->get(k,v); 1479 | return false; 1480 | } 1481 | /// throws a exception when value could not match the key 1482 | const _V& at(const _K& k) const { 1483 | if(current == nullptr) throw std::exception(); 1484 | return (*this).current->at(k); 1485 | } 1486 | _V& 
at(const _K& k) { 1487 | create_current(); 1488 | return (*this).current->at(k); 1489 | } 1490 | 1491 | bool error(const _K& k){ 1492 | _V *rv = current->subscript(k); 1493 | return rv==nullptr; 1494 | } 1495 | 1496 | _V& operator[](const _K& k){ 1497 | create_current(); 1498 | _V *rv = current->subscript(k); 1499 | while(rv == nullptr){ 1500 | this->rehash(); 1501 | rv = current->subscript(k); 1502 | } 1503 | return *rv; 1504 | } 1505 | size_type erase(const _K& k){ 1506 | if(current==nullptr) return size_type(); 1507 | //if(current->is_small()){ 1508 | // rehash(1); 1509 | //} 1510 | return current->erase(k); 1511 | } 1512 | size_type erase(iterator i){ 1513 | return erase((*i).first); 1514 | } 1515 | size_type erase(const_iterator i){ 1516 | return erase((*i).first); 1517 | } 1518 | size_type count(const _K& k) const { 1519 | if(current == nullptr)return size_type(); 1520 | return current->count(k); 1521 | } 1522 | iterator find(const _K& k) const { 1523 | if(current == nullptr) return from_pos_empty(size_type()); 1524 | return from_pos(current->find(k)); 1525 | } 1526 | iterator begin() const { 1527 | if(current==nullptr) return from_pos_empty(size_type()); 1528 | return from_pos(current->begin()); 1529 | } 1530 | iterator end() const { 1531 | if(current==nullptr) return from_pos_empty(size_type()); 1532 | return from_pos(current->end()); 1533 | } 1534 | const_iterator cbegin() const { 1535 | return begin(); 1536 | } 1537 | const_iterator cend() const { 1538 | return end(); 1539 | } 1540 | size_type size() const { 1541 | if(current==nullptr)return size_type(); 1542 | return current->size(); 1543 | } 1544 | void set_logarithmic(size_type logarithmic){ 1545 | create_current(); 1546 | this->current->set_logarithmic(logarithmic); 1547 | } 1548 | }; 1549 | /// the stl compatible unordered map interface 1550 | template 1551 | < class _Kty 1552 | , class _Ty 1553 | , class _Hasher = rabbit_hash<_Kty> 1554 | , class _Keyeq = std::equal_to<_Kty> 1555 | , class _Alloc = 
std::allocator > 1556 | , class _Traits = default_traits 1557 | > 1558 | class unordered_map : public basic_unordered_map<_Kty, _Ty, _Hasher, _Keyeq, _Alloc, _Traits> 1559 | { // hash table of {key, mapped} values, unique keys 1560 | public: 1561 | typedef basic_unordered_map<_Kty, _Ty, _Hasher, _Keyeq, _Alloc, _Traits> _Base; 1562 | 1563 | typedef unordered_map<_Kty, _Ty, _Hasher, _Keyeq, _Alloc, _Traits> _Myt; 1564 | 1565 | typedef _Hasher hasher; 1566 | typedef _Kty key_type; 1567 | typedef _Ty mapped_type; 1568 | typedef _Keyeq key_equal; 1569 | typedef typename _Base::key_compare key_compare; 1570 | 1571 | // typedef typename _Base::value_compare value_compare; 1572 | typedef typename _Base::allocator_type allocator_type; 1573 | typedef typename _Base::size_type size_type; 1574 | typedef typename _Base::difference_type difference_type; 1575 | typedef typename _Base::pointer pointer; 1576 | typedef typename _Base::const_pointer const_pointer; 1577 | typedef typename _Base::reference reference; 1578 | typedef typename _Base::const_reference const_reference; 1579 | typedef typename _Base::iterator iterator; 1580 | typedef typename _Base::const_iterator const_iterator; 1581 | // typedef typename _Base::reverse_iterator reverse_iterator; 1582 | // typedef typename _Base::const_reverse_iterator 1583 | // const_reverse_iterator; 1584 | typedef typename _Base::value_type value_type; 1585 | 1586 | typedef typename _Base::iterator local_iterator; 1587 | typedef typename _Base::const_iterator const_local_iterator; 1588 | 1589 | unordered_map() 1590 | : _Base(key_compare(), allocator_type()) 1591 | { // construct empty map from defaults 1592 | } 1593 | 1594 | explicit unordered_map(const allocator_type& a) 1595 | : _Base(key_compare(), a) 1596 | { // construct empty map from defaults, allocator 1597 | } 1598 | 1599 | unordered_map(const _Myt& _Right) 1600 | : _Base(_Right) 1601 | { // construct map by copying _Right 1602 | } 1603 | 1604 | //unordered_map(const _Myt& 
_Right, const allocator_type& _Al) 1605 | // : _Base(_Right, _Al) 1606 | // { // construct map by copying _Right, allocator 1607 | // } 1608 | 1609 | explicit unordered_map(size_type _Buckets) 1610 | : _Base(key_compare(), allocator_type()) 1611 | { // construct empty map from defaults, ignore initial size 1612 | this->rehash(_Buckets); 1613 | } 1614 | 1615 | unordered_map(size_type _Buckets, const hasher& _Hasharg) 1616 | : _Base(key_compare(_Hasharg), allocator_type()) 1617 | { // construct empty map from hasher 1618 | this->rehash(_Buckets); 1619 | } 1620 | 1621 | unordered_map 1622 | ( size_type _Buckets 1623 | , const hasher& _Hasharg 1624 | , const _Keyeq& _Keyeqarg 1625 | ) 1626 | : _Base(key_compare(_Hasharg, _Keyeqarg), allocator_type()) 1627 | { // construct empty map from hasher and equality comparator 1628 | this->rehash(_Buckets); 1629 | } 1630 | 1631 | unordered_map 1632 | ( size_type _Buckets 1633 | , const hasher& _Hasharg 1634 | , const _Keyeq& _Keyeqarg 1635 | , const allocator_type& a 1636 | ) 1637 | : _Base(key_compare(_Hasharg, _Keyeqarg), a) 1638 | { // construct empty map from hasher and equality comparator 1639 | this->rehash(_Buckets); 1640 | } 1641 | 1642 | template 1643 | unordered_map 1644 | ( _Iter _First 1645 | , _Iter _Last 1646 | ) 1647 | : _Base(key_compare(), allocator_type()) 1648 | { // construct map from sequence, defaults 1649 | _Base::insert(_First, _Last); 1650 | } 1651 | 1652 | template 1653 | unordered_map 1654 | ( _Iter _First 1655 | , _Iter _Last 1656 | , size_type _Buckets 1657 | ) 1658 | : _Base(key_compare(), allocator_type()) 1659 | { // construct map from sequence, ignore initial size 1660 | this->rehash(_Buckets); 1661 | _Base::insert(_First, _Last); 1662 | } 1663 | 1664 | template 1665 | unordered_map 1666 | ( _Iter _First 1667 | , _Iter _Last 1668 | , size_type _Buckets 1669 | , const hasher& _Hasharg 1670 | ) 1671 | : _Base(key_compare(_Hasharg), allocator_type()) 1672 | { 1673 | this->rehash(_Buckets); 1674 | 
_Base::insert(_First, _Last); 1675 | } 1676 | 1677 | template 1678 | unordered_map 1679 | ( _Iter _First 1680 | , _Iter _Last 1681 | , size_type _Buckets 1682 | , const hasher& _Hasharg 1683 | , const _Keyeq& _Keyeqarg 1684 | ) 1685 | : _Base(key_compare(_Hasharg, _Keyeqarg), allocator_type()) 1686 | { 1687 | this->rehash(_Buckets); 1688 | _Base::insert(_First, _Last); 1689 | } 1690 | 1691 | template 1692 | unordered_map 1693 | ( _Iter _First 1694 | , _Iter _Last 1695 | , size_type _Buckets 1696 | , const hasher& _Hasharg 1697 | , const _Keyeq& _Keyeqarg 1698 | , const allocator_type& _Al 1699 | ) 1700 | : _Base(key_compare(_Hasharg, _Keyeqarg), _Al) 1701 | { 1702 | this->rehash(_Buckets); 1703 | _Base::insert(_First, _Last); 1704 | } 1705 | 1706 | _Myt& operator=(const _Myt& _Right){ // assign by copying _Right 1707 | _Base::operator=(_Right); 1708 | return (*this); 1709 | } 1710 | 1711 | unordered_map(_Myt&& from) 1712 | { 1713 | _Base::move(from); 1714 | } 1715 | 1716 | unordered_map(_Myt&& from, const allocator_type& _Al) 1717 | : _Base(key_compare(), _Al) 1718 | { // construct map by moving _Right, allocator 1719 | _Base::move(from); 1720 | } 1721 | 1722 | _Myt& operator=(_Myt&& from){ // assign by moving _Right 1723 | _Base::move(from); 1724 | return (*this); 1725 | } 1726 | const mapped_type& at(const key_type& k) const { 1727 | return _Base::at(k); 1728 | } 1729 | 1730 | mapped_type& at(const key_type& k) { 1731 | return _Base::at(k); 1732 | } 1733 | 1734 | mapped_type& operator[](const key_type& k){ 1735 | // find element matching _Keyval or insert with default mapped 1736 | return _Base::operator[](k); 1737 | } 1738 | 1739 | // find element matching _Keyval or insert with default mapped 1740 | mapped_type& operator[](key_type&& k){ 1741 | return (*this)[k]; 1742 | } 1743 | 1744 | void swap(_Myt& _Right){ // exchange contents with non-movable _Right 1745 | _Base::swap(_Right); 1746 | } 1747 | }; 1748 | 1749 | 1750 | template 1751 | < class _Kty 1752 | , 
class _Ty 1753 | , class _Hasher = rabbit_hash<_Kty> 1754 | , class _Keyeq = std::equal_to<_Kty> 1755 | , class _Alloc = std::allocator > 1756 | , class _Traits = sparse_traits 1757 | > 1758 | class sparse_unordered_map : public unordered_map<_Kty, _Ty, _Hasher, _Keyeq, _Alloc, _Traits> 1759 | { 1760 | public: 1761 | typedef _Hasher hasher; 1762 | typedef _Kty key_type; 1763 | typedef _Ty mapped_type; 1764 | typedef _Keyeq key_equal; 1765 | typedef unordered_map<_Kty, _Ty, _Hasher, _Keyeq, _Alloc, _Traits> _Base; 1766 | typedef typename _Base::key_compare key_compare; 1767 | 1768 | // typedef typename _Base::value_compare value_compare; 1769 | typedef typename _Base::allocator_type allocator_type; 1770 | typedef typename _Base::size_type size_type; 1771 | typedef typename _Base::difference_type difference_type; 1772 | typedef typename _Base::pointer pointer; 1773 | typedef typename _Base::const_pointer const_pointer; 1774 | typedef typename _Base::reference reference; 1775 | typedef typename _Base::const_reference const_reference; 1776 | typedef typename _Base::iterator iterator; 1777 | typedef typename _Base::const_iterator const_iterator; 1778 | // typedef typename _Base::reverse_iterator reverse_iterator; 1779 | // typedef typename _Base::const_reverse_iterator 1780 | // const_reverse_iterator; 1781 | typedef typename _Base::value_type value_type; 1782 | 1783 | typedef typename _Base::iterator local_iterator; 1784 | typedef typename _Base::const_iterator const_local_iterator; 1785 | sparse_unordered_map(){ 1786 | } 1787 | ~sparse_unordered_map(){ 1788 | } 1789 | }; 1790 | 1791 | }; // rab-bit 1792 | 1793 | #endif /// _RABBIT_H_CEP_20150303_ 1794 | -------------------------------------------------------------------------------- /rabbit/rabbit_set.h: -------------------------------------------------------------------------------- 1 | #ifndef _RABBIT_H_CEP_20150303_ 2 | #define _RABBIT_H_CEP_20150303_ 3 | /** 4 | The MIT License (MIT) 5 | Copyright (c) 2015 
Christiaan Pretorius 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 20 | THE SOFTWARE. 
/// A very basic version of std::pair which keeps references only.
///
/// Both members are references bound at construction. Copy construction
/// binds the new object to the same two values; assignment writes through
/// the references, so it changes the referred-to values rather than
/// rebinding.
template
< class _Ty1
, class _Ty2
>
struct ref_pair {
    // store references to a pair of values

    typedef ref_pair<_Ty1, _Ty2> _Myt;
    typedef _Ty1 first_type;
    typedef _Ty2 second_type;

    /// construct from specified non const values
    ref_pair(_Ty1& _Val1, _Ty2& _Val2)
    : first(_Val1)
    , second(_Val2)
    {
    }

    /// Copy construction binds to the same referents. Defaulted explicitly
    /// because a copy assignment operator is user-declared below; relying on
    /// the implicit constructor in that situation is deprecated.
    ref_pair(const _Myt& _Right) = default;

    /// assign the referred-to values of _Right to the values referred to here
    _Myt& operator=(const _Myt& _Right) {
        first = _Right.first;
        second = _Right.second;
        return (*this);
    }

    /// snapshot the two referred-to values into an owning std::pair
    operator std::pair<_Ty1, _Ty2>() {
        return std::pair<_Ty1, _Ty2>(first, second);
    }

    /// const flavour of the snapshot conversion
    operator const std::pair<_Ty1, _Ty2>() const {
        return std::pair<_Ty1, _Ty2>(first, second);
    }

    _Ty1& first;    // the first stored value
    _Ty2& second;   // the second stored value
};
/// Default hash functor of the table: forwards to std::hash for the key type.
template<typename _Ht>
struct rabbit_hash {
    std::size_t operator()(const _Ht& k) const {
        return static_cast<std::size_t>(std::hash<_Ht>()(k));
    }
};

/// Integral keys hash to themselves (identity hash); the signed flavours are
/// converted to the unsigned type of the same width.
template<>
struct rabbit_hash<long> {
    unsigned long operator()(const long& k) const {
        return static_cast<unsigned long>(k);
    }
};

template<>
struct rabbit_hash<unsigned long> {
    inline unsigned long operator()(const unsigned long& k) const {
        return k;
    }
};

template<>
struct rabbit_hash<unsigned int> {
    inline unsigned int operator()(const unsigned int& k) const {
        return k;
    }
};

template<>
struct rabbit_hash<int> {
    inline unsigned int operator()(const int& k) const {
        return static_cast<unsigned int>(k);
    }
};

template<>
struct rabbit_hash<unsigned long long> {
    inline unsigned long long operator()(const unsigned long long& k) const {
        return k;
    }
};

template<>
struct rabbit_hash<long long> {
    inline unsigned long long operator()(const long long& k) const {
        /// convert straight to the 64 bit return type; going through
        /// unsigned long drops the upper 32 bits where long is 32 bits wide
        return static_cast<unsigned long long>(k);
    }
};
/// Bundles the types the containers need, all derived from one mapper type.
///
/// _InMapper must expose a nested `config_type`, which in turn must expose
/// `_Bt` and `size_type`.
template<class _InMapper>
struct basic_traits {
    typedef typename _InMapper::config_type rabbit_config;
    typedef typename rabbit_config::_Bt _Bt;
    typedef typename rabbit_config::size_type size_type;
    typedef std::ptrdiff_t difference_type;
    typedef _InMapper _Mapper;
};
pointer; 289 | typedef const _ElPair* const_pointer; 290 | typedef _ElPair& reference; 291 | typedef const _ElPair& const_reference; 292 | // typedef typename _Base::reverse_iterator reverse_iterator; 293 | // typedef typename _Base::const_reverse_iterator 294 | // const_reverse_iterator; 295 | 296 | typedef _E key_equal; 297 | typedef _E key_compare; 298 | typedef _H hasher; 299 | typedef _V* _RV; 300 | 301 | static const size_type end_pos = std::numeric_limits::max(); 302 | protected: 303 | struct overflow_stats { 304 | size_type start_elements; 305 | size_type end_elements; 306 | overflow_stats() : start_elements(0), end_elements(0) {} 307 | }; 308 | 309 | struct _KeySegment { 310 | 311 | public: 312 | _Bt overflows; 313 | _Bt exists; 314 | 315 | private: 316 | void set_bit(_Bt& w, _Bt index, bool f) { 317 | 318 | #ifdef _MSC_VER 319 | #pragma warning(disable:4804) 320 | #endif 321 | _Bt m = (_Bt)1ul << index;// the bit mask 322 | w ^= (-f ^ w) & m; 323 | ///w = (w & ~m) | (-f & m); 324 | } 325 | 326 | public: 327 | 328 | inline bool all_exists() const { 329 | return (exists == ~(_Bt)0); 330 | } 331 | 332 | inline bool none_exists() const { 333 | return (exists == (_Bt)0); 334 | } 335 | 336 | inline bool is_exists(_Bt bit) const { 337 | return ((exists >> bit) & (_Bt)1ul); 338 | } 339 | 340 | inline bool is_overflows(_Bt bit) const { 341 | return ((overflows >> bit) & (_Bt)1ul); 342 | } 343 | 344 | inline void set_exists(_Bt index, bool f) { 345 | set_bit(exists, index, f); 346 | } 347 | 348 | inline void toggle_exists(_Bt index) { 349 | exists ^= ((_Bt)1 << index); 350 | } 351 | 352 | void set_overflows(_Bt index, bool f) { 353 | set_bit(overflows, index, f); 354 | } 355 | 356 | void clear() { 357 | exists = 0; 358 | overflows = 0; 359 | } 360 | 361 | _KeySegment() { 362 | exists = 0; 363 | overflows = 0; 364 | } 365 | }; 366 | //typedef _PairSegment _Segment; 367 | typedef _KeySegment _Segment; 368 | /// the vector that will contain the segmented mapping pairs 
and flags 369 | 370 | typedef std::vector<_Segment, _Allocator> _Segments; 371 | typedef std::vector<_K, _Allocator> _Keys; 372 | typedef std::vector<_V, _Allocator> _Values; 373 | 374 | struct hash_kernel { 375 | 376 | /// settings configuration 377 | rabbit_config config; 378 | size_type elements; 379 | size_type initial_probes; 380 | size_type probes; 381 | size_type rand_probes; /// used when there might be an attack 382 | size_type last_modified; 383 | size_type random_val; 384 | /// the existence bit set is a factor of BITS_SIZE+1 less than the extent 385 | _Segment* clusters;///a.k.a. pages 386 | _Keys keys; 387 | ///_Values values; 388 | _V* values; 389 | 390 | size_type overflow; 391 | size_type overflow_elements; 392 | overflow_stats stats; 393 | _Mapper key_mapper; 394 | _H hf; 395 | _E eq_f; 396 | float mf; 397 | size_type buckets; 398 | size_type removed; 399 | bool keys_overflowed; 400 | _Allocator allocator; 401 | _K empty_key; 402 | //bool sparse; 403 | size_type logarithmic; 404 | 405 | typename _Allocator::template rebind<_Segment>::other get_segment_allocator() { 406 | return typename _Allocator::template rebind<_Segment>::other(allocator); 407 | } 408 | 409 | typename _Allocator::template rebind<_V>::other get_value_allocator() { 410 | return typename _Allocator::template rebind<_V>::other(allocator); 411 | } 412 | 413 | /// the minimum load factor 414 | float load_factor() const { 415 | return (float)((double)elements / (double)bucket_count()); 416 | } 417 | 418 | /// there are a variable ammount of buckets there are at most this much 419 | /// 420 | size_type bucket_count() const { 421 | 422 | return get_data_size(); 423 | } 424 | /// the size of a bucket can be calculated based on the 425 | /// hash value of its first occupant 426 | /// mainly to satisfy stl conventions 427 | size_type bucket_size(size_type n) const { 428 | size_type pos = n; 429 | if (!overflows_(pos)) { 430 | if (exists_(pos) && map_key(get_segment_key(pos)) == n) 431 | 
return 1; 432 | else return 0; 433 | } 434 | size_type m = pos + probes; 435 | size_type r = 0; 436 | for (; pos < m; ++pos) { 437 | if (!exists_(pos)) { 438 | } 439 | else if (map_key(get_segment_key(pos)) == n) { 440 | ++r; 441 | } 442 | } 443 | size_type e = end(); 444 | for (pos = get_o_start(); pos < e; ) { 445 | if (!exists_(pos)) { 446 | } 447 | else if (map_key(get_segment_key(pos)) == n) { 448 | ++r; 449 | } 450 | ++pos; 451 | } 452 | return r; 453 | } 454 | 455 | float max_load_factor() const { 456 | return mf; 457 | } 458 | 459 | void max_load_factor(float z) { 460 | mf = z; 461 | } 462 | 463 | /// total data size, never less than than size() 464 | size_type get_data_size() const { 465 | return get_extent() + initial_probes + overflow; 466 | } 467 | 468 | /// the overflow start 469 | size_type get_o_start() const { 470 | return get_extent() + initial_probes; 471 | } 472 | 473 | size_type get_segment_number(size_type pos) const { 474 | return (pos >> config.BITS_LOG2_SIZE); 475 | } 476 | 477 | _Bt get_segment_index(size_type pos) const { 478 | return (_Bt)(pos & (config.BITS_SIZE1)); 479 | } 480 | 481 | _Segment &get_segment(size_type pos) { 482 | return clusters[pos >> config.BITS_LOG2_SIZE]; 483 | } 484 | 485 | const _Segment &get_segment(size_type pos) const { 486 | return clusters[get_segment_number(pos)]; 487 | } 488 | 489 | const _ElPair get_segment_pair(size_type pos) const { 490 | return std::make_pair(get_segment_key(pos), get_segment_value(pos)); 491 | 492 | } 493 | 494 | const _K & get_segment_key(size_type pos) const { 495 | return keys[pos]; 496 | } 497 | 498 | _ElPair get_segment_pair(size_type pos) { 499 | return _ElPair(get_segment_key(pos), get_segment_value(pos)); 500 | } 501 | 502 | _K & get_segment_key(size_type pos) { 503 | return keys[pos]; 504 | } 505 | const _V & get_segment_value(size_type pos) const { 506 | return values[pos]; 507 | } 508 | 509 | _V & get_segment_value(size_type pos) { 510 | return values[pos]; 511 | } 512 | void 
set_segment_key(size_type pos, const _K &k) { 513 | keys[pos] = k; 514 | } 515 | void destroy_segment_value(size_type pos) { 516 | values[pos].~_V(); 517 | } 518 | _V* create_segment_value(size_type pos, const _V &v) { 519 | _V* r = &values[pos]; 520 | new (r) _V(v); 521 | return r; 522 | } 523 | _V* create_segment_value(size_type pos) { 524 | _V* r = &values[pos]; 525 | new (r) _V(); 526 | return r; 527 | } 528 | void set_segment_value(size_type pos, const _V &v) { 529 | values[pos] = v; 530 | } 531 | 532 | void set_exists(size_type pos, bool f) { 533 | last_modified = pos; 534 | get_segment(pos).set_exists(get_segment_index(pos), f); 535 | } 536 | 537 | void set_overflows(size_type pos, bool f) { 538 | get_segment(pos).set_overflows(get_segment_index(pos), f); 539 | } 540 | 541 | inline bool exists_(size_type pos) const { 542 | return get_segment(pos).is_exists(get_segment_index(pos)); 543 | } 544 | 545 | inline bool overflows_(size_type pos) const { 546 | return get_segment(pos).is_overflows(get_segment_index(pos)); 547 | } 548 | 549 | inline bool overflowed_(size_type pos) const { 550 | return false; 551 | } 552 | 553 | inline size_type map_key(const _K& k) const { 554 | size_type h = (size_type)_H()(k); 555 | 556 | return key_mapper(h); 557 | 558 | } 559 | inline size_type map_rand_key(const _K& k) const { 560 | size_type h = (size_type)_H()(k); 561 | if (this->rand_probes) 562 | return key_mapper(randomize(h)); 563 | return key_mapper(h); // 564 | } 565 | size_type get_e_size() const { 566 | return (size_type)(get_data_size() / config.BITS_SIZE) + 1; 567 | } 568 | 569 | void clear_data() { 570 | if (values) { 571 | for (size_type pos = 0; pos < get_data_size(); ++pos) { 572 | if (exists_(pos)) { 573 | get_value_allocator().destroy(&values[pos]); 574 | } 575 | } 576 | } 577 | } 578 | 579 | void free_values() { 580 | if (values) { 581 | clear_data(); 582 | get_value_allocator().deallocate(values, get_data_size()); 583 | values = nullptr; 584 | } 585 | } 586 | 
void free_data() { 587 | free_values(); 588 | if (clusters) { 589 | size_type esize = get_e_size(); 590 | for (size_type c = 0; c < esize; ++c) { 591 | get_segment_allocator().destroy(&clusters[c]); 592 | } 593 | get_segment_allocator().deallocate(clusters, get_e_size()); 594 | } 595 | values = nullptr; 596 | clusters = nullptr; 597 | } 598 | double get_resize_factor() const { 599 | return key_mapper.resize_factor(); 600 | } 601 | size_type get_probes() const { 602 | return this->probes; 603 | } 604 | void set_rand_probes() { 605 | this->rand_probes = this->probes; 606 | } 607 | void set_rand_probes(size_type rand_probes) { 608 | this->rand_probes = rand_probes; 609 | } 610 | 611 | /// clears all data and resize the new data vector to the parameter 612 | void resize_clear(size_type new_extent) { 613 | /// inverse of factor used to determine overflow list 614 | /// when overflow list is full rehash starts 615 | free_data(); 616 | 617 | key_mapper = _Mapper(new_extent, config); 618 | 619 | mf = 1.0; 620 | assert(config.MAX_OVERFLOW_FACTOR > 0); 621 | if (is_logarithmic()) { 622 | probes = config.log2(new_extent)*logarithmic; 623 | overflow = config.log2(new_extent)*logarithmic; 624 | } 625 | else { 626 | probes = config.PROBES; //config.log2(new_extent); //-config.log2(config.MIN_EXTENT/2); /// start probes config.PROBES; // 627 | overflow = std::max(config.PROBES, new_extent / config.MAX_OVERFLOW_FACTOR); //config.log2(new_extent); // *16* 628 | } 629 | 630 | rand_probes = 0; 631 | 632 | initial_probes = probes; 633 | //std::cout << "rehash with overflow:" << overflow << std::endl; 634 | elements = 0; 635 | removed = 0; 636 | empty_key = _K(); 637 | keys_overflowed = false; 638 | overflow_elements = get_o_start(); 639 | size_type esize = get_e_size(); 640 | keys.resize(get_data_size()); 641 | clusters = get_segment_allocator().allocate(esize); 642 | values = get_value_allocator().allocate(get_data_size()); 643 | _KeySegment ks; 644 | for (size_type c = 0; c < esize; 
++c) { 645 | get_segment_allocator().construct(&clusters[c], ks); 646 | } 647 | set_exists(get_data_size(), true); 648 | buckets = 0; 649 | 650 | }; 651 | 652 | void clear() { 653 | size_type esize = get_e_size(); 654 | //clear_data(); 655 | for (size_type c = 0; c < esize; ++c) { 656 | clusters[c].clear(); 657 | } 658 | set_exists(get_data_size(), true); 659 | elements = 0; 660 | removed = 0; 661 | //resize_clear(config.MIN_EXTENT); 662 | } 663 | 664 | hash_kernel(const key_compare& compare, const allocator_type& allocator) 665 | : clusters(nullptr), values(nullptr), eq_f(compare), mf(1.0f), allocator(allocator), logarithmic(config.LOGARITHMIC) { 666 | resize_clear(config.MIN_EXTENT); 667 | } 668 | 669 | hash_kernel() : clusters(nullptr), values(nullptr), mf(1.0f), logarithmic(config.LOGARITHMIC) { 670 | resize_clear(config.MIN_EXTENT); 671 | } 672 | 673 | hash_kernel(const hash_kernel& right) : clusters(nullptr), values(nullptr), mf(1.0f), logarithmic(config.LOGARITHMIC) { 674 | *this = right; 675 | } 676 | 677 | ~hash_kernel() { 678 | free_data(); 679 | } 680 | inline size_type get_extent() const { 681 | return key_mapper.extent; 682 | } 683 | void set_logarithmic(size_type loga) { 684 | logarithmic = loga; 685 | } 686 | size_type get_logarithmic() const { 687 | return this->logarithmic; 688 | } 689 | bool is_logarithmic() const { 690 | return this->logarithmic > 0; 691 | } 692 | hash_kernel& operator=(const hash_kernel& right) { 693 | config = right.config; 694 | key_mapper = right.key_mapper; 695 | free_data(); 696 | buckets = right.buckets; 697 | removed = right.removed; 698 | mf = right.mf; 699 | elements = right.elements; 700 | size_type esize = get_e_size(); 701 | 702 | clusters = get_segment_allocator().allocate(esize); 703 | values = get_value_allocator().allocate(get_data_size()); 704 | 705 | std::copy(values, right.values, right.values + right.get_data_size()); 706 | keys = right.keys; 707 | std::copy(clusters, right.clusters, right.clusters + esize); 
708 | 709 | return *this; 710 | } 711 | inline bool raw_equal_key(size_type pos, const _K& k) const { 712 | const _K& l = get_segment_key(pos); ///.key(get_segment_index(pos)); 713 | return eq_f(l, k); 714 | } 715 | inline bool segment_equal_key_exists(size_type pos, const _K& k) const { 716 | _Bt index = get_segment_index(pos); 717 | const _Segment& s = get_segment(pos); 718 | return eq_f(get_segment_key(pos), k) && s.is_exists(index); 719 | 720 | } 721 | 722 | bool equal_key(size_type pos, const _K& k) const { 723 | const _K& l = get_segment_key(pos); 724 | return eq_f(l, k); 725 | } 726 | 727 | inline size_type randomize(size_type v) const { 728 | return key_mapper.randomize(v); 729 | } 730 | 731 | inline size_type hash_probe_incr(size_type i) const { 732 | return 1; 733 | 734 | } 735 | /// when all inputs to this function is unique relative to current hash map(i.e. they dont exist in the hashmap) 736 | /// and there where no erasures. for maximum fillrate in rehash 737 | _V* unique_subscript_rest(const _K& k, size_type origin) { 738 | 739 | size_type pos = map_rand_key(k); 740 | 741 | size_type start = 0; 742 | for (unsigned int i = 0; i < probes && pos < get_extent(); ++i) { 743 | 744 | if (!exists_(pos)) { 745 | set_exists(pos, true); 746 | set_segment_key(pos, k); 747 | set_overflows(origin, true); 748 | keys_overflowed = true; 749 | ++elements; 750 | last_modified = pos; 751 | return create_segment_value(pos); 752 | } 753 | pos += hash_probe_incr(i); 754 | } 755 | 756 | if (overflow_elements < end()) { 757 | pos = overflow_elements++; 758 | 759 | if (!exists_(pos)) { 760 | set_overflows(origin, true); 761 | keys_overflowed = true; 762 | set_exists(pos, true); 763 | set_segment_key(pos, k); 764 | ++elements; 765 | 766 | last_modified = pos; 767 | return create_segment_value(pos); 768 | } 769 | }; 770 | 771 | return nullptr; 772 | } 773 | _V* unique_subscript(const _K& k) { 774 | 775 | /// eventualy an out of memory (bad_allocation) exception will occur 776 | 
size_type pos = map_key(k); 777 | _Bt si = get_segment_index(pos); 778 | _Segment &s = clusters[pos >> config.BITS_LOG2_SIZE];/// get_segment(pos) 779 | 780 | if (!s.is_exists(si)) { //!s.is_overflows(si) 781 | s.toggle_exists(si); 782 | set_segment_key(pos, k); 783 | ++elements; 784 | 785 | return create_segment_value(pos); 786 | } 787 | 788 | return unique_subscript_rest(k, pos); 789 | } 790 | 791 | _V* subscript_rest(const _K& k, size_type origin) { 792 | size_type pos = map_rand_key(k); 793 | for (unsigned int i = 0; i < probes && pos < get_extent(); ++i) { 794 | _Bt si = get_segment_index(pos); 795 | _Segment& s = get_segment(pos); 796 | if (!s.is_exists(si)) { 797 | s.toggle_exists(si); 798 | //s.key(si)=k; 799 | set_segment_key(pos, k); 800 | ++elements; 801 | set_overflows(origin, true); 802 | keys_overflowed = true; 803 | return create_segment_value(pos); 804 | } 805 | pos += hash_probe_incr(i); 806 | } 807 | 808 | size_type at_empty = end(); 809 | 810 | if (overflow_elements < end()) { 811 | if (!exists_(overflow_elements)) { 812 | at_empty = overflow_elements++; 813 | 814 | } 815 | } 816 | else if (removed) { 817 | size_type e = end(); 818 | for (pos = get_o_start(); pos < e; ) { 819 | if (!exists_(pos)) { 820 | 821 | at_empty = pos; break; 822 | } 823 | ++pos; 824 | } 825 | } 826 | 827 | pos = at_empty; 828 | if (pos != end()) { 829 | set_overflows(origin, true); 830 | keys_overflowed = true; 831 | set_exists(pos, true); 832 | set_segment_key(pos, k); 833 | size_type os = (overflow_elements - (get_extent() + initial_probes)); 834 | if (os == 1) { 835 | stats.start_elements = elements; 836 | //std::cout << "overflow start: hash table size " << elements << " elements in over flow:" << os << std::endl; 837 | } 838 | 839 | if (overflow_elements == end() && stats.start_elements) { 840 | stats.end_elements = elements; 841 | size_type saved = stats.end_elements - stats.start_elements - os; 842 | double percent_saved = (100.0*((double)saved / 
(double)elements)); 843 | 844 | // std::cout << "overflow end: hash table size " << elements << " elements in over flow:" << os << " saved : " << saved << 845 | // std::endl << " percent saved " << std::setprecision(4) << percent_saved << 846 | // std::endl; 847 | } 848 | 849 | ++elements; 850 | return create_segment_value(pos); 851 | 852 | } 853 | return nullptr; 854 | } 855 | _V* subscript(const _K& k) { 856 | size_type pos = map_key(k); 857 | _Bt si = get_segment_index(pos); 858 | _Segment& s = get_segment(pos); 859 | bool key_exists = s.is_exists(si); 860 | //key_overflows = s.is_overflows(si); 861 | if (!key_exists) { //!key_overflows && 862 | s.toggle_exists(si); 863 | set_segment_key(pos, k); 864 | ++elements; 865 | return create_segment_value(pos); 866 | } 867 | else if (key_exists && equal_key(pos, k)) { 868 | return &(get_segment_value(pos)); 869 | } 870 | bool key_overflows = s.is_overflows(si); 871 | size_type h = pos; 872 | if (key_overflows) { 873 | pos = find_rest(k, h); 874 | if (pos != end()) { 875 | return &(get_segment_value(pos)); 876 | } 877 | } 878 | return subscript_rest(k, h); 879 | } 880 | size_type erase_rest(const _K& k, size_type origin) 881 | RABBIT_NOINLINE_ /// this function must never be inlined 882 | { 883 | size_type pos = find_rest(k, origin); 884 | 885 | if (pos != (*this).end()) { 886 | set_exists(pos, false); 887 | ++removed; 888 | set_segment_key(pos, empty_key); 889 | destroy_segment_value(pos); 890 | --elements; 891 | if (pos >= get_o_start()) { 892 | size_type c = get_o_start(); 893 | for (; c < overflow_elements; ++c) { 894 | if (origin == map_key(get_segment_key(c))) { 895 | break; 896 | } 897 | } 898 | } 899 | return 1; 900 | } 901 | return 0; 902 | } 903 | size_type erase(const _K& k) { 904 | 905 | size_type pos = map_key(k); 906 | 907 | _Bt si = get_segment_index(pos); 908 | _Segment& s = get_segment(pos); 909 | if (s.is_exists(si) && equal_key(pos, k)) { ///get_segment(pos).exists == ALL_BITS_SET || 910 | 
set_segment_key(pos, empty_key); 911 | s.toggle_exists(si); 912 | destroy_segment_value(pos); 913 | --elements; 914 | ++removed; 915 | return 1; 916 | } 917 | if (!s.is_overflows(si)) { 918 | return 0; 919 | } 920 | else 921 | return erase_rest(k, pos); 922 | 923 | } 924 | /// not used (could be used where hash table must actually shrink too) 925 | bool is_small() const { 926 | return (get_extent() > (config.MIN_EXTENT << 3)) && (elements < get_extent() / 8); 927 | } 928 | 929 | size_type count(const _K& k) const { 930 | size_type pos = (*this).find(k); 931 | if (pos == (*this).end()) { 932 | return 0; 933 | } 934 | else return 1; 935 | } 936 | const _V& at(const _K& k) const { 937 | size_type pos = find(k); 938 | if (pos != (*this).end()) { 939 | return get_segment_value(pos); 940 | } 941 | throw std::exception(); 942 | } 943 | _V& at(const _K& k) { 944 | size_type pos = find(k); 945 | if (pos != (*this).end()) { 946 | return get_segment_value(pos); 947 | } 948 | throw std::exception(); 949 | } 950 | 951 | bool get(const _K& k, _V& v) const { 952 | size_type pos = find(k); 953 | if (pos != (*this).end()) { 954 | v = get_segment_value(pos); 955 | return true; 956 | } 957 | return false; 958 | } 959 | 960 | size_type find_rest(const _K& k, size_type origin) const { 961 | /// randomization step for attack mitigation 962 | size_type pos = map_rand_key(k); 963 | 964 | for (unsigned int i = 0; i < probes && pos < get_extent();) { 965 | _Bt si = get_segment_index(pos); 966 | if (segment_equal_key_exists(pos, k)) { 967 | return pos; 968 | } 969 | pos += hash_probe_incr(i); 970 | ++i; 971 | } 972 | 973 | for (pos = get_o_start(); pos < overflow_elements; ) { 974 | if (equal_key(pos, k) && exists_(pos)) return pos; 975 | ++pos; 976 | } 977 | 978 | return end(); 979 | } 980 | size_type find(const _K& k, size_type& pos) const { 981 | pos = map_key(k); 982 | bool is_empty = eq_f(empty_key, k); 983 | 984 | if (is_empty) { 985 | _Bt index = get_segment_index(pos); 986 | const 
_Segment& s = get_segment(pos); 987 | if (s.is_exists(index) && equal_key(pos, k)) { ///get_segment(pos).exists == ALL_BITS_SET || 988 | return pos; 989 | } 990 | if (!s.is_overflows(index)) { 991 | return end(); 992 | } 993 | } 994 | else { 995 | if (equal_key(pos, k)) return pos; 996 | 997 | } 998 | _Bt index = get_segment_index(pos); 999 | const _Segment& s = get_segment(pos); 1000 | if (!s.is_overflows(index)) { 1001 | return end(); 1002 | } 1003 | 1004 | return find_rest(k, pos); 1005 | } 1006 | size_type find(const _K& k) const { 1007 | 1008 | size_type pos; 1009 | return find(k, pos); 1010 | } 1011 | 1012 | size_type begin() const { 1013 | if (elements == 0) 1014 | return end(); 1015 | size_type pos = 0; 1016 | while (!exists_(pos)) { 1017 | ++pos; 1018 | 1019 | } 1020 | return pos; 1021 | } 1022 | size_type end() const { 1023 | return get_data_size(); 1024 | } 1025 | size_type size() const { 1026 | return elements; 1027 | } 1028 | typedef std::shared_ptr ptr; 1029 | }; /// hash_kernel 1030 | typedef std::shared_ptr _KernelPtr; 1031 | typedef std::vector<_KernelPtr> _Kernels; 1032 | _Kernels versions; 1033 | public: 1034 | 1035 | struct iterator { 1036 | typedef hash_kernel* kernel_ptr; 1037 | const basic_unordered_set* h; 1038 | size_type pos; 1039 | mutable char rdata[sizeof(_ElPair)]; 1040 | private: 1041 | _Bt index; 1042 | _Bt exists; 1043 | _Bt bsize; 1044 | const kernel_ptr get_kernel() const { 1045 | return h->pcurrent; 1046 | 1047 | } 1048 | kernel_ptr get_kernel() { 1049 | return h->pcurrent; 1050 | } 1051 | void set_index() { 1052 | if (h != nullptr && !is_end(*this)) {// 1053 | const _Segment& s = get_kernel()->get_segment(pos); 1054 | exists = s.exists; 1055 | index = get_kernel()->get_segment_index(pos); 1056 | bsize = get_kernel()->config.BITS_SIZE; 1057 | } 1058 | } 1059 | void check_index() { 1060 | 1061 | } 1062 | void increment() { 1063 | ++pos; 1064 | ++index; 1065 | if (index == bsize) { 1066 | set_index(); 1067 | } 1068 | 1069 | } 1070 
| public: 1071 | iterator() : h(nullptr), pos(0) { 1072 | } 1073 | 1074 | iterator(const end_iterator&) : h(nullptr), pos(end_pos) { 1075 | } 1076 | iterator(const basic_unordered_set* h, size_type pos) : pos(pos) { 1077 | this->h = h; 1078 | set_index(); 1079 | } 1080 | 1081 | iterator(const iterator& r) { 1082 | (*this) = r; 1083 | } 1084 | 1085 | //~iterator() { 1086 | //} 1087 | 1088 | iterator& operator=(const iterator& r) { 1089 | pos = r.pos; 1090 | h = r.h; 1091 | set_index(); 1092 | 1093 | return (*this); 1094 | } 1095 | inline iterator& operator++() { 1096 | do { 1097 | increment(); 1098 | } while ((exists & (((_Bt)1) << index)) == (_Bt)0); 1099 | return (*this); 1100 | } 1101 | iterator operator++(int) { 1102 | iterator t = (*this); 1103 | ++(*this); 1104 | return t; 1105 | } 1106 | inline _V& get_value() { 1107 | return get_kernel()->get_segment_value((*this).pos); 1108 | } 1109 | inline const _V& get_value() const { 1110 | return get_kernel()->get_segment_value((*this).pos); 1111 | } 1112 | inline _K& get_key() { 1113 | return get_kernel()->get_segment_key((*this).pos); 1114 | } 1115 | inline const _K& get_key() const { 1116 | return get_kernel()->get_segment_key((*this).pos); 1117 | } 1118 | const _ElPair operator*() const { 1119 | return get_kernel()->get_segment_pair((*this).pos); 1120 | } 1121 | inline _ElPair operator*() { 1122 | return get_kernel()->get_segment_pair((*this).pos); 1123 | } 1124 | inline _ElPair* operator->() const { 1125 | /// can reconstruct multiple times on same memory because _ElPair is only references 1126 | _ElPair* ret = new ((void *)rdata) _ElPair(get_kernel()->get_segment_pair(pos)); 1127 | return ret; 1128 | } 1129 | inline const _ElPair *operator->() { 1130 | /// can reconstruct multiple times on same memory because _ElPair is only references 1131 | _ElPair* ret = new ((void *)rdata) _ElPair(get_kernel()->get_segment_pair(pos)); 1132 | return ret; 1133 | } 1134 | inline bool operator==(const iterator& r) const { 1135 | 
if (r.pos == end_pos) return is_end(); 1136 | return (pos == r.pos); 1137 | } 1138 | bool operator!=(const iterator& r) const { 1139 | if (r.pos == end_pos) return !is_end(); 1140 | return (pos != r.pos); 1141 | } 1142 | inline bool operator==(const end_iterator& r) const { 1143 | return is_end(); 1144 | } 1145 | bool operator!=(const end_iterator& r) const { 1146 | return !is_end(); 1147 | 1148 | } 1149 | bool is_end(const iterator& r) const { 1150 | if (h == nullptr) return pos == end_pos; 1151 | return r.pos >= get_kernel()->end(); 1152 | } 1153 | bool is_end() const { 1154 | return is_end(*this); 1155 | } 1156 | size_type get_pos() const { 1157 | return pos; 1158 | } 1159 | 1160 | }; 1161 | 1162 | struct const_iterator { 1163 | private: 1164 | typedef hash_kernel* kernel_ptr; 1165 | const basic_unordered_set* h; 1166 | //mutable kernel_ptr h; 1167 | _Bt index; 1168 | _Bt exists; 1169 | mutable char rdata[sizeof(_ElPair)]; 1170 | inline const kernel_ptr get_kernel() const { 1171 | 1172 | return h->pcurrent; // current.get(); 1173 | } 1174 | inline kernel_ptr get_kernel() { 1175 | 1176 | return const_cast(h)->pcurrent; // current.get(); 1177 | } 1178 | void set_index() { 1179 | if (get_kernel() != nullptr && !is_end(*this)) { /// 1180 | const _Segment& s = get_kernel()->get_segment(pos); 1181 | exists = s.exists; 1182 | index = get_kernel()->get_segment_index(pos); 1183 | } 1184 | } 1185 | void check_index() { 1186 | 1187 | } 1188 | void increment() { 1189 | ++pos; 1190 | ++index; 1191 | if (index == get_kernel()->config.BITS_SIZE) { 1192 | set_index(); 1193 | } 1194 | 1195 | } 1196 | public: 1197 | size_type pos; 1198 | 1199 | const_iterator() : h(nullptr){ 1200 | 1201 | } 1202 | const_iterator(const end_iterator&) : h(nullptr), pos(end_pos) { 1203 | } 1204 | //~const_iterator() { 1205 | 1206 | //} 1207 | const_iterator(const basic_unordered_set* h, size_type pos) : pos(pos) { 1208 | this->h = h; // ->current.get(); 1209 | set_index(); 1210 | } 1211 | 
const_iterator(const iterator& r) : h(nullptr){ 1212 | (*this) = r; 1213 | } 1214 | 1215 | const_iterator& operator=(const iterator& r) { 1216 | pos = r.pos; 1217 | h = r.h; 1218 | set_index(); 1219 | return (*this); 1220 | } 1221 | 1222 | const_iterator& operator=(const const_iterator& r) { 1223 | pos = r.pos; 1224 | h = r.h; 1225 | index = r.index; 1226 | return (*this); 1227 | } 1228 | 1229 | const_iterator& operator++() { 1230 | do { 1231 | increment(); 1232 | } while ((exists & (((_Bt)1) << index)) == (_Bt)0); 1233 | //increment(); 1234 | //while ((exists & (((_Bt)1) << index)) == (_Bt)0) { 1235 | // increment(); 1236 | //} 1237 | 1238 | 1239 | return (*this); 1240 | } 1241 | const_iterator operator++(int) { 1242 | return (*this); 1243 | } 1244 | const _ElPair operator*() const { 1245 | return get_kernel()->get_segment_pair(pos); 1246 | 1247 | } 1248 | const _ElPair *operator->() const { 1249 | /// can reconstruct multiple times on same memory because _ElPair is only references 1250 | _ElPair* ret = new ((void *)rdata) _ElPair(get_kernel()->get_segment_pair(pos)); 1251 | return ret; 1252 | } 1253 | 1254 | inline bool operator==(const const_iterator& r) const { 1255 | if (r.pos == end_pos) return is_end(); 1256 | return (pos == r.pos); 1257 | } 1258 | bool operator!=(const const_iterator& r) const { 1259 | if (r.pos == end_pos) return !is_end(); 1260 | return (pos != r.pos); 1261 | } 1262 | bool is_end(const const_iterator& r) const { 1263 | if (h == nullptr) return false; 1264 | return r.pos >= get_kernel()->end(); 1265 | } 1266 | bool is_end() const { 1267 | return is_end(*this); 1268 | } 1269 | size_type get_pos() const { 1270 | return pos; 1271 | } 1272 | 1273 | }; 1274 | 1275 | protected: 1276 | /// the default config for each hash instance 1277 | rabbit_config default_config; 1278 | key_compare key_c; 1279 | allocator_type alloc; 1280 | 1281 | void rehash() { 1282 | size_type to = current->key_mapper.next_size(); 1283 | rehash(to); 1284 | } 1285 | 1286 | 
void set_current(typename hash_kernel::ptr c) { 1287 | pcurrent = c.get(); 1288 | current = c; 1289 | } 1290 | 1291 | typename hash_kernel::ptr current; 1292 | hash_kernel* pcurrent; 1293 | inline void create_current() { 1294 | if (current == nullptr) 1295 | 1296 | set_current(std::allocate_shared(alloc, key_c, alloc)); 1297 | } 1298 | public: 1299 | float load_factor() const { 1300 | if (current == nullptr) return 0; 1301 | return current->load_factor(); 1302 | } 1303 | size_type bucket_count() const { 1304 | if (current == nullptr) return 0; 1305 | return current->bucket_count(); 1306 | } 1307 | size_type bucket_size(size_type n) const { 1308 | if (current == nullptr) return 0; 1309 | return current->bucket_size(n); 1310 | } 1311 | float max_load_factor() const { 1312 | if (current == nullptr) 1; 1313 | return current->max_load_factor(); 1314 | } 1315 | 1316 | void max_load_factor(float z) { 1317 | create_current(); 1318 | current->max_load_factor(z); 1319 | } 1320 | bool empty() const { 1321 | if (current == nullptr) return true; 1322 | return current->size() == 0; 1323 | } 1324 | void reserve(size_type atleast) { 1325 | create_current(); 1326 | rehash((size_type)((double)atleast*current->get_resize_factor())); 1327 | } 1328 | void resize(size_type atleast) { 1329 | create_current(); 1330 | rehash(current->key_mapper.nearest_larger(atleast)); 1331 | } 1332 | void rehash(size_type to_) { 1333 | create_current(); 1334 | rabbit_config config; 1335 | size_type to = std::max(to_, config.MIN_EXTENT); 1336 | /// can cause oom e because of recursive rehash'es 1337 | 1338 | typename hash_kernel::ptr rehashed = std::allocate_shared(alloc); 1339 | size_type extent = current->get_extent(); 1340 | size_type new_extent = to; 1341 | size_type nrand_probes = 0; 1342 | hash_kernel * reh = rehashed.get(); 1343 | hash_kernel * cur = current.get(); 1344 | try { 1345 | 1346 | rehashed->set_logarithmic(current->get_logarithmic()); 1347 | rehashed->resize_clear(new_extent); 1348 | 
rehashed->mf = (*this).current->mf; 1349 | //std::cout << " load factor " << current->load_factor() << std::endl; 1350 | if (current->load_factor() < 0.2) { 1351 | /// std::cout << "possible attack/bad hash detected : using random probes : " << current->get_probes() << std::endl; 1352 | nrand_probes = 1; 1353 | rehashed->set_rand_probes(nrand_probes); 1354 | } 1355 | using namespace std; 1356 | 1357 | 1358 | while (true) { 1359 | iterator e = end(); 1360 | size_type ctr = 0; 1361 | bool rerehashed = false; 1362 | 1363 | //_K k; 1364 | for (iterator i = begin(); i != e; ++i) { 1365 | //std::swap(k,(*i).first); 1366 | _RV v = rehashed->unique_subscript((*i).first); 1367 | if (v != nullptr) { 1368 | *v = i->second; 1369 | /// a cheap check to illuminate subtle bugs during development 1370 | if (++ctr != rehashed->elements) { 1371 | cout << "iterations " << ctr << " elements " << rehashed->elements << " extent " << rehashed->get_extent() << endl; 1372 | cout << "inside rehash " << rehashed->get_extent() << endl; 1373 | cout << "new " << rehashed->elements << " current size:" << current->elements << endl; 1374 | throw bad_alloc(); 1375 | } 1376 | 1377 | } 1378 | else { 1379 | //std::cout << "rehashing in rehash " << ctr << " of " << current->elements << std::endl; 1380 | rerehashed = true; 1381 | new_extent = rehashed->key_mapper.next_size(); 1382 | rehashed = std::allocate_shared(alloc); 1383 | rehashed->resize_clear(new_extent); 1384 | rehashed->mf = (*this).current->mf; 1385 | rehashed->set_rand_probes(nrand_probes); 1386 | // i = begin(); // start over 1387 | //ctr = 0; 1388 | break; 1389 | 1390 | } 1391 | } 1392 | if (rehashed->elements == current->elements) { 1393 | break; 1394 | } 1395 | else if (!rerehashed) { 1396 | cout << "hash error: unequal key count - retry rehash " << endl; 1397 | cout << "iterations " << ctr << " elements " << rehashed->elements << " extent " << rehashed->get_extent() << endl; 1398 | cout << "new " << rehashed->elements << " current 
size:" << current->elements << endl; 1399 | throw bad_alloc(); 1400 | } 1401 | else { 1402 | //cout << "re-rehashing iterations " << ctr << " elements " << rehashed->elements << " extent " << rehashed->get_extent() << endl; 1403 | //rehashed->resize_clear(rehashed->get_extent()); 1404 | //break; 1405 | } 1406 | 1407 | }/// for 1408 | 1409 | } 1410 | catch (std::bad_alloc &e) { 1411 | std::cout << "bad allocation: rehash failed in temp phase :" << new_extent << std::endl; 1412 | size_t t = 0; 1413 | std::cin >> t; 1414 | throw e; 1415 | } 1416 | set_current(rehashed); 1417 | 1418 | } 1419 | void clear() { 1420 | if (current != nullptr) 1421 | current->clear(); 1422 | current = nullptr; 1423 | ///set_current(std::allocate_shared(alloc)); 1424 | } 1425 | 1426 | void clear(const key_compare& compare, const allocator_type& allocator) { 1427 | set_current(std::allocate_shared(allocator, compare, allocator)); 1428 | } 1429 | 1430 | basic_unordered_set() :current(nullptr) { 1431 | // 1432 | } 1433 | 1434 | basic_unordered_set(const key_compare& compare, const allocator_type& allocator) : key_c(compare), alloc(allocator) { 1435 | 1436 | } 1437 | 1438 | basic_unordered_set(const basic_unordered_set& right) { 1439 | *this = right; 1440 | } 1441 | 1442 | ~basic_unordered_set() { 1443 | 1444 | } 1445 | 1446 | void swap(basic_unordered_set& with) { 1447 | typename hash_kernel::ptr t = with.current; 1448 | with.set_current(this->current); 1449 | this->set_current(t); 1450 | } 1451 | 1452 | void move(basic_unordered_set& from) { 1453 | (*this).current = from.current; 1454 | from.current = nullptr; 1455 | } 1456 | 1457 | basic_unordered_set& operator=(const basic_unordered_set& right) { 1458 | (*this).set_current(std::allocate_shared(alloc)); 1459 | (*this).reserve(right.size()); 1460 | const_iterator e = right.end(); 1461 | for (const_iterator c = right.begin(); c != e; ++c) { 1462 | (*this)[(*c).first] = (*c).second; 1463 | } 1464 | 1465 | return *this; 1466 | } 1467 | 1468 | 
hasher hash_function() const { 1469 | return (this->current->hf); 1470 | } 1471 | 1472 | key_equal key_eq() const { 1473 | if (current != nullptr) 1474 | return (this->current->eq_f); 1475 | return key_equal(); 1476 | } 1477 | iterator insert(const _K& k, const _V& v) { 1478 | create_current(); 1479 | (*this)[k] = v; 1480 | return iterator(this, current->last_modified); 1481 | } 1482 | 1483 | iterator insert(const std::pair<_K, _V>& p) { 1484 | 1485 | return iterator(this, insert(p.first, p.second)); 1486 | } 1487 | /// generic template copy 1488 | template 1489 | iterator insert(_Iter start, _Iter _afterLast) { 1490 | create_current(); 1491 | for (_Iter i = start; i != _afterLast; ++i) { 1492 | insert((*i).first, (*i).second); 1493 | } 1494 | return iterator(this, current->last_modified); 1495 | } 1496 | /// fast getter that doesnt use iterators and doesnt change the table without letting you know 1497 | bool get(const _K& k, _V& v) const { 1498 | if (current != nullptr) 1499 | return (*this).current->get(k, v); 1500 | return false; 1501 | } 1502 | /// throws a exception when value could not match the key 1503 | const _V& at(const _K& k) const { 1504 | if (current == nullptr) throw std::exception(); 1505 | return (*this).current->at(k); 1506 | } 1507 | _V& at(const _K& k) { 1508 | create_current(); 1509 | return (*this).current->at(k); 1510 | } 1511 | 1512 | bool error(const _K& k) { 1513 | _V *rv = current->subscript(k); 1514 | return rv == nullptr; 1515 | } 1516 | 1517 | _V& operator[](const _K& k) { 1518 | create_current(); 1519 | _V *rv = current->subscript(k); 1520 | while (rv == nullptr) { 1521 | this->rehash(); 1522 | rv = current->subscript(k); 1523 | } 1524 | return *rv; 1525 | } 1526 | size_type erase(const _K& k) { 1527 | if (current == nullptr) return size_type(); 1528 | //if(current->is_small()){ 1529 | // rehash(1); 1530 | //} 1531 | return current->erase(k); 1532 | } 1533 | size_type erase(iterator i) { 1534 | return erase((*i).first); 1535 | } 1536 
| size_type erase(const_iterator i) { 1537 | return erase((*i).first); 1538 | } 1539 | size_type count(const _K& k) const { 1540 | if (current == nullptr)return size_type(); 1541 | return current->count(k); 1542 | } 1543 | iterator find(const _K& k) const { 1544 | if (current == nullptr) return iterator(this, size_type()); 1545 | 1546 | return iterator(this, current->find(k)); 1547 | } 1548 | iterator begin() const { 1549 | if (current == nullptr)return iterator(this, size_type()); 1550 | return iterator(this, current->begin()); 1551 | } 1552 | end_iterator end() const { 1553 | return end_iterator(); // iterator(end_pos); 1554 | } 1555 | const_iterator cbegin() const { 1556 | if (current == nullptr)return const_iterator(this, size_type()); 1557 | return const_iterator(this, current->begin()); 1558 | } 1559 | const_iterator cend() const { 1560 | return iterator(end_pos); 1561 | } 1562 | size_type size() const { 1563 | if (current == nullptr)return size_type(); 1564 | return current->size(); 1565 | } 1566 | void set_logarithmic(size_type logarithmic) { 1567 | create_current(); 1568 | this->current->set_logarithmic(logarithmic); 1569 | } 1570 | }; 1571 | 1572 | /// the unordered set 1573 | template > 1574 | class unordered_set : public basic_unordered_set<_K, char, _H> { 1575 | protected: 1576 | typedef basic_unordered_set<_K, char, _H> _Container; 1577 | public: 1578 | 1579 | 1580 | unordered_set() { 1581 | } 1582 | 1583 | ~unordered_set() { 1584 | } 1585 | 1586 | void insert(const _K& k) { 1587 | _Container::insert(k, '0'); 1588 | } 1589 | 1590 | }; /// unordered set 1591 | }; // rab-bit 1592 | 1593 | #endif /// _RABBIT_H_CEP_20150303_ 1594 | -------------------------------------------------------------------------------- /rabbit/unordered_map: -------------------------------------------------------------------------------- 1 | /// defines a map 2 | #pragma once 3 | #include 4 | //#include 5 | 
-------------------------------------------------------------------------------- /rabbit/unordered_set: -------------------------------------------------------------------------------- 1 | /// defines a set 2 | #pragma once 3 | #include 4 | -------------------------------------------------------------------------------- /rabbit_tests/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | #ifdef _MSC_VER 13 | #endif 14 | #include 15 | #ifdef _HAS_GOOGLE_HASH_ 16 | #include 17 | #endif 18 | #include 19 | #include 20 | 21 | #define _HAS_STD_HASH_ 22 | #ifdef _HAS_GOOGLE_HASH_ 23 | #include 24 | #include 25 | #include 26 | #endif 27 | #include 28 | 29 | #ifdef _WIN32 30 | #define _CRT_SECURE_NO_WARNINGS 31 | #include 32 | #include 33 | double get_proc_mem_use(const double MB = 1024.0*1024.0) { 34 | 35 | 36 | PROCESS_MEMORY_COUNTERS memCounter; 37 | bool result = (GetProcessMemoryInfo(GetCurrentProcess(), 38 | &memCounter, 39 | sizeof(memCounter)) != 0); 40 | if (result) { 41 | return memCounter.WorkingSetSize / MB; 42 | } 43 | 44 | return 0.0; 45 | } 46 | 47 | #else 48 | #include 49 | double get_proc_mem_use(const double MB = 1024.0*1024.0) { 50 | return 0.0; 51 | } 52 | #endif 53 | namespace conversion { 54 | void to_t(long long inp, std::string& out) { 55 | #ifdef _MSC_VER 56 | out = std::to_string(inp); 57 | #else 58 | out = dynamic_cast< std::ostringstream & >((std::ostringstream() << std::dec << inp)).str(); 59 | #endif 60 | 61 | } 62 | void to_t(std::string inp, std::string& out) { 63 | out = inp; 64 | } 65 | void to_t(rabbit::int_string inp, rabbit::int_string& out) { 66 | out = inp; 67 | } 68 | template 69 | void to_t(_In in, std::string& out) { 70 | out = std::to_string(in); 71 | } 72 | template 73 | void to_t(_In in, rabbit::int_string& out) { 74 | out = std::to_string(in); 75 | } 76 | 
template 77 | void to_t(_In in, long long& out) { 78 | out = in; 79 | } 80 | template 81 | void to_t(_In in, long& out) { 82 | out = (long)in; 83 | } 84 | 85 | template 86 | void to_t(_In in, int& out) { 87 | out = (int)in; 88 | } 89 | template 90 | void to_t(_In in, unsigned long long& out) { 91 | out = (unsigned long long)in; 92 | } 93 | 94 | template 95 | void to_t(_In in, unsigned long& out) { 96 | out = (unsigned long)in; 97 | } 98 | 99 | void to_t(const std::string& in, unsigned long& out) { 100 | out = std::stoul(in); 101 | } 102 | 103 | void to_t(const std::string& in, long& out) { 104 | out = std::stol(in); 105 | } 106 | 107 | void to_t(const std::string& in, long long& out) { 108 | out = std::stoll(in); 109 | } 110 | 111 | void to_t(const std::string& in, unsigned long long& out) { 112 | out = std::stoull(in); 113 | } 114 | 115 | template 116 | void to_t(_In in, unsigned int& out) { 117 | out = (unsigned int)in; 118 | } 119 | 120 | template 121 | void to_t(_In in, double& out) { 122 | out = (double)in; 123 | } 124 | 125 | template 126 | void to_t(_In in, float& out) { 127 | out = (float)in; 128 | } 129 | }; 130 | static const int64_t SEED = 13; 131 | static std::mt19937_64 generator(SEED); 132 | 133 | 134 | template< class _T,class _V> 135 | class tester { 136 | public: 137 | typedef _V _ValueType; 138 | 139 | typedef _T _InputField; 140 | 141 | typedef std::vector<_InputField> _Script; 142 | #ifdef _MSC_VER 143 | unsigned long long get_max(long) const { 144 | return 1l << 31l; 145 | } 146 | unsigned long long get_max(long long) const { 147 | return 1ll << 62ll; 148 | } 149 | unsigned long long get_max(unsigned long) const { 150 | return ~((unsigned long)0); 151 | } 152 | unsigned long long get_max(unsigned long long) const { 153 | return ~((unsigned long long)0); 154 | } 155 | void gen_random(size_t count, _Script& script) { 156 | double start = get_proc_mem_use(); 157 | //std::minstd_rand rd; 158 | //std::linear_congruential_engine gen(6); 159 | 
std::mt19937 gen(6); 160 | 161 | std::uniform_int_distribution dis(0, get_max(long long())); 162 | /// script creation is not benched 163 | _InputField v; 164 | for (size_t r = 0; r < count; ++r) { 165 | conversion::to_t(dis(gen), v);//dis(gen) 166 | script.push_back(v); 167 | } 168 | printf("memory used by script: %.4g MB\n", get_proc_mem_use() - start); 169 | } 170 | #else 171 | void gen_random(size_t count, _Script& script) { 172 | double start = get_proc_mem_use(); 173 | //std::minstd_rand rd; 174 | std::mt19937 gen(6); 175 | 176 | std::uniform_int_distribution dis(std::numeric_limits::min(), std::numeric_limits::max()); 177 | /// script creation is not benched 178 | _InputField v; 179 | for (size_t r = 0; r < count; ++r) { 180 | conversion::to_t(dis(gen), v);//dis(gen) 181 | script.push_back(v); 182 | } 183 | printf("memory used by script: %.4g MB\n", get_proc_mem_use() - start); 184 | } 185 | #endif 186 | void gen_random_shuffle(size_t count, _Script& script) { 187 | double start = get_proc_mem_use(); 188 | /// script creation is not benched 189 | _InputField v; 190 | for (size_t r = 0; r < count; ++r) { 191 | conversion::to_t(r, v); 192 | script.push_back(v); 193 | } 194 | std::shuffle(script.begin(), script.end(), generator); 195 | printf("memory used by script: %.4g MB\n", get_proc_mem_use() - start); 196 | } 197 | /// shuffled list of integers with max(rand())-min(rand()) or ~16-bit width 198 | void gen_random_shuffle_16(size_t count, _Script& script) { 199 | double start = get_proc_mem_use(); 200 | //std::mt19937 gen(6); 201 | //std::uniform_int_distribution<_ValueType> dis(0, 1<<15); 202 | 203 | /// script creation is not benched 204 | _InputField v; 205 | for (size_t r = 0; r < count; ++r) { 206 | conversion::to_t(r, v); 207 | script.push_back(v); 208 | } 209 | std::random_shuffle(script.begin(), script.end()); 210 | printf("memory used by script: %.4g MB\n", get_proc_mem_use() - start); 211 | } 212 | void gen_seq(size_t count, _Script& script) { 213 | 
double start = get_proc_mem_use(); 214 | /// script creation is not benched 215 | _InputField v; 216 | for (size_t r = 0; r < count; ++r) { 217 | conversion::to_t(r, v); 218 | script.push_back(v); 219 | } 220 | printf("memory used by script: %.4g MB\n", get_proc_mem_use() - start); 221 | } 222 | template 223 | long empty_test(_MapT &h) { 224 | long errors = 0; 225 | if(h.size()!=0){ 226 | printf("ERROR: Empty: failed size\n"); 227 | ++errors; 228 | } 229 | if(!h.empty()){ 230 | printf("ERROR: Empty: failed empty\n"); 231 | ++errors; 232 | } 233 | if(h.begin() != h.end()) { 234 | printf("ERROR: Empty: failed begin end\n"); 235 | ++errors; 236 | }; 237 | if(h.find(_T()) != h.end()) { 238 | printf("ERROR: Empty: failed find end\n"); 239 | ++errors; 240 | }; 241 | if(h.cbegin() != h.cend()) { 242 | printf("ERROR: Empty: failed begin end\n"); 243 | ++errors; 244 | }; 245 | if(h.bucket_count() != 0) { 246 | printf("ERROR: Empty: failed bucket count\n"); 247 | ++errors; 248 | }; 249 | if(h.load_factor() != 0) { 250 | printf("ERROR: Empty: failed load factor\n"); 251 | ++errors; 252 | }; 253 | if(h.max_load_factor() != 1) { 254 | printf("ERROR: Empty: failed max load factor\n"); 255 | ++errors; 256 | }; 257 | return errors; 258 | } 259 | template 260 | long not_empty_test(_MapT &h) { 261 | long errors = 0; 262 | if(h.size()==0) { 263 | printf("ERROR: Not Empty: failed size\n"); 264 | ++errors; 265 | }; 266 | if(h.empty()) { 267 | printf("ERROR: Not Empty: failed empty\n"); 268 | ++errors; 269 | }; 270 | if(h.begin() == h.end()) { 271 | printf("ERROR: Not Empty: begin end\n"); 272 | ++errors; 273 | }; 274 | if(h.find(h.begin()->first) == h.end()) { 275 | printf("ERROR: Empty: failed find end\n"); 276 | ++errors; 277 | }; 278 | if(h.cbegin() == h.cend()) { 279 | printf("ERROR: Not Empty: failed cbegin cend\n"); 280 | ++errors; 281 | }; 282 | if(h.bucket_count() == 0) { 283 | printf("ERROR: Not Empty: failed bucket count\n"); 284 | ++errors; 285 | }; 286 | if(h.load_factor() 
== 0) { 287 | printf("ERROR: Not Empty: failed load factor\n"); 288 | ++errors; 289 | }; 290 | if(h.max_load_factor() != 1) { 291 | printf("ERROR: Not Empty: failed max load factor\n"); 292 | ++errors; 293 | };; 294 | return errors; 295 | } 296 | template 297 | void erase_test(_MapT &h, const _Script& script) { 298 | double mem_start = get_proc_mem_use(); 299 | std::chrono::steady_clock::time_point start_erase = std::chrono::steady_clock::now(); 300 | size_t count = script.size(); 301 | size_t s = count / 10; 302 | size_t hs = script.size(); 303 | long errors = empty_test(h); 304 | long erases = 0; 305 | if(errors){ 306 | printf("ERROR: empty test failed\n"); 307 | } 308 | _ValueType value; 309 | for (size_t k = 0; k < count ; ++k) { 310 | conversion::to_t(k + 1, value); 311 | h[script[k]] = value; 312 | } 313 | for (size_t k = 0; k < count / 2; ++k) { 314 | conversion::to_t(k + 1, value); 315 | auto f = h.find(script[k]); 316 | if (f != h.end() && f->second == value) { 317 | if (!h.erase(script[k])) { 318 | printf("ERROR: could not erase %ld\n", (long int)k); 319 | ++errors; 320 | }; 321 | ++erases; 322 | if (h.count(script[k]) != 0) { 323 | printf("ERROR: erase not reported %ld\n", (long int)k); 324 | } 325 | if (k % s == 0) { 326 | std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); 327 | 328 | printf("%ld: %ld in hash, test total %.4g secs\n", (long)k, (long)h.size(), (double)(std::chrono::duration_cast(end - start_erase).count()) / (1000000.0)); 329 | } 330 | } 331 | } 332 | if(erases == 0){ 333 | printf("ERROR: erases are 0 %ld\n", (long int)erases); 334 | ++errors; 335 | } 336 | for (size_t k = 0; k < count / 2; ++k) { 337 | if (h.count(script[k]) != 0) { 338 | printf("ERROR: could find %ld\n", (long int)k); 339 | ++errors; 340 | }; 341 | } 342 | for (size_t k = count / 2; k < count; ++k) { 343 | if (h.count(script[k]) == 0) { 344 | printf("ERROR: could not find %ld\n", (long int)k); 345 | ++errors; 346 | }; 347 | } 348 | unsigned 
long const_ctr = 0; 349 | for(typename _MapT::const_iterator c = h.begin(); c!=h.end(); ++c){ 350 | ++const_ctr; 351 | } 352 | errors += not_empty_test(h); 353 | if(const_ctr != h.size()){ 354 | printf("ERROR: const iterator not counting %ld\n", (long int)const_ctr); 355 | ++errors; 356 | } 357 | for (size_t k = 0; k < count / 2; ++k) { 358 | conversion::to_t(k + 1, value); 359 | h[script[k]] = value; 360 | } 361 | if (h.size() != hs) { 362 | ++errors; 363 | printf("ERROR: container invalid size %ld != %ld\n", (long int)h.size(), (long int)hs); 364 | } 365 | for(typename _MapT::iterator c = h.begin(); c!=h.end(); ++c){ 366 | h.erase(c); 367 | } 368 | printf("INFO: container erased size %ld \n", (long int)h.size()); 369 | errors += empty_test(h); 370 | std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); 371 | 372 | printf("erase test total %.4g secs. mem used %.4g MB : %ld errors\n", (double)(std::chrono::duration_cast(end - start_erase).count()) / (1000000.0), get_proc_mem_use() - mem_start, errors); 373 | 374 | 375 | } 376 | template 377 | void bench_hash(_MapT& h, const _Script& script) { 378 | /// create a list of random numbers and add to test script 379 | double mem_start = get_proc_mem_use(); 380 | std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now(); 381 | typename _MapT::size_type count = (typename _MapT::size_type)script.size(); 382 | 383 | typename _MapT::size_type s = count / 10; 384 | 385 | for (size_t j = 0; j < count; ++j) { 386 | h[script[j]] = j; 387 | if (j % s == 0) { 388 | std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); 389 | 390 | printf("%ld: %ld in hash, bench total %.4g secs\n", (long)j, (long)h.size(), (double)(std::chrono::duration_cast(end - start).count()) / (1000000.0)); 391 | } 392 | } 393 | std::chrono::steady_clock::time_point start_read = std::chrono::steady_clock::now(); 394 | /// check what is 395 | for (size_t k = 0; k < count; ++k) { 396 | if 
(h.count(script[k]) == 0) { 397 | printf("ERROR: could not find %ld\n", (long int)k); 398 | }; 399 | auto f = h.find(script[k]); 400 | 401 | if (f == h.end() || (*f).second != k) { 402 | if (f == h.end()) { 403 | printf("ERROR: counted data does not exist %ld\n", (long int)k); 404 | } 405 | else { 406 | auto n2v = script[k]; 407 | f = h.find(script[n2v]); 408 | if (f == h.end() || (*f).second != n2v) { 409 | printf("ERROR: could not iterator find %ld\n", (long int)k); 410 | } 411 | } 412 | } 413 | } 414 | 415 | std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); 416 | 417 | printf("time total %.4g secs read %.4g secs. mem used %.4g MB\n", (double)(std::chrono::duration_cast(end - start).count()) / (1000000.0), (double)(std::chrono::duration_cast(end - start_read).count()) / (1000000.0), get_proc_mem_use() - mem_start); 418 | 419 | 420 | 421 | } 422 | template 423 | void bench_hash_simple(_MapT& h, const _Script& script) { 424 | /// create a list of random numbers and add to test script 425 | double mem_start = get_proc_mem_use(); 426 | std::chrono::steady_clock::time_point start = std::chrono::steady_clock::now(); 427 | size_t count = script.size(); 428 | //h.rehash(count); 429 | size_t s = count / 10; 430 | _V value; 431 | for (size_t j = 0; j < count; ++j) { 432 | 433 | h[script[j]] = script[j]; 434 | if (j % s == 0) { 435 | std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); 436 | 437 | printf("%ld: %ld in hash, bench total %.4g secs, %.4g MB\n",(long)j,(long)h.size(),(double)(std::chrono::duration_cast(end - start).count())/(1000000.0),get_proc_mem_use()-mem_start); 438 | } 439 | } 440 | 441 | printf("%ld in hash, writes total %.4g millis, %.4g MB\n", (long)h.size(), (double)(std::chrono::duration_cast(std::chrono::steady_clock::now() - start).count()) / (1000.0), get_proc_mem_use() - mem_start); 442 | //int t; 443 | //std::cin >> t; 444 | std::chrono::steady_clock::time_point start_read = 
std::chrono::steady_clock::now(); 445 | /// check what is 446 | for (size_t k = 0; k < count; ++k) { 447 | auto f = h.find(script[k]); 448 | if (f == h.end() || f->second != script[k]) { 449 | printf("ERROR: could not find %ld\n", (long int)k); 450 | } 451 | } 452 | 453 | std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now(); 454 | 455 | printf("time total %.4g millis read %.4g millis. mem used %.4g MB\n", (double)(std::chrono::duration_cast(end - start).count()) / (1000.0), (double)(std::chrono::duration_cast(end - start_read).count()) / (1000.0), get_proc_mem_use() - mem_start); 456 | 457 | } 458 | }; 459 | template 460 | void test_dense_hash(typename tester<_T,_V>::_Script& script, size_t ts) { 461 | #ifdef _HAS_GOOGLE_HASH_ 462 | printf("google dense hash test\n"); 463 | typedef ::google::dense_hash_map<_T, typename tester<_T,_V>::_ValueType > _Map; // 464 | _Map h; 465 | _T c, c1; 466 | conversion::to_t(-1l, c); 467 | conversion::to_t(-2l, c1); 468 | h.set_deleted_key(c); 469 | h.set_empty_key(c1); 470 | tester<_T,_V> t; 471 | t.bench_hash_simple(h, script); 472 | #endif 473 | } 474 | 475 | template 476 | void test_sparse_hash(typename tester<_T,_V>::_Script& script, size_t ts) { 477 | #ifdef _HAS_GOOGLE_HASH_ 478 | printf("google sparse hash test\n"); 479 | typedef ::google::sparse_hash_map<_T, typename tester<_T,_V>::_ValueType> _Map; 480 | _Map h; 481 | tester<_T,_V> t; 482 | t.bench_hash_simple(h, script); 483 | #endif 484 | } 485 | 486 | template 487 | void test_rabbit_hash(typename tester<_T,_V>::_Script& script, size_t ts) { 488 | printf("rabbit hash test\n"); 489 | typedef rabbit::unordered_map<_T, typename tester<_T,_V>::_ValueType> _Map; 490 | _Map h; 491 | h.set_min_load_factor(0.25); 492 | tester<_T,_V> t; 493 | t.bench_hash_simple(h, script); 494 | 495 | } 496 | template 497 | void test_rabbit_sparse_hash(typename tester<_T,_V>::_Script& script, size_t ts) { 498 | printf("rabbit sparse hash test\n"); 499 | typedef 
rabbit::sparse_unordered_map<_T, typename tester<_T,_V>::_ValueType> _Map; 500 | _Map h; 501 | tester<_T,_V> t; 502 | t.bench_hash_simple(h, script); 503 | 504 | } 505 | template 506 | void test_rabbit_hash_unit(size_t ts) { 507 | printf("rabbit hash erase test\n"); 508 | typedef rabbit::unordered_map::_ValueType> _Map; 509 | _Map h; 510 | typename tester::_Script script; 511 | tester t; 512 | t.gen_random_shuffle(ts, script); 513 | //t.bench_hash(h, script); 514 | t.erase_test(h, script); 515 | 516 | } 517 | 518 | template 519 | void test_std_hash(typename tester::_Script& script, size_t ts) { 520 | #ifdef _HAS_STD_HASH_ 521 | printf("std hash test\n"); 522 | typedef std::unordered_map::_ValueType> _Map; 523 | _Map h; 524 | 525 | tester t; 526 | 527 | t.bench_hash_simple(h, script); 528 | #endif 529 | } 530 | 531 | extern int google_times(int iters); 532 | //extern int unique_running_insertion(); 533 | //extern int unique_scattered_lookup(); 534 | void more_tests() { 535 | //unique_scattered_lookup(); 536 | //unique_running_insertion(); 537 | } 538 | struct test_data{ 539 | enum{ 540 | SEQUENTIAL, 541 | NARROWEST, 542 | NARROW, 543 | WIDE 544 | }; 545 | test_data(int val) : val(val){ 546 | } 547 | test_data(const test_data& right) :val(right.val){ 548 | } 549 | test_data& operator=(const test_data& right){ 550 | val = right.val; 551 | return *this; 552 | } 553 | bool operator==(const test_data& right) const { 554 | return val == right.val; 555 | } 556 | bool operator==(const int& right) const { 557 | return val == right; 558 | } 559 | int val; 560 | }; 561 | 562 | struct test_type{ 563 | bool rabbit; 564 | bool rabbit_sparse; 565 | bool rabbit_unit; 566 | bool dense; 567 | bool sparse; 568 | bool std_container; 569 | bool google_tests; 570 | }; 571 | void test_random_int(test_data data, test_type test, size_t ts) { 572 | 573 | //typedef rabbit::int_string _K; 574 | //typedef rabbit::int_string _V; 575 | 576 | typedef unsigned long long _K; 577 | typedef unsigned 
long long _V; 578 | 579 | //typedef std::string _K; 580 | //typedef std::string _V; 581 | 582 | tester<_K,_V>::_Script script; 583 | tester<_K,_V> t; 584 | if(data == test_data::WIDE){ 585 | t.gen_random(ts, script); 586 | }else if(data == test_data::NARROW){ 587 | t.gen_random_shuffle(ts, script); 588 | }else if(data == test_data::NARROWEST){ 589 | t.gen_random_shuffle_16(ts, script); 590 | }else if(data == test_data::SEQUENTIAL){ 591 | t.gen_seq(ts, script); 592 | } 593 | 594 | if(test.dense) 595 | test_dense_hash<_K,_V>(script, ts); 596 | 597 | if(test.rabbit) 598 | test_rabbit_hash<_K,_V>(script, ts); 599 | if(test.rabbit_sparse) 600 | test_rabbit_sparse_hash<_K,_V>(script, ts); 601 | if(test.rabbit_unit) 602 | test_rabbit_hash_unit<_K,_V>(ts/10); 603 | if(test.sparse) 604 | test_sparse_hash<_K,_V>(script, ts); 605 | if(test.std_container) 606 | test_std_hash<_K,_V>(script, ts); 607 | #ifdef _HAS_GOOGLE_HASH_ 608 | if(test.google_tests) 609 | google_times((int)ts); 610 | #endif 611 | } 612 | int main(int argc, char **argv) 613 | { 614 | 615 | size_t ts = 10000000; 616 | test_type test; 617 | test.dense = true; 618 | test.rabbit = true; 619 | test.rabbit_unit = false; 620 | test.rabbit_sparse = false; 621 | test.sparse = false; 622 | test.std_container = true; 623 | test.google_tests = true; 624 | test_random_int(test_data::NARROW,test,ts); 625 | 626 | //more_tests(); 627 | return 0; 628 | } 629 | -------------------------------------------------------------------------------- /rabbit_tests/rabbit_tests.cbp: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 61 | 62 | -------------------------------------------------------------------------------- /rabbit_tests/rabbit_tests.depend: -------------------------------------------------------------------------------- 1 | # depslib dependency file v1.0 2 | 1485238655 source:c:\dev\cpp_all\rabbit\rabbit_tests\main.cpp 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 
15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 1430471197 c:\dev\cpp_all\repo\sparsehash-2.0.2\src\windows\sparsehash\internal\sparseconfig.h 24 | 25 | 1476541189 c:\dev\cpp_all\rabbit\rabbit\unordered_map 26 | 27 | 28 | 1476539852 c:\dev\cpp_all\rabbit\rabbit\rabbit.h 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 1329936582 c:\dev\cpp_all\repo\sparsehash-2.0.2\src\sparsehash\type_traits.h 42 | 43 | 44 | 45 | 46 | 1329936582 c:\dev\cpp_all\repo\sparsehash-2.0.2\src\sparsehash\template_util.h 47 | 48 | 49 | 1329936582 c:\dev\cpp_all\repo\sparsehash-2.0.2\src\sparsehash\dense_hash_map 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 1329936582 c:\dev\cpp_all\repo\sparsehash-2.0.2\src\sparsehash\internal\densehashtable.h 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 1329936582 c:\dev\cpp_all\repo\sparsehash-2.0.2\src\sparsehash\internal\hashtable-common.h 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 1329936582 c:\dev\cpp_all\repo\sparsehash-2.0.2\src\sparsehash\internal\libc_allocator_with_realloc.h 81 | 82 | 83 | 84 | 85 | 86 | 1329936582 c:\dev\cpp_all\repo\sparsehash-2.0.2\src\sparsehash\sparse_hash_map 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 1329936582 c:\dev\cpp_all\repo\sparsehash-2.0.2\src\sparsehash\internal\sparsehashtable.h 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 1329936582 c:\dev\cpp_all\repo\sparsehash-2.0.2\src\sparsehash\sparsetable 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 1480258689 source:c:\dev\cpp_all\rabbit\time_hash_maps.cpp 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 1428698982 c:\dev\cpp_all\repo\sparsehash-2.0.2\src\windows\config.h 149 | 150 | "windows/port.h" 151 | 152 | 1329936582 c:\dev\cpp_all\repo\sparsehash-2.0.2\src\windows\port.h 153 | 154 | "config.h" 155 | 156 | 157 | 158 | 159 | 1430471197 
f:\repo\sparsehash-2.0.2\src\windows\sparsehash\internal\sparseconfig.h 160 | 161 | 1329936582 f:\repo\sparsehash-2.0.2\src\sparsehash\type_traits.h 162 | 163 | 164 | 165 | 166 | 1329936582 f:\repo\sparsehash-2.0.2\src\sparsehash\template_util.h 167 | 168 | 169 | 1329936582 f:\repo\sparsehash-2.0.2\src\sparsehash\dense_hash_map 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 1329936582 f:\repo\sparsehash-2.0.2\src\sparsehash\internal\densehashtable.h 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 1329936582 f:\repo\sparsehash-2.0.2\src\sparsehash\internal\hashtable-common.h 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 1329936582 f:\repo\sparsehash-2.0.2\src\sparsehash\internal\libc_allocator_with_realloc.h 201 | 202 | 203 | 204 | 205 | 206 | 1329936582 f:\repo\sparsehash-2.0.2\src\sparsehash\sparse_hash_map 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 1329936582 f:\repo\sparsehash-2.0.2\src\sparsehash\internal\sparsehashtable.h 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 1329936582 f:\repo\sparsehash-2.0.2\src\sparsehash\sparsetable 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 1428698982 f:\repo\sparsehash-2.0.2\src\windows\config.h 245 | 246 | "windows/port.h" 247 | 248 | 1329936582 f:\repo\sparsehash-2.0.2\src\windows\port.h 249 | 250 | "config.h" 251 | 252 | 253 | 254 | 255 | 1430471197 c:\repo\sparsehash-2.0.2\src\windows\sparsehash\internal\sparseconfig.h 256 | 257 | 1428698982 c:\repo\sparsehash-2.0.2\src\windows\config.h 258 | 259 | "windows/port.h" 260 | 261 | 1329936582 c:\repo\sparsehash-2.0.2\src\windows\port.h 262 | 263 | "config.h" 264 | 265 | 266 | 267 | 268 | 1329936582 c:\repo\sparsehash-2.0.2\src\sparsehash\type_traits.h 269 | 270 | 271 | 272 | 273 | 1329936582 c:\repo\sparsehash-2.0.2\src\sparsehash\template_util.h 274 | 275 | 276 | 1329936582 c:\repo\sparsehash-2.0.2\src\sparsehash\dense_hash_map 277 | 278 | 
279 | 280 | 281 | 282 | 283 | 284 | 285 | 1329936582 c:\repo\sparsehash-2.0.2\src\sparsehash\internal\densehashtable.h 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 1329936582 c:\repo\sparsehash-2.0.2\src\sparsehash\internal\hashtable-common.h 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 1329936582 c:\repo\sparsehash-2.0.2\src\sparsehash\internal\libc_allocator_with_realloc.h 308 | 309 | 310 | 311 | 312 | 313 | 1329936582 c:\repo\sparsehash-2.0.2\src\sparsehash\sparse_hash_map 314 | 315 |