├── .codelite
│   ├── compilation.db
│   ├── cppcheck.list
│   ├── refactoring.db
│   ├── sftp-workspace-settings.conf
│   ├── subversion.conf
│   ├── test_hash.session
│   ├── test_hash.tags
│   ├── test_hash.workspace.christiaan
│   ├── test_hash.workspace.pretoric
│   └── tweaks.conf
├── COPYING
├── Makefile
├── README.txt
├── rabbit.mk
├── rabbit.txt
├── rabbit
│   ├── int_string.h
│   ├── rabbit_map.h
│   ├── rabbit_set.h
│   ├── unordered_map
│   └── unordered_set
├── rabbit_tests
│   ├── main.cpp
│   ├── rabbit_tests.cbp
│   ├── rabbit_tests.depend
│   ├── rabbit_tests.layout
│   └── time_hash_maps.cpp
├── test_hash.mk
├── test_hash.project
├── test_hash.txt
├── test_hash.workspace
└── vs
    ├── vs.sln
    ├── vs.v11.suo
    └── vs
        ├── ReadMe.txt
        ├── vs.vcxproj
        └── vs.vcxproj.filters
/.codelite/compilation.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tjizep/rabbit/e11c23f3b2de1bf3a387cefedccfa5ec1686a83f/.codelite/compilation.db
--------------------------------------------------------------------------------
/.codelite/cppcheck.list:
--------------------------------------------------------------------------------
1 | c:\dev\test_hash\main.cpp
2 |
--------------------------------------------------------------------------------
/.codelite/refactoring.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tjizep/rabbit/e11c23f3b2de1bf3a387cefedccfa5ec1686a83f/.codelite/refactoring.db
--------------------------------------------------------------------------------
/.codelite/sftp-workspace-settings.conf:
--------------------------------------------------------------------------------
1 | {
2 | }
--------------------------------------------------------------------------------
/.codelite/subversion.conf:
--------------------------------------------------------------------------------
1 | {
2 | "svn-settings": {
3 | "m_repoPath": "C:\\dev\\cpp_all\\rabbit"
4 | }
5 | }
--------------------------------------------------------------------------------
/.codelite/test_hash.session:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/.codelite/test_hash.tags:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tjizep/rabbit/e11c23f3b2de1bf3a387cefedccfa5ec1686a83f/.codelite/test_hash.tags
--------------------------------------------------------------------------------
/.codelite/test_hash.workspace.christiaan:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.codelite/test_hash.workspace.pretoric:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.codelite/tweaks.conf:
--------------------------------------------------------------------------------
1 | {
2 | }
--------------------------------------------------------------------------------
/COPYING:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 Christiaan Pretorius
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
1 | .PHONY: clean All
2 |
3 | All:
4 | @echo "----------Building project:[ rabbit - Release ]----------"
5 | @$(MAKE) -f "rabbit.mk"
6 | clean:
7 | @echo "----------Cleaning project:[ rabbit - Release ]----------"
8 | @$(MAKE) -f "rabbit.mk" clean
9 |
--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
1 | # rabbit v 1.2 r2
2 | stl compatible hashtable (rabbit::unordered_map or rabbit::sparse_unordered_map)
3 |
4 | Using:
5 | ------------------------------------------------------------------
6 |
7 | #include <rabbit/unordered_map>
8 | 
9 | void rabbits(){
10 |     rabbit::unordered_map<int,int> int_map;
11 |     int_map.insert(0,1);
12 |     if(int_map[0] == 1){
13 |         /// xyz
14 |     }
15 |     ...
16 |     rabbit::sparse_unordered_map<int,int> sparse_int_map;
17 |     sparse_int_map.insert(0,1);
18 |     if(sparse_int_map[0] == 1){
19 |         /// xyz
20 |     }
21 | }
22 |
23 | Advantages:
24 | -----------
25 |
26 | 1. Very fast and sometimes small, or just fast and very small when using set_min_load_factor(> 0.7) and set_logarithmic(>=4);
27 |    you can also use rabbit::sparse_unordered_map to get the same effect (see the sketch after this list)
28 | 2. Strong guarantees for hash table size in sparse mode,
29 |    i.e. the sparse version of the hash table is close to the size of google sparse hash
30 |    even though it has a step-shaped memory use curve
31 | 3. API compatible with the STL
32 | 4. Sparseness can be dialled in dynamically when the need arises - only effective after a rehash (use set_logarithmic(1..32))
33 |
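   | A minimal tuning sketch, building on the Using example above (set_min_load_factor and
   | set_logarithmic are the calls referred to in points 1 and 4; the numeric values are
   | illustrative only):
   | 
   |     rabbit::unordered_map<int,int> m;
   |     m.set_logarithmic(4);        /// sparse mode, takes effect at the next rehash
   |     m.set_min_load_factor(0.7f); /// favour memory over raw read speed
   |     for(int i = 0; i < 100000; ++i)
   |         m[i] = i;                /// operator[] inserts missing keys
   | 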
34 | Disadvantages
35 | -------------
36 |
37 | If a rehash takes place during iteration (because of inserts during iteration) the iterator becomes
38 | invalid. It won't crash but it might skip previously added elements.
39 | It is best to rehash to the approximate future size before starting an iteration that will cause
40 | inserts, for example as in the sketch below. Erases and updates are stable.
41 |
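   | A sketch of that pattern, assuming the usual begin()/end()/size() members and the reserve()
   | call shown in rabbit/rabbit_map.h:
   | 
   |     rabbit::unordered_map<int,int> m;
   |     /// ... fill m ...
   |     m.reserve(2 * m.size());             /// rehash up front to the approximate future size
   |     for(auto i = m.begin(); i != m.end(); ++i){
   |         if(i->first % 2 == 0)
   |             m[i->first + 1] = i->second; /// inserts no longer rehash mid-iteration
   |     }
   | 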
42 | Algorithm Description
43 | ---------------------
44 |
45 | rabbit is both a closed *and* openly addressed hash table.
46 |
47 | Open addressing part
48 | --------------------
49 |
50 | Keys are located via a truncated linear probe of constant length in the case of the dense version.
51 | The linear probe length is logarithmically related to the hash size when the sparse flag is set
52 | with the set_logarithmic(>=1) function.
53 | 
54 | Rabbit maintains two bits for each key, stored separately from the keys themselves.
55 | The first bit records a key's existence and the second bit is a collision indicator.
56 | The collision indicator removes the need to search for non-existent keys, which is a
57 | problem in the standard linear probing algorithm.
58 | 
59 | The bits and the key-value pairs are stored in separate arrays to provide better CPU cache
60 | behaviour. For instance, the existence bits stay in cache longer, so memory accesses
61 | to these structures are reduced.
62 |
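   | The idea in stand-alone form (a simplified illustration of the per-slot bookkeeping; the real
   | flags live in the _KeySegment pages of rabbit/rabbit_map.h, the names below are made up):
   | 
   |     #include <cstdint>
   | 
   |     struct toy_segment {               /// metadata for 64 consecutive slots
   |         uint64_t exists = 0;           /// bit i set: slot i holds a key
   |         uint64_t overflows = 0;        /// bit i set: some key hashing to slot i collided
   |     };
   | 
   |     /// a missing key whose home slot has no collision bit can be rejected
   |     /// without ever touching the key array
   |     bool may_contain(const toy_segment& s, unsigned slot){
   |         return ((s.exists >> slot) & 1u) || ((s.overflows >> slot) & 1u);
   |     }
   | 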
63 | Closed addressing
64 | -----------------
65 |
66 | At the end of the key array rabbit also maintains a single overflow bucket. If a key is inserted and
67 | an open slot is not found within the current probe length, it is added here. This bucket is
68 | accessed like a stack, although removing items in the middle will not reduce its height;
69 | items are added at the back.
70 | 
71 | In the semi-dense variation of the algorithm the size of this bucket is kept at a constant
72 | fraction of the table size. In the sparse version the single bucket grows logarithmically.
73 | 
74 | Once the single bucket is full, a rehash is performed into a new table with twice as many slots.
75 | In the case of the sparse table a load factor of ~0.75 is maintained.
76 |
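   | Schematically, one insert therefore cascades through these cases (an outline with made-up
   | names, not the library's code):
   | 
   |     enum class placed { home, probed, overflow_bucket, needs_rehash };
   | 
   |     placed classify_insert(bool home_free, bool probe_window_has_free_slot, bool bucket_full){
   |         if(home_free)                  return placed::home;            /// empty home slot h(k)
   |         if(probe_window_has_free_slot) return placed::probed;          /// free slot within the probe length
   |         if(!bucket_full)               return placed::overflow_bucket; /// push onto the single end bucket
   |         return placed::needs_rehash;                                   /// grow to twice as many slots and retry
   |     }
   | 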
77 | Safety
78 | ------
79 |
80 | A randomization protocol is activated when the table rehashes and a minimum load factor is not
81 | reached.
82 |
83 | Changes to algorithm for 1.2 (Minimum load factor)
84 | --------------------------------------------------
85 |
86 | The probe length is no longer constant but increases when keys are added to the bucket
87 | while the minimum load factor has not been reached. A set_min_load_factor(x in (0,1]) function has been added.
88 | It can be used to optimize for speed or memory use.
89 | The minimum load factor defaults to 0.5.
90 | This results in much less variation in memory use while not affecting performance.
91 | Values around 0.25 give high read performance and slightly slower growth while using more memory;
92 | values around 0.7 give much better growth and slightly slower random reads while using about half
93 | the memory.
94 |
95 | Experimentation also revealed that the hash table is much less sensitive to bad hash functions after this
96 | change.
97 |
98 | Note
99 | ----
100 |
101 | The previous version stored keys and values separately, which reduced memory use when
102 | sizeof(key)+sizeof(value) < sizeof(std::pair<key,value>). This behaviour isn't
103 | available anymore as the penalty for random read access is usually too high.
104 |
--------------------------------------------------------------------------------
/rabbit.mk:
--------------------------------------------------------------------------------
1 | ##
2 | ## Auto Generated makefile by CodeLite IDE
3 | ## any manual changes will be erased
4 | ##
5 | ## Release
6 | ProjectName :=rabbit
7 | ConfigurationName :=Release
8 | WorkspacePath := "C:\dev\cpp_all\rabbit"
9 | ProjectPath := "C:\dev\cpp_all\rabbit"
10 | IntermediateDirectory :=./Release
11 | OutDir := $(IntermediateDirectory)
12 | CurrentFileName :=
13 | CurrentFilePath :=
14 | CurrentFileFullPath :=
15 | User :=christiaan
16 | Date :=06/19/15
17 | CodeLitePath :="C:\Program Files\CodeLite"
18 | LinkerName :=C:/TDM-GCC-64/bin/g++.exe
19 | SharedObjectLinkerName :=C:/TDM-GCC-64/bin/g++.exe -shared -fPIC
20 | ObjectSuffix :=.o
21 | DependSuffix :=.o.d
22 | PreprocessSuffix :=.i
23 | DebugSwitch :=-g
24 | IncludeSwitch :=-I
25 | LibrarySwitch :=-l
26 | OutputSwitch :=-o
27 | LibraryPathSwitch :=-L
28 | PreprocessorSwitch :=-D
29 | SourceSwitch :=-c
30 | OutputFile :=$(IntermediateDirectory)/$(ProjectName)
31 | Preprocessors :=$(PreprocessorSwitch)NDEBUG
32 | ObjectSwitch :=-o
33 | ArchiveOutputSwitch :=
34 | PreprocessOnlySwitch :=-E
35 | ObjectsFileList :="rabbit.txt"
36 | PCHCompileFlags :=
37 | MakeDirCommand :=makedir
38 | RcCmpOptions :=
39 | RcCompilerName :=C:/TDM-GCC-64/bin/windres.exe
40 | LinkOptions :=
41 | IncludePath := $(IncludeSwitch). $(IncludeSwitch). $(IncludeSwitch)C:/dev/cpp_all/repo/sparsehash-2.0.2/src/windows $(IncludeSwitch)C:/dev/cpp_all/repo/sparsehash-2.0.2/src
42 | IncludePCH :=
43 | RcIncludePath :=
44 | Libs := $(LibrarySwitch)psapi
45 | ArLibs := "psapi"
46 | LibPath := $(LibraryPathSwitch).
47 |
48 | ##
49 | ## Common variables
50 | ## AR, CXX, CC, AS, CXXFLAGS and CFLAGS can be overriden using an environment variables
51 | ##
52 | AR := C:/TDM-GCC-64/bin/ar.exe rcu
53 | CXX := C:/TDM-GCC-64/bin/g++.exe
54 | CC := C:/TDM-GCC-64/bin/gcc.exe
55 | CXXFLAGS := -O3 -fexpensive-optimizations -std=c++11 -Wall $(Preprocessors)
56 | CFLAGS := -O2 -Wall $(Preprocessors)
57 | ASFLAGS :=
58 | AS := C:/TDM-GCC-64/bin/as.exe
59 |
60 |
61 | ##
62 | ## User defined environment variables
63 | ##
64 | CodeLiteDir:=C:\Program Files\CodeLite
65 | Objects0=$(IntermediateDirectory)/tests_main.cpp$(ObjectSuffix) $(IntermediateDirectory)/time_hash_maps.cpp$(ObjectSuffix)
66 |
67 |
68 |
69 | Objects=$(Objects0)
70 |
71 | ##
72 | ## Main Build Targets
73 | ##
74 | .PHONY: all clean PreBuild PrePreBuild PostBuild
75 | all: $(OutputFile)
76 |
77 | $(OutputFile): $(IntermediateDirectory)/.d $(Objects)
78 | @$(MakeDirCommand) $(@D)
79 | @echo "" > $(IntermediateDirectory)/.d
80 | @echo $(Objects0) > $(ObjectsFileList)
81 | $(LinkerName) $(OutputSwitch)$(OutputFile) @$(ObjectsFileList) $(LibPath) $(Libs) $(LinkOptions)
82 |
83 | $(IntermediateDirectory)/.d:
84 | @$(MakeDirCommand) "./Release"
85 |
86 | PreBuild:
87 |
88 |
89 | ##
90 | ## Objects
91 | ##
92 | $(IntermediateDirectory)/tests_main.cpp$(ObjectSuffix): tests/main.cpp $(IntermediateDirectory)/tests_main.cpp$(DependSuffix)
93 | $(CXX) $(IncludePCH) $(SourceSwitch) "C:/dev/cpp_all/rabbit/tests/main.cpp" $(CXXFLAGS) $(ObjectSwitch)$(IntermediateDirectory)/tests_main.cpp$(ObjectSuffix) $(IncludePath)
94 | $(IntermediateDirectory)/tests_main.cpp$(DependSuffix): tests/main.cpp
95 | @$(CXX) $(CXXFLAGS) $(IncludePCH) $(IncludePath) -MG -MP -MT$(IntermediateDirectory)/tests_main.cpp$(ObjectSuffix) -MF$(IntermediateDirectory)/tests_main.cpp$(DependSuffix) -MM "tests/main.cpp"
96 |
97 | $(IntermediateDirectory)/tests_main.cpp$(PreprocessSuffix): tests/main.cpp
98 | @$(CXX) $(CXXFLAGS) $(IncludePCH) $(IncludePath) $(PreprocessOnlySwitch) $(OutputSwitch) $(IntermediateDirectory)/tests_main.cpp$(PreprocessSuffix) "tests/main.cpp"
99 |
100 | $(IntermediateDirectory)/time_hash_maps.cpp$(ObjectSuffix): time_hash_maps.cpp $(IntermediateDirectory)/time_hash_maps.cpp$(DependSuffix)
101 | $(CXX) $(IncludePCH) $(SourceSwitch) "C:/dev/cpp_all/rabbit/time_hash_maps.cpp" $(CXXFLAGS) $(ObjectSwitch)$(IntermediateDirectory)/time_hash_maps.cpp$(ObjectSuffix) $(IncludePath)
102 | $(IntermediateDirectory)/time_hash_maps.cpp$(DependSuffix): time_hash_maps.cpp
103 | @$(CXX) $(CXXFLAGS) $(IncludePCH) $(IncludePath) -MG -MP -MT$(IntermediateDirectory)/time_hash_maps.cpp$(ObjectSuffix) -MF$(IntermediateDirectory)/time_hash_maps.cpp$(DependSuffix) -MM "time_hash_maps.cpp"
104 |
105 | $(IntermediateDirectory)/time_hash_maps.cpp$(PreprocessSuffix): time_hash_maps.cpp
106 | @$(CXX) $(CXXFLAGS) $(IncludePCH) $(IncludePath) $(PreprocessOnlySwitch) $(OutputSwitch) $(IntermediateDirectory)/time_hash_maps.cpp$(PreprocessSuffix) "time_hash_maps.cpp"
107 |
108 |
109 | -include $(IntermediateDirectory)/*$(DependSuffix)
110 | ##
111 | ## Clean
112 | ##
113 | clean:
114 | $(RM) -r ./Release/
115 |
116 |
117 |
--------------------------------------------------------------------------------
/rabbit.txt:
--------------------------------------------------------------------------------
1 | ./Release/tests_main.cpp.o ./Release/time_hash_maps.cpp.o
2 |
--------------------------------------------------------------------------------
/rabbit/int_string.h:
--------------------------------------------------------------------------------
1 | #pragma once
2 | #include <string>
3 | #include <cstring>
4 | #include <algorithm>
5 | #include <cassert>
6 |
7 | namespace rabbit {
8 |
9 | class int_string {
10 | public:
11 | typedef unsigned char size_type;
12 | typedef unsigned long long string_int;
13 | private:
14 | static const size_t D = 3;
15 | size_type size;
16 | size_t hash_val;
17 | string_int ints[D];
18 | const string_int * end() const {
19 | return (&ints[D]);
20 | }
21 | const string_int * begin() const {
22 | return (&ints[0]);
23 | }
24 | void copy_chars(string_int* dest, const char * src, size_type len) {
25 | size_type lm = std::min<size_type>(len, sizeof(string_int));
26 | char * data = (char *)(dest);
27 | *dest = string_int();
28 | for (size_type i = size_type(); i < lm; ++i) {
29 | data[i] = src[i];
30 | }
31 | }
32 |
33 | void copy_string(const char * source, size_t l) {
34 | size_type isize = sizeof(string_int);
35 | size = (size_type)std::min(D*isize-1, l);
36 | size_type remaining = size;
37 | string_int * d = ints;
38 | const char * s = source;
39 | while (d < end()) {
40 | size_type todo = std::min(remaining, isize);
41 | copy_chars(d, s, todo);
42 | s += todo;
43 | ++d;
44 | remaining -= todo;
45 | }
46 | hash_val = fnv_1a_bytes();
47 | }
48 |
49 | void copy_string(const std::string& s) {
50 | copy_string(s.c_str(), s.size());
51 | }
52 |
53 | public:
54 | int_string()
55 | : size(size_type())
56 | , hash_val(size_t()){
57 | for (int i = 0; i < D; ++i)
58 | ints[i] = string_int();
59 | }
60 | int_string(const std::string& s) {
61 | copy_string(s);
62 | }
63 | int_string(const char * s) {
64 | copy_string(s, std::strlen(s));
65 | }
66 | int_string(const int_string& s) {
67 | *this = s;
68 |
69 | }
70 | int_string& operator=(const std::string& s) {
71 | copy_string(s);
72 | assert(size < sizeof(string_int) * D);
73 | return *this;
74 | }
75 | int_string& operator=(const int_string& s) {
76 | string_int * d = ints;
77 | const string_int * rd = s.ints;
78 | while (d < end()) {
79 | *d = *rd;
80 | ++d;
81 | ++rd;
82 | }
83 | size = s.size;
84 | hash_val = s.hash_val;
85 | return *this;
86 | }
87 | bool operator==(const int_string& s) const {
88 | if (size != s.size) return false;
89 | const string_int * d = ints;
90 | const string_int * rd = s.ints;
91 | while (d < end()) {
92 | if (*d != *rd)
93 | return false;
94 | ++d;
95 | ++rd;
96 | }
97 | return true;
98 | }
99 | bool operator!=(const int_string& s) const {
100 | return !(*this == s);
101 | }
102 | bool operator<(const int_string& s) const {
103 | const string_int * d = ints;
104 | const string_int * rd = s.ints;
105 | while (d < end()) {
106 | if (*d != *rd)
107 | return *d < *rd;
108 | ++d;
109 | ++rd;
110 | }
111 | return size < s.size;
112 | }
113 | const char * c_str() const {
114 | return (const char *)&ints[0];
115 | }
116 | std::string to_string() const {
117 | return std::string(this->c_str(), size);
118 | }
119 |
120 | size_t fnv_1a_bytes(const unsigned char *bytes, size_t count) const {
121 | const unsigned long long FNV64prime = 0x00000100000001B3ull;
122 | const unsigned long long FNV64basis = 0xCBF29CE484222325ull;
123 | size_t r = FNV64basis;
124 | for (size_t a = 0; a < count; ++a){
125 | r ^= (size_t)bytes[a]; // folding of one byte at a time
126 | r *= FNV64prime;
127 | }
128 | return r;
129 | }
130 |
131 | size_t fnv_1a_bytes()const{
132 | size_t r = size;
133 | const char * s = c_str();
134 | return fnv_1a_bytes((const unsigned char *)s,r);
135 | }
136 |
137 | size_t hash() const {
138 | if(hash_val) return hash_val;
139 | //return fnv_1a();
140 | return fnv_1a_bytes();
141 | }
142 | size_t bad_hash() const {
143 | size_t r = 31;
144 | const string_int * i = begin();
145 | while (i < end()) {
146 | r += 31*(*i);
147 | ++i;
148 | }
149 | return r;
150 | }
151 | };
152 | template<>
153 | struct rabbit_hash<int_string> {
154 | size_t operator()(const int_string& k) const {
155 | return k.hash();
156 | };
157 | };
158 | };
159 | namespace std {
160 | template<>
161 | struct hash<rabbit::int_string> {
162 | size_t operator()(const rabbit::int_string& k) const {
163 | return k.hash();
164 | };
165 | };
166 | }
167 |
--------------------------------------------------------------------------------
/rabbit/rabbit_map.h:
--------------------------------------------------------------------------------
1 | #ifndef _RABBIT_H_CEP_20150303_
2 | #define _RABBIT_H_CEP_20150303_
3 | /**
4 | The MIT License (MIT)
5 | Copyright (c) 2015,2016,2017 Christiaan Pretorius
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | **/
22 | #include <cstddef>
23 | #include <cassert>
24 | #include <new>
25 | #include <exception>
26 | #include <utility>
27 | #include <memory>
28 | #include <vector>
29 | #include <algorithm>
30 | #include <functional>
31 | #include <limits>
32 | #include <iostream>
33 | #include <iomanip>
34 | /// the rab-bit hash
35 | /// probably the world's simplest working hashtable - only kidding
36 | /// it uses linear probing for the first level of fallback and then an overflow area or secondary hash
37 |
38 | #ifdef _MSC_VER
39 | #define RABBIT_NOINLINE_PRE _declspec(noinline)
40 | #define RABBIT_NOINLINE_
41 | #else
42 | #define RABBIT_NOINLINE_PRE
43 | #define RABBIT_NOINLINE_ __attribute__((noinline))
44 | #endif
45 | namespace rabbit{
46 |
47 | template<typename _Config>
48 | struct _BinMapper{
49 | typedef typename _Config::size_type size_type;
50 | typedef _Config config_type;
51 | size_type extent;
52 | size_type extent1;
53 | size_type extent2;
54 | size_type primary_bits;
55 | size_type random_val;
56 | unsigned long long gate_bits;
57 | _Config config;
58 | _BinMapper(){
59 | }
60 | _BinMapper(size_type new_extent,const _Config& config){
61 | this->config = config;
62 | this->extent = ((size_type)1) << this->config.log2(new_extent);
63 | this->extent1 = this->extent-1;
64 | this->extent2 = this->config.log2(new_extent);
65 | this->primary_bits = extent2;
66 | //std::minstd_rand rd;
67 | //std::mt19937 gen(rd());
68 | //std::uniform_int_distribution<size_type> dis(1ll<<4, std::numeric_limits<size_type>::max());
69 | this->random_val = 0; //(size_type)dis(gen);
70 | if(new_extent < (1ll<<32ll)){
71 | this->gate_bits = (1ll<<32ll) - 1ll;
72 | }else{
73 | this->gate_bits = (1ll<<62ll) - 1ll;
74 | }
75 | }
76 | inline size_type nearest_larger(size_type any){
77 | size_type l2 = this->config.log2(any);
78 | return (size_type)(2ll << l2);
79 | }
80 |
81 | // FNV-1a hash function for bytes
82 | // https://en.wikipedia.org/wiki/Fowler%E2%80%93Noll%E2%80%93Vo_hash_function
83 | // https://tools.ietf.org/html/draft-eastlake-fnv-13#section-6
84 | // used to attempt to fix bad hashes from code
85 | size_type fnv_1a_bytes(const unsigned char *bytes, size_type count) const {
86 | const unsigned long long FNV64prime = 0x00000100000001B3ull;
87 | const unsigned long long FNV64basis = 0xCBF29CE484222325ull;
88 | size_t r = FNV64basis;
89 | for (size_t a = 0; a < count; ++a){
90 | r ^= (size_t)bytes[a]; // folding of one byte at a time
91 | r *= FNV64prime;
92 | }
93 | return r;
94 | }
95 | size_type fnv_1a_(size_type other) const {
96 | return fnv_1a_bytes((const unsigned char *)&other,sizeof(other));
97 | }
98 | size_type randomize(size_type other) const {
99 | size_type rand_other = other;// fnv_1a_();
100 | size_type r = rand_other >> this->primary_bits;
101 | return rand_other + ((r*r) >> 2); // fnv_1a_(other + ((r*r) >> 2)); //(other ^ random_val) & this->extent1;
102 | }
103 | size_type operator()(size_type h_n) const {
104 | return h_n & this->extent1 ;
105 | }
106 | double resize_factor() const {
107 | return 2;
108 | }
109 | double recalc_growth_factor(size_type elements) {
110 | return 2;
111 | }
112 |
113 | inline size_type next_size(){
114 |
115 | double r = recalc_growth_factor(this->extent) * this->extent;
116 | assert(r > (double)extent);
117 | return (size_type)r;
118 | }
119 | };
120 | template<typename _Ht>
121 | struct rabbit_hash{
122 | size_t operator()(const _Ht& k) const{
123 | return (size_t) std::hash<_Ht>()(k); ///
124 | };
125 | };
126 | template<>
127 | struct rabbit_hash<long>{
128 | unsigned long operator()(const long& k) const{
129 | return (unsigned long)k;
130 | };
131 | };
132 | template<>
133 | struct rabbit_hash<unsigned long>{
134 | inline unsigned long operator()(const unsigned long& k) const{
135 | return k;
136 | };
137 | };
138 | template<>
139 | struct rabbit_hash<unsigned int>{
140 | inline unsigned int operator()(const unsigned int& k) const{
141 | return k;
142 | };
143 | };
144 | template<>
145 | struct rabbit_hash<int>{
146 | inline unsigned int operator()(const int& k) const{
147 | return k;
148 | };
149 | };
150 | template<>
151 | struct rabbit_hash<unsigned long long>{
152 | inline unsigned long long operator()(const unsigned long long& k) const{
153 | return k;
154 | };
155 | };
156 | template<>
157 | struct rabbit_hash<long long>{
158 | inline unsigned long long operator()(const long long& k) const{
159 | return (unsigned long long)k;
160 | };
161 | };
162 |
163 | template<unsigned int logarithmic>
164 | class basic_config{
165 | public:
166 | typedef unsigned long long int _Bt; /// exists bucket type - not using vector<bool> - interface does not support bit bucketing
167 | /// if even more speed is desired but you're willing to live with a 4 billion key limit then
168 | //typedef unsigned long size_type;
169 | typedef std::size_t size_type;
170 |
171 | size_type log2(size_type n){
172 | size_type r = 0;
173 | while (n >>= 1)
174 | {
175 | r++;
176 | }
177 | return r;
178 | }
179 | _Bt BUCKET_COUNT ;
180 | _Bt CHAR_BITS ;
181 | _Bt BITS_SIZE ;
182 | _Bt BITS_SIZE1 ;
183 | _Bt ALL_BITS_SET ;
184 | _Bt LOGARITHMIC ;
185 | /// maximum probes per access
186 | size_type MIN_PROBES; /// the minimum starting value of probes which is increased if the bucket is used at a load factor < min load factor
187 | size_type DEFAULT_PROBES; /// if the min load factor is set to a unusable value
188 | size_type PROBE_INCR;
189 | size_type SAFETY_PROBES_FACTOR; /// probes when bad hashes or attacks are detected
190 | size_type BITS_LOG2_SIZE;
191 | /// this distributes the h values which are powers of 2 a little to avoid primary clustering when there is no
192 | /// hash randomizer available
193 | size_type MIN_OVERFLOW;
194 | size_type MIN_EXTENT;
195 | size_type MAX_OVERFLOW_FACTOR ;
196 | size_type SAFETY_OVERFLOW_FACTOR; /// overflow factor when bad hashes or attacks are detected
197 | float DEFAULT_MIN_LOAD_FACTOR;
198 | basic_config(const basic_config& right){
199 | *this = right;
200 | }
201 |
202 | basic_config& operator=(const basic_config& right){
203 | BUCKET_COUNT = right.BUCKET_COUNT;
204 | CHAR_BITS = right.CHAR_BITS;
205 | BITS_SIZE = right.BITS_SIZE;
206 | BITS_SIZE1 = right.BITS_SIZE1;
207 | BITS_LOG2_SIZE = right.BITS_LOG2_SIZE;
208 | ALL_BITS_SET = right.ALL_BITS_SET;
209 | MIN_PROBES = right.MIN_PROBES;
210 | DEFAULT_PROBES = right.DEFAULT_PROBES;
211 | PROBE_INCR = right.PROBE_INCR;
212 | SAFETY_PROBES_FACTOR = right.SAFETY_PROBES_FACTOR;
213 | MIN_EXTENT = right.MIN_EXTENT;
214 | MIN_OVERFLOW = right.MIN_OVERFLOW;
215 | MAX_OVERFLOW_FACTOR = right.MAX_OVERFLOW_FACTOR;
216 | SAFETY_OVERFLOW_FACTOR = right.SAFETY_OVERFLOW_FACTOR;
217 | LOGARITHMIC = right.LOGARITHMIC;
218 | DEFAULT_MIN_LOAD_FACTOR = right.DEFAULT_MIN_LOAD_FACTOR;
219 | return *this;
220 | }
221 |
222 | basic_config(){
223 | BUCKET_COUNT = 1;
224 | CHAR_BITS = 8;
225 | BITS_SIZE = (sizeof(_Bt) * CHAR_BITS);
226 | BITS_SIZE1 = BITS_SIZE-1;
227 | BITS_LOG2_SIZE = (size_type) log2((size_type)BITS_SIZE);
228 | ALL_BITS_SET = ~(_Bt)0;
229 | MIN_PROBES = 1;
230 | PROBE_INCR = 1;
231 | DEFAULT_PROBES = 16;
232 | SAFETY_PROBES_FACTOR = 32;
233 | MIN_EXTENT = 4; /// start size of the hash table
234 | MIN_OVERFLOW = 8;
235 | MAX_OVERFLOW_FACTOR = 1<<16; //BITS_SIZE*8/sizeof(_Bt);
236 | SAFETY_OVERFLOW_FACTOR = 500;
237 | LOGARITHMIC = logarithmic;
238 | DEFAULT_MIN_LOAD_FACTOR = 0.25;
239 | }
240 | };
241 | template<typename _InMapper>
242 | struct basic_traits{
243 | typedef typename _InMapper::config_type rabbit_config;
244 | typedef typename rabbit_config::_Bt _Bt;
245 | typedef typename rabbit_config::size_type size_type;
246 | typedef ptrdiff_t difference_type;
247 | typedef _InMapper _Mapper;
248 | };
249 | typedef basic_traits<_BinMapper<basic_config<0> > > default_traits;
250 | typedef basic_traits<_BinMapper<basic_config<2> > > sparse_traits;
251 |
252 |
253 | template
254 | < class _K
255 | , class _V
256 | , class _H = rabbit_hash<_K>
257 | , class _E = std::equal_to<_K>
258 | , class _Allocator = std::allocator<_K>
259 | , class _Traits = default_traits
260 | >
261 | class basic_unordered_map {
262 | public:
263 | typedef _K key_type;
264 |
265 | typedef _V mapped_type;
266 |
267 | typedef std::pair<_K,_V> _ElPair;
268 | typedef std::pair<const _K,_V> _ConstElPair;
269 | typedef _ElPair value_type;
270 | typedef _ConstElPair const_value_type;
271 | typedef typename _Traits::_Bt _Bt; /// exists bucket type - not using vector<bool> - interface does not support bit bucketing
272 | typedef typename _Traits::size_type size_type;
273 | typedef typename _Traits::rabbit_config rabbit_config;
274 | typedef typename _Traits::_Mapper _Mapper;
275 | typedef typename _Traits::difference_type difference_type;
276 |
277 | typedef _Allocator allocator_type;
278 | typedef _ElPair* pointer;
279 | typedef const _ElPair* const_pointer;
280 | typedef _ElPair& reference;
281 | typedef const _ElPair& const_reference;
282 | // typedef typename _Base::reverse_iterator reverse_iterator;
283 | // typedef typename _Base::const_reverse_iterator
284 | // const_reverse_iterator;
285 |
286 | typedef _E key_equal;
287 | typedef _E key_compare;
288 | typedef _H hasher;
289 |
290 | protected:
291 | struct overflow_stats{
292 | size_type start_elements;
293 | size_type end_elements;
294 | overflow_stats() : start_elements(0),end_elements(0){}
295 | };
296 |
297 | struct _KeySegment{
298 |
299 | public:
300 | _Bt overflows;
301 | _Bt exists;
302 | private:
303 | void set_bit(_Bt& w, _Bt index, bool f){
304 |
305 | #ifdef _MSC_VER
306 | #pragma warning(disable:4804)
307 | #endif
308 | _Bt m = (_Bt)1ul << index;// the bit mask
309 | w ^= (-f ^ w) & m;
310 | ///w = (w & ~m) | (-f & m);
311 | }
312 |
313 | public:
314 | //_ElPair keys[sizeof(_Bt) * 8];
315 |
316 | inline bool all_exists() const {
317 | return (exists == ~(_Bt)0);
318 | }
319 |
320 | inline bool none_exists() const {
321 | return (exists == (_Bt)0);
322 | }
323 |
324 | inline bool is_exists(_Bt bit) const {
325 | return ((exists >> bit) & (_Bt)1ul);
326 | }
327 |
328 | inline bool is_overflows(_Bt bit) const {
329 | return ((overflows >> bit) & (_Bt)1ul);
330 | }
331 |
332 | inline void set_exists(_Bt index, bool f){
333 | set_bit(exists,index,f);
334 | }
335 |
336 | inline void toggle_exists(_Bt index){
337 | exists ^= ((_Bt)1 << index);
338 | }
339 |
340 | void set_overflows(_Bt index, bool f){
341 | set_bit(overflows,index,f);
342 | }
343 |
344 | void clear(){
345 | exists = 0;
346 | overflows = 0;
347 | }
348 | _KeySegment(){
349 | exists = 0;
350 | overflows = 0;
351 | }
352 | };
353 | public:
354 | typedef _KeySegment _Segment;
355 | /// the vector that will contain the segmented mapping pairs and flags
356 | typedef std::vector<_Segment, _Allocator> _Segments;
357 | typedef std::vector<_ElPair, _Allocator> _Keys;
358 |
359 | struct hash_kernel{
360 | /// settings configuration
361 | rabbit_config config;
362 | size_type elements;
363 | size_type initial_probes;
364 | size_type probes;
365 | size_type rand_probes; /// used when there might be an attack
366 | size_type last_modified;
367 | /// the existence bit set is a factor of BITS_SIZE+1 less than the extent
368 | _Segment* clusters;///a.k.a. pages
369 | _ElPair* keys;
370 |
371 | size_type overflow;
372 | size_type overflow_elements;
373 | overflow_stats stats;
374 | _Mapper key_mapper;
375 | _H hf;
376 | _E eq_f;
377 | float mf;
378 | float min_lf;
379 | size_type buckets;
380 | size_type removed;
381 | _Allocator allocator;
382 | _K empty_key;
383 | size_type logarithmic;
384 | size_type collisions;
385 | typename _Allocator::template rebind<_Segment>::other get_segment_allocator() {
386 | return typename _Allocator::template rebind<_Segment>::other(allocator) ;
387 | }
388 | typename _Allocator::template rebind<_ElPair>::other get_el_allocator(){
389 | return typename _Allocator::template rebind<_ElPair>::other(allocator) ;
390 | }
391 | typename _Allocator::template rebind<_V>::other get_value_allocator(){
392 | return typename _Allocator::template rebind<_V>::other(allocator) ;
393 | }
394 | size_type capacity() const {
395 | return get_data_size();
396 | }
397 |
398 | /// the minimum load factor
399 | float load_factor() const{
400 | if(!elements) return 0;
401 | return (float)((double)elements/(double)bucket_count());
402 | }
403 | float collision_factor() const{
404 | return (float)((double)collisions/(double)bucket_count());
405 | }
406 |
407 | /// there is a variable number of buckets; there are at most this many
408 | ///
409 | size_type bucket_count() const {
410 | if(!elements) return 0;
411 | return get_data_size();
412 | }
413 | /// the size of a bucket can be calculated based on the
414 | /// hash value of its first occupant
415 | /// mainly to satisfy stl conventions
416 | size_type bucket_size ( size_type n ) const{
417 | size_type pos = n;
418 | if (!overflows_(pos)) {
419 | if (exists_(pos) && map_key(get_key(pos)) == n)
420 | return 1;
421 | else return 0;
422 | }
423 | size_type m = pos + probes;
424 | size_type r = 0;
425 | for(; pos < m;++pos){
426 | if(!exists_(pos)){
427 | }else if(map_key(get_key(pos)) == n){
428 | ++r;
429 | }
430 | }
431 | size_type e = end();
432 | for(pos=get_o_start(); pos < e; ){
433 | if(!exists_(pos)){
434 | }else if(map_key(get_key(pos)) == n){
435 | ++r;
436 | }
437 | ++pos;
438 | }
439 | return r;
440 | }
441 |
442 | float max_load_factor() const {
443 | return mf;
444 | }
445 |
446 | void max_load_factor ( float z ){
447 | mf = z;
448 | }
449 |
450 | void min_load_factor( float z ){
451 | this->min_lf = z;
452 | }
453 | float min_load_factor() const {
454 | return this->min_lf;
455 | }
456 | /// total data size, never less than size()
457 | size_type get_data_size() const {
458 | return get_extent()+initial_probes+overflow;
459 | }
460 |
461 | /// the overflow start
462 | size_type get_o_start() const {
463 | return get_extent()+initial_probes;
464 | }
465 |
466 | size_type get_segment_number(size_type pos) const {
467 | return (pos >> config.BITS_LOG2_SIZE);
468 | }
469 |
470 | _Bt get_segment_index(size_type pos) const {
471 | return (_Bt)(pos & (config.BITS_SIZE1));
472 | }
473 |
474 | _Segment &get_segment(size_type pos) {
475 | return clusters[pos >> config.BITS_LOG2_SIZE];
476 | }
477 |
478 | const _Segment &get_segment(size_type pos) const {
479 | return clusters[get_segment_number(pos)];
480 | }
481 |
482 | inline const _ElPair &get_pair(size_type pos) const {
483 | return keys[pos];
484 | //return get_segment(pos).keys[get_segment_index(pos)];
485 | }
486 |
487 | inline _ElPair& get_pair(size_type pos) {
488 | //return get_segment(pos).keys[get_segment_index(pos)];
489 | return keys[pos];
490 | }
491 |
492 | const _K & get_key(size_type pos) const {
493 | return get_pair(pos).first;
494 | }
495 |
496 | const _V & get_value(size_type pos) const {
497 | return get_pair(pos).second;
498 | }
499 |
500 |
501 | _K & get_key(size_type pos) {
502 | return get_pair(pos).first;
503 | }
504 |
505 | _V & get_value(size_type pos) {
506 | return get_pair(pos).second;
507 | }
508 |
509 | void set_segment_key(size_type pos, const _K &k) {
510 | get_pair(pos).first = k;
511 | }
512 |
513 | void destroy_segment_value(size_type pos){
514 | get_pair(pos).second = _V();
515 | }
516 |
517 | _V* create_segment_value(size_type pos) {
518 | _V* r = &(get_pair(pos).second);
519 | return r;
520 | }
521 |
522 | void set_segment_value(size_type pos, const _V &v) {
523 | get_pair(pos).second = v;
524 | }
525 |
526 | void set_exists(size_type pos, bool f){
527 | last_modified = pos;
528 | get_segment(pos).set_exists(get_segment_index(pos),f);
529 | }
530 |
531 | void set_overflows(size_type pos, bool f){
532 | get_segment(pos).set_overflows(get_segment_index(pos),f);
533 | }
534 |
535 | inline bool exists_(size_type pos) const {
536 | return get_segment(pos).is_exists(get_segment_index(pos));
537 | }
538 |
539 | inline bool overflows_(size_type pos) const {
540 | return get_segment(pos).is_overflows(get_segment_index(pos));
541 | }
542 |
543 | inline size_type hash_key(const _K& k) const {
544 | return (size_type)_H()(k);
545 | }
546 |
547 | inline size_type map_key(const _K& k) const {
548 | return map_hash(hash_key(k));
549 | }
550 |
551 | inline size_type map_hash(size_type h) const {
552 | if (this->rand_probes)
553 | return key_mapper(randomize(h));
554 | return key_mapper(h);
555 | }
556 |
557 | inline size_type map_rand_key(const _K& k) const {
558 | size_type h = (size_type)_H()(k);
559 | return map_hash(h);
560 | }
561 |
562 | inline size_type map_rand_key(const _K& k, size_type origin) const {
563 | return origin;
564 | }
565 |
566 | size_type get_e_size() const {
567 | return (size_type) (get_data_size()/config.BITS_SIZE)+1;
568 | }
569 |
570 | void free_data(){
571 |
572 | if(clusters) {
573 | size_type esize = get_e_size();
574 | for(size_type e = 0; e < get_data_size(); ++e){
575 | get_el_allocator().destroy(&keys[e]);
576 | }
577 | get_el_allocator().deallocate(keys,get_data_size());
578 | for(size_type c = 0; c < esize; ++c){
579 | get_segment_allocator().destroy(&clusters[c]);
580 | }
581 | get_segment_allocator().deallocate(clusters,get_e_size());
582 | }
583 |
584 | clusters = nullptr;
585 | }
586 |
587 | double get_resize_factor() const {
588 | return key_mapper.resize_factor();
589 | }
590 |
591 | size_type get_probes() const {
592 | return this->probes;
593 | }
594 | size_type get_rand_probes() const {
595 | return this->rand_probes;
596 | }
597 | void set_rand_probes(){
598 | this->rand_probes = this->probes;
599 | }
600 |
601 | void set_rand_probes(size_type rand_probes){
602 | this->rand_probes = rand_probes;
603 | }
604 |
605 | /// clears all data and resizes the new data vector to the parameter
606 | void resize_clear(size_type new_extent){
607 | /// inverse of factor used to determine overflow list
608 | /// when overflow list is full rehash starts
609 | free_data();
610 |
611 | key_mapper = _Mapper(new_extent,config);
612 |
613 | mf = 1.0;
614 | assert(config.MAX_OVERFLOW_FACTOR > 0);
615 | if(is_logarithmic()){
616 | probes = config.log2(new_extent)*logarithmic;
617 | overflow = config.log2(new_extent)*logarithmic;
618 | }else{
619 | if(min_load_factor() < 0.01){
620 | probes = config.DEFAULT_PROBES;
621 | }else{
622 | probes = config.MIN_PROBES;
623 | }
624 | //std::cout << "rehash " << std::endl;
625 |
626 | overflow = std::max(config.MIN_OVERFLOW, new_extent / (config.MAX_OVERFLOW_FACTOR));
627 |
628 | }
629 | if(rand_probes ){
630 | //std::cout << "setting safety values " << std::endl;
631 | //overflow = std::max(new_extent / config.SAFETY_OVERFLOW_FACTOR,overflow);
632 | //probes *= config.SAFETY_PROBES_FACTOR;
633 | }
634 | initial_probes = probes;
635 | //std::cout << "rehash with overflow:" << overflow << std::endl;
636 | elements = 0;
637 | removed = 0;
638 | collisions = 0;
639 | empty_key = _K();
640 | overflow_elements = get_o_start();
641 | size_type esize = get_e_size();
642 | keys = get_el_allocator().allocate(get_data_size());
643 | clusters = get_segment_allocator().allocate(esize);
644 | _KeySegment ks;
645 | _ElPair element;
646 | for(size_type e = 0; e < get_data_size(); ++e){
647 | get_el_allocator().construct(&keys[e],element);
648 | }
649 | for(size_type c = 0; c < esize; ++c){
650 | get_segment_allocator().construct(&clusters[c],ks);
651 | }
652 | set_exists(get_data_size(),true);
653 | buckets = 0;
654 |
655 | };
656 |
657 | void clear(){
658 | size_type esize = get_e_size();
659 | for(size_type c = 0; c < esize; ++c){
660 | clusters[c].clear();
661 | }
662 | _ElPair element;
663 | for(size_type e = 0; e < get_data_size(); ++e){
664 | keys[e] = element;
665 | }
666 | set_exists(get_data_size(),true);
667 | collisions = 0;
668 | elements = 0;
669 | removed = 0;
670 | rand_probes = 0;
671 | }
672 |
673 | hash_kernel(const key_compare& compare,const allocator_type& allocator)
674 | : clusters(nullptr), eq_f(compare), mf(1.0f), min_lf(config.DEFAULT_MIN_LOAD_FACTOR), allocator(allocator),logarithmic(config.LOGARITHMIC)
675 | {
676 | resize_clear(config.MIN_EXTENT);
677 | }
678 |
679 | hash_kernel() : clusters(nullptr), mf(1.0f), min_lf(config.DEFAULT_MIN_LOAD_FACTOR),logarithmic(config.LOGARITHMIC)
680 | {
681 | resize_clear(config.MIN_EXTENT);
682 | }
683 |
684 | hash_kernel(const hash_kernel& right) : clusters(nullptr), mf(1.0f), min_lf(config.DEFAULT_MIN_LOAD_FACTOR),logarithmic(config.LOGARITHMIC)
685 | {
686 | *this = right;
687 | }
688 |
689 | ~hash_kernel(){
690 | free_data();
691 | }
692 | inline size_type get_extent() const {
693 | return key_mapper.extent;
694 | }
695 | void set_logarithmic(size_type loga){
696 | logarithmic = loga;
697 | }
698 | size_type get_logarithmic() const {
699 | return this->logarithmic;
700 | }
701 | bool is_logarithmic() const {
702 | return this->logarithmic > 0;
703 | }
704 | hash_kernel& operator=(const hash_kernel& right){
705 | config = right.config;
706 | key_mapper = right.key_mapper;
707 | free_data();
708 | buckets = right.buckets;
709 | removed = right.removed;
710 | mf = right.mf;
711 | min_lf = right.min_lf;
712 | elements = right.elements;
713 | collisions = right.collisions;
714 | size_type esize = get_e_size();
715 | clusters = get_segment_allocator().allocate(esize);
716 | keys = right.keys;
717 | std::copy(right.clusters, right.clusters+esize, clusters);
718 | return *this;
719 | }
720 | inline bool raw_equal_key(size_type pos,const _K& k) const {
721 | const _K& l = get_key(pos); ///.key(get_segment_index(pos));
722 | return eq_f(l, k) ;
723 | }
724 | inline bool segment_equal_key_exists(size_type pos,const _K& k) const {
725 | _Bt index = get_segment_index(pos);
726 | const _Segment& s = get_segment(pos);
727 | return eq_f(get_key(pos), k) && s.is_exists(index) ;
728 |
729 | }
730 |
731 | inline bool equal_key(size_type pos,const _K& k) const {
732 | const _K& l = get_key(pos);
733 | return eq_f(l, k) ;
734 | }
735 |
736 | inline size_type randomize(size_type v) const {
737 | return key_mapper.randomize(v);
738 | }
739 |
740 | inline size_type hash_probe_incr(size_type base, unsigned int i) const {
741 | //if(sizeof(_K) > sizeof(unsigned long long)){
742 | //if (this->rand_probes){
743 | //return base + i*i + 1;
744 | //}else{
745 | return base + i + 1;
746 | //}
747 | }
748 | size_type find_in_bucket(const _K& k, size_type origin) const {
749 | size_type locate = get_o_start();
750 | size_type pos = locate;
751 | for(;pos < end();++pos){
752 | if(segment_equal_key_exists(pos,k)){
753 | return pos;
754 | }
755 | }
756 |
757 | return end();
758 | }
759 | _V* subscript_bucket(const _K& k, size_type origin){
760 | size_type locate = overflow_elements;
761 | size_type pos = locate;
762 | for(;pos < end();++pos){
763 | if(!exists_(pos)){
764 | break;
765 | }
766 | }
767 |
768 | if(pos != end()){
769 | overflow_elements++;
770 | if (!this->is_logarithmic()) {
771 | if (this->load_factor() < min_load_factor()) {
772 | if (this->rand_probes) {
773 | this->probes <<= config.PROBE_INCR;
774 | }
775 | else{
776 | this->probes += config.PROBE_INCR;
777 | }
778 | //std::cout << "increased probes to " << this->probes << " o f bucket " << this->overflow << " min lf " << min_load_factor() << " actual lf " << this->load_factor() << std::endl;
779 | }
780 | }
781 | set_overflows(origin, true);
782 | set_exists(pos, true);
783 | set_segment_key(pos, k);
784 | size_type os = 0; // (overflow_elements - (get_extent() + initial_probes));
785 | if(os == 1){
786 | stats.start_elements = elements;
787 | //std::cout << "overflow start: hash table size " << elements << " elements in over flow:" << os << std::endl;
788 | }
789 |
790 | if(overflow_elements == end() && stats.start_elements){
791 | stats.end_elements = elements;
792 | size_type saved = stats.end_elements - stats.start_elements - os;
793 | double percent_saved = (100.0*((double)saved/(double)elements));
794 |
795 | // std::cout << "overflow end: hash table size " << elements << " elements in over flow:" << os << " saved : " << saved <<
796 | // std::endl << " percent saved " << std::setprecision(4) << percent_saved <<
797 | // std::endl;
798 | }
799 |
800 | ++elements;
801 | return create_segment_value(pos);
802 |
803 | }
804 | return nullptr;
805 | }
806 | _V* subscript_rest(const _K& k, size_type origin)
807 | RABBIT_NOINLINE_ {
808 | size_type pos = map_rand_key(k);
809 | size_type base = pos;
810 | for(unsigned int i =0; i < probes && pos < get_extent();++i){
811 | _Bt si = get_segment_index(pos);
812 | _Segment& s = get_segment(pos);
813 | if(!s.is_exists(si)){
814 | s.toggle_exists(si);
815 | set_segment_key(pos,k);
816 |
817 | ++collisions;
818 | ++elements;
819 | set_overflows(origin, true);
820 | return create_segment_value(pos);
821 | }
822 | pos = hash_probe_incr(base,i);
823 | }
824 |
825 |
826 | return subscript_bucket(k,origin);
827 | }
828 | _V* subscript(const _K& k){
829 | size_type pos = map_key(k);
830 | _Bt si = get_segment_index(pos);
831 | _Segment& s = get_segment(pos);
832 |
833 | bool key_exists = s.is_exists(si);
834 | bool key_overflows = s.is_overflows(si);
835 | if(!key_exists && !key_overflows){
836 | s.toggle_exists(si);
837 | set_segment_key(pos,k);
838 | ++elements;
839 | return create_segment_value(pos);
840 | }else if(key_exists && equal_key(pos,k)){
841 | return &(get_value(pos));
842 | }
843 | size_type h = pos;
844 | if(key_overflows){
845 | pos = find_rest(k,h);
846 | if(pos != end()){
847 | return &(get_value(pos));
848 | }
849 | }
850 | return subscript_rest(k,h);
851 | }
852 | size_type erase_rest(const _K& k, size_type origin)
853 | RABBIT_NOINLINE_ /// this function must never be inlined
854 | {
855 | size_type pos = find_rest(k,origin);
856 |
857 | if(pos != (*this).end()){
858 | set_exists(pos, false);
859 | ++removed;
860 | set_segment_key(pos, empty_key);
861 | destroy_segment_value(pos);
862 | --elements;
863 | return 1;
864 | }
865 | return 0;
866 | }
867 | size_type erase(const _K& k){
868 |
869 | size_type pos = map_key(k);
870 |
871 | _Bt si = get_segment_index(pos);
872 | _Segment& s = get_segment(pos);
873 | if(s.is_exists(si) && equal_key(pos,k)){ ///get_segment(pos).exists == ALL_BITS_SET ||
874 | set_segment_key(pos, empty_key);
875 | s.toggle_exists(si);
876 | destroy_segment_value(pos);
877 | --elements;
878 | ++removed;
879 | return 1;
880 | }
881 | if(!s.is_overflows(si)){
882 | return 0;
883 | }else
884 | return erase_rest(k, pos);
885 |
886 | }
887 | /// not used (could be used where hash table must actually shrink too)
888 | bool is_small() const {
889 | return (get_extent() > (config.MIN_EXTENT << 3)) && (elements < get_extent()/8);
890 | }
891 |
892 | size_type count(const _K& k) const {
893 | size_type pos =(*this).find(k);
894 | if(pos == (*this).end()){
895 | return 0;
896 | }else return 1;
897 | }
898 | const _V& at(const _K& k) const {
899 | size_type pos = find(k);
900 | if(pos != (*this).end()){
901 | return get_value(pos);
902 | }
903 | throw std::exception();
904 | }
905 | _V& direct(size_type pos) {
906 | return get_value(pos);
907 | }
908 | _V& at(const _K& k) {
909 | size_type pos = find(k);
910 | if(pos != (*this).end()){
911 | return get_value(pos);
912 | }
913 | throw std::exception();
914 | }
915 |
916 | bool get(const _K& k, _V& v) const {
917 | size_type pos = find(k);
918 | if(pos != (*this).end()){
919 | v = get_value(pos);
920 | return true;
921 | }
922 | return false;
923 | }
924 |
925 | /// probabilistic check if key with given hash exists
926 | /// false indicates the key definitely will not be found
927 | /// else we'll have to do a full find
928 |
929 | bool could_have(size_type origin){
930 | size_type pos = map_hash(origin);
931 | _Bt index = get_segment_index(origin);
932 | const _Segment& s = get_segment(origin);
933 | return (s.is_exists(index) || s.is_overflows(index));
934 | }
935 |
936 | size_type find_rest_not_empty(const _K& k, size_type origin) const
937 | RABBIT_NOINLINE_
938 | {
939 |
940 | /// randomization step for attack mitigation
941 | size_type pos = map_rand_key(k,origin);
942 | size_type base = pos;
943 | for(unsigned int i = 0; i < probes && pos < get_extent();){
944 | if(equal_key(pos,k)) return pos;
945 | pos = hash_probe_incr(base,i);
946 | ++i;
947 | }
948 | _Bt index = get_segment_index(origin);
949 | const _Segment& s = get_segment(origin);
950 | if(!s.is_overflows(index)){
951 | return end();
952 |
953 | }
954 | return find_in_bucket(k,origin);
955 |
956 | }
957 | size_type find_rest(const _K& k, size_type origin) const
958 | RABBIT_NOINLINE_
959 | {
960 | /// randomization step for attack mitigation
961 | size_type pos = map_rand_key(k);
962 | size_type base = pos;
963 | for(unsigned int i =0; i < probes && pos < get_extent();){//
964 | _Bt si = get_segment_index(pos);
965 | if(segment_equal_key_exists(pos,k)){
966 | return pos;
967 | }
968 | pos = hash_probe_incr(base,i);
969 | ++i;
970 | }
971 | return find_in_bucket(k,origin);
972 | }
973 | size_type find_empty(const _K& k, const size_type& unmapped) const
974 | RABBIT_NOINLINE_
975 | {
976 | size_type pos = map_hash(unmapped);
977 | _Bt index = get_segment_index(pos);
978 | const _Segment& s = get_segment(pos);
979 | if(s.is_exists(index) && equal_key(pos,k) ){ ///get_segment(pos).exists == ALL_BITS_SET ||
980 | return pos;
981 | }
982 | if(!s.is_overflows(index)){
983 | return end();
984 | }
985 | return find_rest(k, pos);
986 | }
987 |
988 | inline size_type find_non_empty(const _K& k,const size_type& unmapped) const {
989 | size_type pos = map_hash(unmapped);
990 | if(equal_key(pos,k)) return pos;
991 | return find_rest_not_empty(k, pos);
992 | }
993 |
994 | inline size_type find(const _K& k,const size_type& unmapped) const {
995 |
996 | bool is_empty = eq_f(empty_key,k); // && sizeof(_K) <= sizeof(size_type);
997 | if(is_empty){
998 | return find_empty(k, unmapped);
999 | }else{
1000 | return find_non_empty(k,unmapped);
1001 | }
1002 | }
1003 |
1004 | size_type find(const _K& k) const {
1005 |
1006 | size_type pos = hash_key(k);
1007 | return find(k,pos);
1008 | }
1009 |
1010 | size_type begin() const {
1011 | if(!elements)
1012 | return end();
1013 |
1014 | size_type pos = 0;
1015 | _Bt index = 0;
1016 |
1017 | const _Bt bits_size = config.BITS_SIZE;
1018 | size_type e = end();
1019 | while(pos < e){
1020 | const _Segment &seg = get_segment(pos);
1021 | index = get_segment_index(pos);
1022 | if(seg.exists == 0){
1023 | pos += bits_size;
1024 | }else{
1025 | if(seg.is_exists(index))
1026 | break;
1027 | ++pos;
1028 | }
1029 | }
1030 | return pos ;
1031 | }
1032 | size_type end() const {
1033 |
1034 | return get_data_size();
1035 | }
1036 | size_type size() const {
1037 | return elements;
1038 | }
1039 | size_type get_collisions() const {
1040 | return collisions;
1041 | }
1042 | typedef std::shared_ptr<hash_kernel> ptr;
1043 | }; /// hash_kernel
1044 | public:
1045 | struct const_iterator;
1046 | struct iterator {
1047 | typedef hash_kernel* kernel_ptr;
1048 | const basic_unordered_map* h;
1049 | size_type pos;
1050 | friend struct const_iterator;
1051 | protected:
1052 | _Bt index;
1053 | _Bt exists;
1054 | _Bt bsize;
1055 | kernel_ptr get_kernel() const {
1056 | return h->current.get();
1057 | }
1058 | kernel_ptr get_kernel() {
1059 | return h->current.get();
1060 | }
1061 |
1062 | void increment() {
1063 | ++pos;
1064 | ++index;
1065 | if (index == bsize) {
1066 | auto k = get_kernel();
1067 | const _Segment& s = k->get_segment(pos);
1068 | exists = s.exists;
1069 | index = k->get_segment_index(pos);
1070 | }
1071 |
1072 | }
1073 | public:
1074 | iterator() : h(nullptr), pos(0) {
1075 | }
1076 |
1077 | iterator(const basic_unordered_map* h, size_type pos, _Bt exists, _Bt index, _Bt bsize)
1078 | : pos(pos), h(h), exists(exists), index(index), bsize(bsize) {
1079 |
1080 | }
1081 |
1082 | iterator(const iterator& r) {
1083 | (*this) = r;
1084 | }
1085 |
1086 | //~iterator() {
1087 | //}
1088 |
1089 | iterator& operator=(const iterator& r) {
1090 | pos = r.pos;
1091 | h = r.h;
1092 | exists = r.exists;
1093 | index = r.index;
1094 | bsize = r.bsize;
1095 | return (*this);
1096 | }
1097 | inline iterator& operator++() {
1098 | do {
1099 | increment();
1100 | } while ((exists & (((_Bt)1) << index)) == (_Bt)0);
1101 | return (*this);
1102 | }
1103 | iterator operator++(int) {
1104 | iterator t = (*this);
1105 | ++(*this);
1106 | return t;
1107 | }
1108 | const _ElPair& operator*() const {
1109 | return get_kernel()->get_pair((*this).pos);
1110 | }
1111 | inline _ElPair& operator*() {
1112 | return get_kernel()->get_pair((*this).pos);
1113 | }
1114 | inline _ElPair* operator->() const {
1115 | _ElPair* ret = &(get_kernel()->get_pair(pos));
1116 | return ret;
1117 | }
1118 | inline const _ElPair *operator->() {
1119 | _ElPair* ret = &(get_kernel()->get_pair(pos));
1120 | return ret;
1121 | }
1122 | inline bool operator==(const iterator& r) const {
1123 |
1124 | return (pos == r.pos);
1125 | }
1126 | bool operator!=(const iterator& r) const {
1127 |
1128 | return (pos != r.pos);
1129 | }
1130 |
1131 | size_type get_pos() const {
1132 | return pos;
1133 | }
1134 |
1135 | };
1136 |
1137 | struct const_iterator {
1138 | private:
1139 | typedef hash_kernel* kernel_ptr;
1140 | const basic_unordered_map* h;
1141 | _Bt index;
1142 | _Bt exists;
1143 | void check_pointer() const {
1144 | return;
1145 | if(h!=nullptr && h->pcurrent != h->current.get()){
1146 | std::cout << "invalid cache pointer: not equal to actual" << std::endl;
1147 | }
1148 | }
1149 | inline kernel_ptr get_kernel() const {
1150 | check_pointer();
1151 | return h->pcurrent;
1152 | }
1153 | inline kernel_ptr get_kernel() {
1154 | check_pointer();
1155 | return const_cast<basic_unordered_map*>(h)->pcurrent; // current.get();
1156 | }
1157 |
1158 | void increment() {
1159 | ++pos;
1160 | ++index;
1161 | auto k = get_kernel();
1162 | if (index == k->config.BITS_SIZE) {
1163 | const _Segment& s = k->get_segment(pos);
1164 | exists = s.exists;
1165 | index = k->get_segment_index(pos);
1166 | }
1167 |
1168 | }
1169 | public:
1170 | size_type pos;
1171 |
1172 | const_iterator() : h(nullptr){
1173 |
1174 | }
1175 | //~const_iterator() {
1176 |
1177 | //}
1178 | const_iterator
1179 | ( const basic_unordered_map* h, size_type pos, _Bt exists, _Bt index)
1180 | : pos(pos), h(h), exists(exists), index(index){
1181 |
1182 | }
1183 | const_iterator(const iterator& r) : h(nullptr){
1184 | (*this) = r;
1185 | }
1186 |
1187 | const_iterator& operator=(const iterator& r) {
1188 | pos = r.pos;
1189 | h = r.h;
1190 | index = r.index;
1191 | exists = r.exists;
1192 | return (*this);
1193 | }
1194 |
1195 | const_iterator& operator=(const const_iterator& r) {
1196 | pos = r.pos;
1197 | h = r.h;
1198 | index = r.index;
1199 | return (*this);
1200 | }
1201 |
1202 | const_iterator& operator++() {
1203 | do {
1204 | increment();
1205 | } while ((exists & (((_Bt)1) << index)) == (_Bt)0);
1206 | return (*this);
1207 | }
1208 | const_iterator operator++(int) {
1209 | const_iterator t = (*this);
1210 | ++(*this);
1211 | return t;
1212 | }
1213 | const _ElPair& operator*() const {
1214 | return get_kernel()->get_pair(pos);
1215 | }
1216 | const _ElPair *operator->() const {
1217 | _ElPair* ret = &(get_kernel()->get_pair(pos));
1218 | return ret;
1219 | }
1220 |
1221 | inline bool operator==(const const_iterator& r) const {
1222 |
1223 | return (pos == r.pos);
1224 | }
1225 | bool operator!=(const const_iterator& r) const {
1226 |
1227 | return (pos != r.pos);
1228 | }
1229 |
1230 | size_type get_pos() const {
1231 | return pos;
1232 | }
1233 |
1234 | };
1235 |
1236 | protected:
1237 | /// the default config for each hash instance
1238 | rabbit_config default_config;
1239 | key_compare key_c;
1240 | allocator_type alloc;
1241 |
1242 | void rehash(){
1243 | size_type to = current->key_mapper.next_size();
1244 | rehash(to);
1245 | }
1246 | void set_current(typename hash_kernel::ptr c){
1247 | current = c;
1248 | pcurrent = c.get();
1249 | }
1250 |
1251 |
1252 | typename hash_kernel::ptr current;
1253 | typedef std::vector<typename hash_kernel::ptr> kernel_stack;
1254 | hash_kernel* pcurrent;
1255 | inline void create_current(){
1256 | if(current==nullptr)
1257 | set_current(std::allocate_shared<hash_kernel>(alloc,key_c,alloc));
1258 | }
1259 |
1260 | iterator from_pos_empty(size_type pos) const {
1261 | return iterator(this, pos, 0, 0, 0);
1262 | }
1263 |
1264 | iterator from_pos(size_type pos) const {
1265 | const _Segment& s = pcurrent->get_segment(pos);
1266 | _Bt index = pcurrent->get_segment_index(pos);
1267 | _Bt bsize = pcurrent->config.BITS_SIZE;
1268 | return iterator(this,pos,s.exists,index,bsize);
1269 | }
1270 |
1271 | public:
1272 | float load_factor() const{
1273 | if(current==nullptr) return 0;
1274 | return current->load_factor();
1275 | }
1276 | size_type bucket_count() const {
1277 | if(current==nullptr) return 0;
1278 | return current->bucket_count();
1279 | }
1280 | size_type bucket_size ( size_type n ) const{
1281 | if(current==nullptr) return 0;
1282 | return current->bucket_size ( n );
1283 | }
1284 | float max_load_factor() const {
1285 | if(current==nullptr) return 1;
1286 | return current->max_load_factor();
1287 | }
1288 |
1289 | void max_load_factor ( float z ){
1290 | create_current();
1291 | current->max_load_factor(z);
1292 | }
1293 | bool empty() const {
1294 | if(current==nullptr) return true;
1295 | return current->size() == 0;
1296 | }
1297 | void reserve(size_type atleast){
1298 | create_current();
1299 | rehash((size_type)((double)atleast*current->get_resize_factor()));
1300 | }
1301 | void resize(size_type atleast) {
1302 | create_current();
1303 | size_type calc = current->key_mapper.nearest_larger(atleast);
1304 | rehash(calc,false); // avoid randomization
1305 | }
1306 | /// called when we don't want pure stl semantics
1307 | void rehash(size_type to_, bool check_lf = true) {
1308 | create_current();
1309 | rabbit_config config;
1310 | size_type to = std::max(to_, config.MIN_EXTENT);
1311 | /// can cause an out-of-memory error because of recursive rehashes
1312 |
1313 | typename hash_kernel::ptr rehashed = std::allocate_shared<hash_kernel>(alloc);
1314 | size_type extent = current->get_extent();
1315 | size_type new_extent = to;
1316 | size_type nrand_probes = current->get_rand_probes();
1317 | hash_kernel * reh = rehashed.get();
1318 | hash_kernel * cur = current.get();
1319 | try{
1320 | //std::cout << " load factor " << current->load_factor() << " for " << current->size() << " elements and collision factor " << current->collision_factor() << std::endl;
1321 | //std::cout << " capacity " << current->capacity() << std::endl;
1322 | if(check_lf && current->load_factor() < 0.15){
1323 | //std::cout << "possible attack/bad hash detected : using random probes : " << current->get_probes() << " : " << extent << " : " << current->get_logarithmic() << std::endl;
1324 | nrand_probes = 1;
1325 | }
1326 | rehashed->set_logarithmic(current->get_logarithmic());
1327 | rehashed->mf = (*this).current->mf;
1328 | rehashed->min_load_factor(this->current->min_load_factor());
1329 | rehashed->set_rand_probes(nrand_probes);
1330 | rehashed->resize_clear(new_extent);
1331 | using namespace std;
1332 | while(true){
1333 | iterator e = end();
1334 | size_type ctr = 0;
1335 | bool rerehashed = false;
1336 | //_K k;
1337 | for(iterator i = begin();i != e;++i){
1338 | //std::swap(k,(*i).first);
1339 | _V* v = reh->subscript((*i).first);
1340 | if(v != nullptr){
1341 | *v = i->second;
1342 | /// a cheap check to illuminate subtle bugs during development
1343 | if(++ctr != rehashed->elements){
1344 | cout << "iterations " << ctr << " elements " << rehashed->elements << " extent " << rehashed->get_extent() << endl;
1345 | cout << "inside rehash " << rehashed->get_extent() << endl;
1346 | cout << "new " << rehashed->elements << " current size:" << current->elements << endl;
1347 | throw bad_alloc();
1348 | }
1349 | }else{
1350 | //std::cout << "rehashing in rehash " << ctr << " of " << current->elements << std::endl;
1351 | rerehashed = true;
1352 | new_extent = rehashed->key_mapper.next_size();
1353 | rehashed = std::allocate_shared<hash_kernel>(alloc);
1354 | rehashed->resize_clear(new_extent);
1355 | rehashed->mf = (*this).current->mf;
1356 | rehashed->min_load_factor(this->current->min_load_factor());
1357 | rehashed->set_rand_probes(nrand_probes);
1358 | rehashed->set_logarithmic(current->get_logarithmic());
1359 | reh = rehashed.get();
1360 |
1361 | // i = begin(); // start over
1362 | //ctr = 0;
1363 | break;
1364 |
1365 | }
1366 | }
1367 | if(rehashed->elements == current->elements){
1368 | break;
1369 | }else if(!rerehashed){
1370 | cout << "hash error: unequal key count - retry rehash " << endl;
1371 | cout << "iterations " << ctr << " elements " << rehashed->elements << " extent " << rehashed->get_extent() << endl;
1372 | cout << "new " << rehashed->elements << " current size:" << current->elements << endl;
1373 | throw bad_alloc();
1374 | }else{
1375 | //cout << "re-rehash iterations " << ctr << " elements " << rehashed->elements << " extent " << rehashed->get_extent() << endl;
1376 |
1377 | //rehashed->resize_clear(rehashed->get_extent());
1378 | //break;
1379 | }
1380 |
1381 | }
1382 |
1383 | }catch(std::bad_alloc &e){
1384 | std::cout << "bad allocation: rehash failed in temp phase :" << new_extent << std::endl;
1385 | size_t t = 0;
1386 | std::cin >> t;
1387 | throw e;
1388 | }
1389 | set_current(rehashed);
1390 |
1391 | }
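/// (added commentary, not in the original source) callers usually reach this rehash()
/// indirectly: reserve() scales the request by the mapper's resize factor, resize()
/// rounds up via nearest_larger() and passes check_lf=false to skip the randomization
/// heuristic, and operator[] keeps calling rehash() until subscript() returns a slot.
/// a minimal, hedged presizing sketch from user code (names are illustrative only):
///   rabbit::unordered_map<int, int> m;
///   m.reserve(1 << 20); /// one rehash up front instead of several while inserting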
1392 | void clear(){
1393 | if(current!=nullptr)
1394 | current->clear();
1395 | pcurrent = nullptr;
1396 | current = nullptr;
1397 | ///set_current(std::allocate_shared(alloc));
1398 | }
1399 |
1400 | void clear(const key_compare& compare,const allocator_type& allocator){
1401 | set_current(std::allocate_shared<hash_kernel>(allocator,compare, allocator));
1402 | }
1403 |
1404 | basic_unordered_map() :current(nullptr),pcurrent(nullptr){
1405 | //
1406 | }
1407 |
1408 | basic_unordered_map(const key_compare& compare,const allocator_type& allocator) : key_c(compare),alloc(allocator),pcurrent(nullptr){
1409 |
1410 | }
1411 |
1412 | basic_unordered_map(const basic_unordered_map& right) {
1413 | *this = right;
1414 | }
1415 |
1416 | ~basic_unordered_map(){
1417 |
1418 | }
1419 |
1420 | void swap(basic_unordered_map& with){
1421 | typename hash_kernel::ptr t = with.current;
1422 | with.set_current(this->current);
1423 | this->set_current(t);
1424 | }
1425 |
1426 | void move(basic_unordered_map& from){
1427 | (*this).set_current(from.current);
1428 | from.set_current(nullptr);
1429 | }
1430 |
1431 | basic_unordered_map& operator=(const basic_unordered_map& right){
1432 | (*this).set_current(std::allocate_shared<hash_kernel>(alloc));
1433 | (*this).reserve(right.size());
1434 | const_iterator e = right.end();
1435 | for(const_iterator c = right.begin(); c!=e;++c){
1436 | (*this)[(*c).first] = (*c).second;
1437 | }
1438 |
1439 | return *this;
1440 | }
1441 |
1442 | hasher hash_function() const {
1443 | return (this->current->hf);
1444 | }
1445 |
1446 | key_equal key_eq() const {
1447 | if(current!=nullptr)
1448 | return (this->current->eq_f);
1449 | return key_equal();
1450 | }
1451 |
1452 | void set_min_load_factor(float x){
1453 | create_current();
1454 | current->min_load_factor(x);
1455 | }
1456 | iterator insert(const _K& k,const _V& v){
1457 | create_current();
1458 | (*this)[k] = v;
1459 | return from_pos(current->last_modified);
1460 | }
1461 |
1462 | iterator insert(const std::pair<_K,_V>& p){
1463 |
1464 | return iterator(this, insert(p.first, p.second));
1465 | }
1466 | /// generic template copy
1467 | template<class _Iter>
1468 | iterator insert(_Iter start, _Iter _afterLast){
1469 | create_current();
1470 | for(_Iter i = start; i != _afterLast; ++i){
1471 | insert((*i).first, (*i).second);
1472 | }
1473 | return from_pos(current->last_modified);
1474 | }
1475 | /// fast getter that doesn't use iterators and doesn't change the table without letting you know
1476 | bool get(const _K& k, _V& v) const {
1477 | if(current!=nullptr)
1478 | return (*this).current->get(k,v);
1479 | return false;
1480 | }
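/// (added commentary, not in the original source) unlike operator[], get() never inserts
/// a default value and never triggers a rehash, so it is safe for read-only lookups.
/// hedged sketch, with `m`, `key` and `v` purely illustrative:
///   _V v;
///   if (m.get(key, v)) { /* hit: v holds a copy */ } else { /* miss: table untouched */ }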
1481 | /// throws an exception when no value matches the key
1482 | const _V& at(const _K& k) const {
1483 | if(current == nullptr) throw std::exception();
1484 | return (*this).current->at(k);
1485 | }
1486 | _V& at(const _K& k) {
1487 | create_current();
1488 | return (*this).current->at(k);
1489 | }
1490 |
1491 | bool error(const _K& k){
1492 | _V *rv = current->subscript(k);
1493 | return rv==nullptr;
1494 | }
1495 |
1496 | _V& operator[](const _K& k){
1497 | create_current();
1498 | _V *rv = current->subscript(k);
1499 | while(rv == nullptr){
1500 | this->rehash();
1501 | rv = current->subscript(k);
1502 | }
1503 | return *rv;
1504 | }
1505 | size_type erase(const _K& k){
1506 | if(current==nullptr) return size_type();
1507 | //if(current->is_small()){
1508 | // rehash(1);
1509 | //}
1510 | return current->erase(k);
1511 | }
1512 | size_type erase(iterator i){
1513 | return erase((*i).first);
1514 | }
1515 | size_type erase(const_iterator i){
1516 | return erase((*i).first);
1517 | }
1518 | size_type count(const _K& k) const {
1519 | if(current == nullptr)return size_type();
1520 | return current->count(k);
1521 | }
1522 | iterator find(const _K& k) const {
1523 | if(current == nullptr) return from_pos_empty(size_type());
1524 | return from_pos(current->find(k));
1525 | }
1526 | iterator begin() const {
1527 | if(current==nullptr) return from_pos_empty(size_type());
1528 | return from_pos(current->begin());
1529 | }
1530 | iterator end() const {
1531 | if(current==nullptr) return from_pos_empty(size_type());
1532 | return from_pos(current->end());
1533 | }
1534 | const_iterator cbegin() const {
1535 | return begin();
1536 | }
1537 | const_iterator cend() const {
1538 | return end();
1539 | }
1540 | size_type size() const {
1541 | if(current==nullptr)return size_type();
1542 | return current->size();
1543 | }
1544 | void set_logarithmic(size_type logarithmic){
1545 | create_current();
1546 | this->current->set_logarithmic(logarithmic);
1547 | }
1548 | };
1549 | /// the stl compatible unordered map interface
1550 | template
1551 | < class _Kty
1552 | , class _Ty
1553 | , class _Hasher = rabbit_hash<_Kty>
1554 | , class _Keyeq = std::equal_to<_Kty>
1555 | , class _Alloc = std::allocator<std::pair<_Kty, _Ty> >
1556 | , class _Traits = default_traits
1557 | >
1558 | class unordered_map : public basic_unordered_map<_Kty, _Ty, _Hasher, _Keyeq, _Alloc, _Traits>
1559 | { // hash table of {key, mapped} values, unique keys
1560 | public:
1561 | typedef basic_unordered_map<_Kty, _Ty, _Hasher, _Keyeq, _Alloc, _Traits> _Base;
1562 |
1563 | typedef unordered_map<_Kty, _Ty, _Hasher, _Keyeq, _Alloc, _Traits> _Myt;
1564 |
1565 | typedef _Hasher hasher;
1566 | typedef _Kty key_type;
1567 | typedef _Ty mapped_type;
1568 | typedef _Keyeq key_equal;
1569 | typedef typename _Base::key_compare key_compare;
1570 |
1571 | // typedef typename _Base::value_compare value_compare;
1572 | typedef typename _Base::allocator_type allocator_type;
1573 | typedef typename _Base::size_type size_type;
1574 | typedef typename _Base::difference_type difference_type;
1575 | typedef typename _Base::pointer pointer;
1576 | typedef typename _Base::const_pointer const_pointer;
1577 | typedef typename _Base::reference reference;
1578 | typedef typename _Base::const_reference const_reference;
1579 | typedef typename _Base::iterator iterator;
1580 | typedef typename _Base::const_iterator const_iterator;
1581 | // typedef typename _Base::reverse_iterator reverse_iterator;
1582 | // typedef typename _Base::const_reverse_iterator
1583 | // const_reverse_iterator;
1584 | typedef typename _Base::value_type value_type;
1585 |
1586 | typedef typename _Base::iterator local_iterator;
1587 | typedef typename _Base::const_iterator const_local_iterator;
1588 |
1589 | unordered_map()
1590 | : _Base(key_compare(), allocator_type())
1591 | { // construct empty map from defaults
1592 | }
1593 |
1594 | explicit unordered_map(const allocator_type& a)
1595 | : _Base(key_compare(), a)
1596 | { // construct empty map from defaults, allocator
1597 | }
1598 |
1599 | unordered_map(const _Myt& _Right)
1600 | : _Base(_Right)
1601 | { // construct map by copying _Right
1602 | }
1603 |
1604 | //unordered_map(const _Myt& _Right, const allocator_type& _Al)
1605 | // : _Base(_Right, _Al)
1606 | // { // construct map by copying _Right, allocator
1607 | // }
1608 |
1609 | explicit unordered_map(size_type _Buckets)
1610 | : _Base(key_compare(), allocator_type())
1611 | { // construct empty map from defaults, then rehash to the requested bucket count
1612 | this->rehash(_Buckets);
1613 | }
1614 |
1615 | unordered_map(size_type _Buckets, const hasher& _Hasharg)
1616 | : _Base(key_compare(_Hasharg), allocator_type())
1617 | { // construct empty map from hasher
1618 | this->rehash(_Buckets);
1619 | }
1620 |
1621 | unordered_map
1622 | ( size_type _Buckets
1623 | , const hasher& _Hasharg
1624 | , const _Keyeq& _Keyeqarg
1625 | )
1626 | : _Base(key_compare(_Hasharg, _Keyeqarg), allocator_type())
1627 | { // construct empty map from hasher and equality comparator
1628 | this->rehash(_Buckets);
1629 | }
1630 |
1631 | unordered_map
1632 | ( size_type _Buckets
1633 | , const hasher& _Hasharg
1634 | , const _Keyeq& _Keyeqarg
1635 | , const allocator_type& a
1636 | )
1637 | : _Base(key_compare(_Hasharg, _Keyeqarg), a)
1638 | { // construct empty map from hasher and equality comparator
1639 | this->rehash(_Buckets);
1640 | }
1641 |
1642 | template<class _Iter>
1643 | unordered_map
1644 | ( _Iter _First
1645 | , _Iter _Last
1646 | )
1647 | : _Base(key_compare(), allocator_type())
1648 | { // construct map from sequence, defaults
1649 | _Base::insert(_First, _Last);
1650 | }
1651 |
1652 | template<class _Iter>
1653 | unordered_map
1654 | ( _Iter _First
1655 | , _Iter _Last
1656 | , size_type _Buckets
1657 | )
1658 | : _Base(key_compare(), allocator_type())
1659 | { // construct map from sequence, then rehash to the requested bucket count
1660 | this->rehash(_Buckets);
1661 | _Base::insert(_First, _Last);
1662 | }
1663 |
1664 | template<class _Iter>
1665 | unordered_map
1666 | ( _Iter _First
1667 | , _Iter _Last
1668 | , size_type _Buckets
1669 | , const hasher& _Hasharg
1670 | )
1671 | : _Base(key_compare(_Hasharg), allocator_type())
1672 | {
1673 | this->rehash(_Buckets);
1674 | _Base::insert(_First, _Last);
1675 | }
1676 |
1677 | template<class _Iter>
1678 | unordered_map
1679 | ( _Iter _First
1680 | , _Iter _Last
1681 | , size_type _Buckets
1682 | , const hasher& _Hasharg
1683 | , const _Keyeq& _Keyeqarg
1684 | )
1685 | : _Base(key_compare(_Hasharg, _Keyeqarg), allocator_type())
1686 | {
1687 | this->rehash(_Buckets);
1688 | _Base::insert(_First, _Last);
1689 | }
1690 |
1691 | template<class _Iter>
1692 | unordered_map
1693 | ( _Iter _First
1694 | , _Iter _Last
1695 | , size_type _Buckets
1696 | , const hasher& _Hasharg
1697 | , const _Keyeq& _Keyeqarg
1698 | , const allocator_type& _Al
1699 | )
1700 | : _Base(key_compare(_Hasharg, _Keyeqarg), _Al)
1701 | {
1702 | this->rehash(_Buckets);
1703 | _Base::insert(_First, _Last);
1704 | }
1705 |
1706 | _Myt& operator=(const _Myt& _Right){ // assign by copying _Right
1707 | _Base::operator=(_Right);
1708 | return (*this);
1709 | }
1710 |
1711 | unordered_map(_Myt&& from)
1712 | {
1713 | _Base::move(from);
1714 | }
1715 |
1716 | unordered_map(_Myt&& from, const allocator_type& _Al)
1717 | : _Base(key_compare(), _Al)
1718 | { // construct map by moving _Right, allocator
1719 | _Base::move(from);
1720 | }
1721 |
1722 | _Myt& operator=(_Myt&& from){ // assign by moving _Right
1723 | _Base::move(from);
1724 | return (*this);
1725 | }
1726 | const mapped_type& at(const key_type& k) const {
1727 | return _Base::at(k);
1728 | }
1729 |
1730 | mapped_type& at(const key_type& k) {
1731 | return _Base::at(k);
1732 | }
1733 |
1734 | mapped_type& operator[](const key_type& k){
1735 | // find element matching _Keyval or insert with default mapped
1736 | return _Base::operator[](k);
1737 | }
1738 |
1739 | // find element matching _Keyval or insert with default mapped
1740 | mapped_type& operator[](key_type&& k){
1741 | return (*this)[k];
1742 | }
1743 |
1744 | void swap(_Myt& _Right){ // exchange contents with non-movable _Right
1745 | _Base::swap(_Right);
1746 | }
1747 | };
1748 |
1749 |
1750 | template
1751 | < class _Kty
1752 | , class _Ty
1753 | , class _Hasher = rabbit_hash<_Kty>
1754 | , class _Keyeq = std::equal_to<_Kty>
1755 | , class _Alloc = std::allocator<std::pair<_Kty, _Ty> >
1756 | , class _Traits = sparse_traits
1757 | >
1758 | class sparse_unordered_map : public unordered_map<_Kty, _Ty, _Hasher, _Keyeq, _Alloc, _Traits>
1759 | {
1760 | public:
1761 | typedef _Hasher hasher;
1762 | typedef _Kty key_type;
1763 | typedef _Ty mapped_type;
1764 | typedef _Keyeq key_equal;
1765 | typedef unordered_map<_Kty, _Ty, _Hasher, _Keyeq, _Alloc, _Traits> _Base;
1766 | typedef typename _Base::key_compare key_compare;
1767 |
1768 | // typedef typename _Base::value_compare value_compare;
1769 | typedef typename _Base::allocator_type allocator_type;
1770 | typedef typename _Base::size_type size_type;
1771 | typedef typename _Base::difference_type difference_type;
1772 | typedef typename _Base::pointer pointer;
1773 | typedef typename _Base::const_pointer const_pointer;
1774 | typedef typename _Base::reference reference;
1775 | typedef typename _Base::const_reference const_reference;
1776 | typedef typename _Base::iterator iterator;
1777 | typedef typename _Base::const_iterator const_iterator;
1778 | // typedef typename _Base::reverse_iterator reverse_iterator;
1779 | // typedef typename _Base::const_reverse_iterator
1780 | // const_reverse_iterator;
1781 | typedef typename _Base::value_type value_type;
1782 |
1783 | typedef typename _Base::iterator local_iterator;
1784 | typedef typename _Base::const_iterator const_local_iterator;
1785 | sparse_unordered_map(){
1786 | }
1787 | ~sparse_unordered_map(){
1788 | }
1789 | };
1790 |
1791 | }; // rab-bit
1792 |
1793 | #endif /// _RABBIT_H_CEP_20150303_
1794 |
--------------------------------------------------------------------------------
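The map header dumped above exposes the rabbit::unordered_map wrapper together with the
non-STL extensions visible in the listing (two-argument insert, get, set_logarithmic, and
min/max load-factor control). The following is a minimal, hedged usage sketch based only on
that listing; the include path, iterator member access and output are assumptions, not
something the repository states.

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include "rabbit/unordered_map"   // path assumed from the repository layout

    int main() {
        rabbit::unordered_map<std::uint64_t, std::string> m;
        m.reserve(1024);              // presize to avoid early rehashes
        m[42] = "answer";             // subscript inserts or overwrites
        m.insert(7, "seven");         // rabbit-specific two-argument insert
        std::string v;
        if (m.get(42, v))             // non-mutating lookup, no default insertion
            std::cout << v << "\n";
        auto it = m.find(7);
        if (it != m.end())
            std::cout << it->first << " -> " << it->second << "\n";
        m.erase(42);
        std::cout << m.size() << " element(s), load factor " << m.load_factor() << "\n";
        return 0;
    }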
/rabbit/rabbit_set.h:
--------------------------------------------------------------------------------
1 | #ifndef _RABBIT_H_CEP_20150303_
2 | #define _RABBIT_H_CEP_20150303_
3 | /**
4 | The MIT License (MIT)
5 | Copyright (c) 2015 Christiaan Pretorius
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 | **/
22 | #include <iostream>
23 | #include <iomanip>
24 | #include <vector>
25 | #include <memory>
26 | #include <limits>
27 | #include <algorithm>
28 | #include <functional>
29 | #include <utility>
30 | #include <random>
31 | #include <cassert>
32 | #include <exception>
33 | /// the rab-bit hash
34 | /// probably the world's simplest working hashtable - only kidding
35 | /// it uses linear probing for the first level of fallback and then an overflow area or secondary hash
36 |
37 | #ifdef _MSC_VER
38 | #define RABBIT_NOINLINE_PRE _declspec(noinline)
39 | #define RABBIT_NOINLINE_
40 | #else
41 | #define RABBIT_NOINLINE_PRE
42 | #define RABBIT_NOINLINE_ __attribute__((noinline))
43 | #endif
44 | namespace rabbit {
45 |
46 | /// a very basic version of std::pair which keeps references only
47 | struct end_iterator {
48 | };
49 | template
50 | < class _Ty1
51 | , class _Ty2
52 | >
53 | struct ref_pair {
54 | // store references to a pair of values
55 |
56 | typedef ref_pair<_Ty1, _Ty2> _Myt;
57 | typedef _Ty1 first_type;
58 | typedef _Ty2 second_type;
59 |
60 | // construct from specified non const values
61 | ref_pair(_Ty1& _Val1, _Ty2& _Val2)
62 | : first(_Val1)
63 | , second(_Val2)
64 | {
65 | }
66 | /// rely on the compiler default to do this
67 | //ref_pair(_Myt& _Right)
68 | //: first(_Right.first)
69 | //, second(_Right.second)
70 | //{
71 | //}
72 | _Myt& operator=(const _Myt& _Right) {
73 | first = _Right.first;
74 | second = _Right.second;
75 | return (*this);
76 | }
77 |
78 | operator std::pair<_Ty1, _Ty2>() {
79 | return std::make_pair(first, second);
80 | }
81 |
82 | operator const std::pair<_Ty1, _Ty2>() const {
83 | return std::make_pair(first, second);
84 | }
85 |
86 | _Ty1& first; // the first stored value
87 | _Ty2& second; // the second stored value
88 | };
89 |
90 |
91 | template<class _Config>
92 | struct _BinMapper {
93 | typedef typename _Config::size_type size_type;
94 | typedef _Config config_type;
95 | size_type extent;
96 | size_type extent1;
97 | size_type extent2;
98 | size_type primary_bits;
99 | size_type random_val;
100 | unsigned long long gate_bits;
101 | _Config config;
102 | _BinMapper() {
103 | }
104 | _BinMapper(size_type new_extent, const _Config& config) {
105 | this->config = config;
106 | this->extent = ((size_type)1) << this->config.log2(new_extent);
107 | this->extent1 = this->extent - 1;
108 | this->extent2 = this->config.log2(new_extent);
109 | this->primary_bits = extent2;
110 | //std::minstd_rand rd;
111 | //std::mt19937 gen(rd());
112 | //std::uniform_int_distribution dis(1ll<<4, std::numeric_limits::max());
113 | this->random_val = 0; //(size_type)dis(gen);
114 | if (new_extent < (1ll << 32ll)) {
115 | this->gate_bits = (1ll << 32ll) - 1ll;
116 | }
117 | else {
118 | this->gate_bits = (1ll << 62ll) - 1ll;
119 | }
120 | }
121 | inline size_type nearest_larger(size_type any) {
122 | size_type l2 = this->config.log2(any);
123 | return (size_type)(2ll << (l2 + 1ll));
124 | }
125 | inline size_type randomize(size_type other) const {
126 | size_type r = other >> this->primary_bits;
127 | return other + (r*r); //(other ^ random_val) & this->extent1;
128 | }
129 | inline size_type operator()(size_type h_n) const {
130 | size_type h = h_n & this->gate_bits;
131 | //h += (h>>this->primary_bits);
132 | return h & this->extent1; //
133 |
134 | }
135 | double resize_factor() const {
136 | return 2;
137 | }
138 | double recalc_growth_factor(size_type elements) {
139 | return 2;
140 | }
141 |
142 | inline size_type next_size() {
143 |
144 | double r = recalc_growth_factor(this->extent) * this->extent;
145 | assert(r > (double)extent);
146 | return (size_type)r;
147 | }
148 | };
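/// (added commentary, not in the original source) the mapper truncates the requested
/// extent down to a power of two (1 << log2(new_extent)), so operator() can reduce a
/// hash with a mask instead of a modulo. worked example: new_extent = 100 gives
/// extent = 64 and extent1 = 63, so a hash h lands in bucket (h & gate_bits) & 63;
/// gate_bits first clips the hash to 32 bits while the extent stays below 2^32.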
149 | template<class _Ht>
150 | struct rabbit_hash {
151 | size_t operator()(const _Ht& k) const {
152 | return (size_t)std::hash<_Ht>()(k); ///
153 | };
154 | };
155 | template<>
156 | struct rabbit_hash<long> {
157 | unsigned long operator()(const long& k) const {
158 | return (unsigned long)k;
159 | };
160 | };
161 | template<>
162 | struct rabbit_hash<unsigned long> {
163 | inline unsigned long operator()(const unsigned long& k) const {
164 | return k;
165 | };
166 | };
167 | template<>
168 | struct rabbit_hash<unsigned int> {
169 | inline unsigned int operator()(const unsigned int& k) const {
170 | return k;
171 | };
172 | };
173 | template<>
174 | struct rabbit_hash<int> {
175 | inline unsigned int operator()(const int& k) const {
176 | return k;
177 | };
178 | };
179 | template<>
180 | struct rabbit_hash<unsigned long long> {
181 | inline unsigned long long operator()(const unsigned long long& k) const {
182 | return k;
183 | };
184 | };
185 | template<>
186 | struct rabbit_hash<long long> {
187 | inline unsigned long long operator()(const long long& k) const {
188 | return (unsigned long long)k;
189 | };
190 | };
191 | template<int logarithmic>
192 | class basic_config {
193 | public:
194 | typedef unsigned long long int _Bt; /// exists ebucket type - not using vector - interface does not support bit bucketing
195 | /// if even more speed is desired, and you're willing to live with a 4 billion key limit, then
196 | //typedef unsigned long size_type;
197 | typedef std::size_t size_type;
198 |
199 | size_type log2(size_type n) {
200 | size_type r = 0;
201 | while (n >>= 1)
202 | {
203 | r++;
204 | }
205 | return r;
206 | }
207 | _Bt CHAR_BITS;
208 | _Bt BITS_SIZE;
209 | _Bt BITS_SIZE1;
210 | _Bt ALL_BITS_SET;
211 | _Bt LOGARITHMIC;
212 | /// maximum probes per access
213 | size_type PROBES; /// a value of 32 gives a little more speed but much larger table size(> twice the size in some cases)
214 | size_type BITS_LOG2_SIZE;
215 | /// this distributes the h values which are powers of 2 a little to avoid primary clustering when there is no
216 | /// hash randomizer available
217 |
218 | size_type MIN_EXTENT;
219 | size_type MAX_OVERFLOW_FACTOR;
220 |
221 | basic_config(const basic_config& right) {
222 | *this = right;
223 | }
224 |
225 | basic_config& operator=(const basic_config& right) {
226 | CHAR_BITS = right.CHAR_BITS;
227 | BITS_SIZE = right.BITS_SIZE;
228 | BITS_SIZE1 = right.BITS_SIZE1;
229 | BITS_LOG2_SIZE = right.BITS_LOG2_SIZE;
230 | ALL_BITS_SET = right.ALL_BITS_SET;
231 | PROBES = right.PROBES; /// a value of 32 gives a little more speed but much larger table size(> twice the size in some cases)
232 | MIN_EXTENT = right.MIN_EXTENT;
233 | MAX_OVERFLOW_FACTOR = right.MAX_OVERFLOW_FACTOR;
234 | LOGARITHMIC = right.LOGARITHMIC;
235 | return *this;
236 | }
237 |
238 | basic_config() {
239 | CHAR_BITS = 8;
240 | BITS_SIZE = (sizeof(_Bt) * CHAR_BITS);
241 | BITS_SIZE1 = BITS_SIZE - 1;
242 | BITS_LOG2_SIZE = (size_type)log2((size_type)BITS_SIZE);
243 | ALL_BITS_SET = ~(_Bt)0;
244 | PROBES = 12;
245 | MIN_EXTENT = 4; /// start size of the hash table
246 | MAX_OVERFLOW_FACTOR = 1<<17;
247 | LOGARITHMIC = logarithmic;
248 |
249 | }
250 | };
251 | template<class _InMapper>
252 | struct basic_traits {
253 | typedef typename _InMapper::config_type rabbit_config;
254 | typedef typename rabbit_config::_Bt _Bt;
255 | typedef typename rabbit_config::size_type size_type;
256 | typedef ptrdiff_t difference_type;
257 | typedef _InMapper _Mapper;
258 | };
259 | typedef basic_traits<_BinMapper> > default_traits;
260 | typedef basic_traits<_BinMapper> > sparse_traits;
261 |
262 |
263 | template
264 | < class _K
265 | , class _V
266 | , class _H = rabbit_hash<_K>
267 | , class _E = std::equal_to<_K>
268 | , class _Allocator = std::allocator<_K>
269 | , class _Traits = default_traits
270 | >
271 | class basic_unordered_set {
272 | public:
273 | typedef _K key_type;
274 |
275 | typedef _V mapped_type;
276 |
277 | typedef ref_pair<_K, _V> _ElPair;
278 | typedef ref_pair<const _K, const _V> _ConstElPair;
279 | typedef _ElPair value_type;
280 | typedef _ConstElPair const_value_type;
281 | typedef typename _Traits::_Bt _Bt; /// exists ebucket type - not using vector - interface does not support bit bucketing
282 | typedef typename _Traits::size_type size_type;
283 | typedef typename _Traits::rabbit_config rabbit_config;
284 | typedef typename _Traits::_Mapper _Mapper;
285 | typedef typename _Traits::difference_type difference_type;
286 |
287 | typedef _Allocator allocator_type;
288 | typedef _ElPair* pointer;
289 | typedef const _ElPair* const_pointer;
290 | typedef _ElPair& reference;
291 | typedef const _ElPair& const_reference;
292 | // typedef typename _Base::reverse_iterator reverse_iterator;
293 | // typedef typename _Base::const_reverse_iterator
294 | // const_reverse_iterator;
295 |
296 | typedef _E key_equal;
297 | typedef _E key_compare;
298 | typedef _H hasher;
299 | typedef _V* _RV;
300 |
301 | static const size_type end_pos = std::numeric_limits<size_type>::max();
302 | protected:
303 | struct overflow_stats {
304 | size_type start_elements;
305 | size_type end_elements;
306 | overflow_stats() : start_elements(0), end_elements(0) {}
307 | };
308 |
309 | struct _KeySegment {
310 |
311 | public:
312 | _Bt overflows;
313 | _Bt exists;
314 |
315 | private:
316 | void set_bit(_Bt& w, _Bt index, bool f) {
317 |
318 | #ifdef _MSC_VER
319 | #pragma warning(disable:4804)
320 | #endif
321 | _Bt m = (_Bt)1ul << index;// the bit mask
322 | w ^= (-f ^ w) & m;
323 | ///w = (w & ~m) | (-f & m);
324 | }
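/// (added commentary, not in the original source) w ^= (-f ^ w) & m is a branchless
/// conditional bit write: when f is true, -f is all ones and the bit under m is forced
/// on; when f is false, -f is zero and the bit is forced off. worked example with
/// index = 1 (m = 0b0010): w = 0b0101 and f = true gives 0b0111, and calling again
/// with f = false clears it back to 0b0101.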
325 |
326 | public:
327 |
328 | inline bool all_exists() const {
329 | return (exists == ~(_Bt)0);
330 | }
331 |
332 | inline bool none_exists() const {
333 | return (exists == (_Bt)0);
334 | }
335 |
336 | inline bool is_exists(_Bt bit) const {
337 | return ((exists >> bit) & (_Bt)1ul);
338 | }
339 |
340 | inline bool is_overflows(_Bt bit) const {
341 | return ((overflows >> bit) & (_Bt)1ul);
342 | }
343 |
344 | inline void set_exists(_Bt index, bool f) {
345 | set_bit(exists, index, f);
346 | }
347 |
348 | inline void toggle_exists(_Bt index) {
349 | exists ^= ((_Bt)1 << index);
350 | }
351 |
352 | void set_overflows(_Bt index, bool f) {
353 | set_bit(overflows, index, f);
354 | }
355 |
356 | void clear() {
357 | exists = 0;
358 | overflows = 0;
359 | }
360 |
361 | _KeySegment() {
362 | exists = 0;
363 | overflows = 0;
364 | }
365 | };
366 | //typedef _PairSegment _Segment;
367 | typedef _KeySegment _Segment;
368 | /// the vector that will contain the segmented mapping pairs and flags
369 |
370 | typedef std::vector<_Segment, _Allocator> _Segments;
371 | typedef std::vector<_K, _Allocator> _Keys;
372 | typedef std::vector<_V, _Allocator> _Values;
373 |
374 | struct hash_kernel {
375 |
376 | /// settings configuration
377 | rabbit_config config;
378 | size_type elements;
379 | size_type initial_probes;
380 | size_type probes;
381 | size_type rand_probes; /// used when there might be an attack
382 | size_type last_modified;
383 | size_type random_val;
384 | /// the existence bit set is a factor of BITS_SIZE+1 less than the extent
385 | _Segment* clusters;///a.k.a. pages
386 | _Keys keys;
387 | ///_Values values;
388 | _V* values;
389 |
390 | size_type overflow;
391 | size_type overflow_elements;
392 | overflow_stats stats;
393 | _Mapper key_mapper;
394 | _H hf;
395 | _E eq_f;
396 | float mf;
397 | size_type buckets;
398 | size_type removed;
399 | bool keys_overflowed;
400 | _Allocator allocator;
401 | _K empty_key;
402 | //bool sparse;
403 | size_type logarithmic;
404 |
405 | typename _Allocator::template rebind<_Segment>::other get_segment_allocator() {
406 | return typename _Allocator::template rebind<_Segment>::other(allocator);
407 | }
408 |
409 | typename _Allocator::template rebind<_V>::other get_value_allocator() {
410 | return typename _Allocator::template rebind<_V>::other(allocator);
411 | }
412 |
413 | /// the minimum load factor
414 | float load_factor() const {
415 | return (float)((double)elements / (double)bucket_count());
416 | }
417 |
418 | /// there is a variable number of buckets; there are at most this many
419 | ///
420 | size_type bucket_count() const {
421 |
422 | return get_data_size();
423 | }
424 | /// the size of a bucket can be calculated based on the
425 | /// hash value of its first occupant
426 | /// mainly to satisfy stl conventions
427 | size_type bucket_size(size_type n) const {
428 | size_type pos = n;
429 | if (!overflows_(pos)) {
430 | if (exists_(pos) && map_key(get_segment_key(pos)) == n)
431 | return 1;
432 | else return 0;
433 | }
434 | size_type m = pos + probes;
435 | size_type r = 0;
436 | for (; pos < m; ++pos) {
437 | if (!exists_(pos)) {
438 | }
439 | else if (map_key(get_segment_key(pos)) == n) {
440 | ++r;
441 | }
442 | }
443 | size_type e = end();
444 | for (pos = get_o_start(); pos < e; ) {
445 | if (!exists_(pos)) {
446 | }
447 | else if (map_key(get_segment_key(pos)) == n) {
448 | ++r;
449 | }
450 | ++pos;
451 | }
452 | return r;
453 | }
454 |
455 | float max_load_factor() const {
456 | return mf;
457 | }
458 |
459 | void max_load_factor(float z) {
460 | mf = z;
461 | }
462 |
463 | /// total data size, never less than size()
464 | size_type get_data_size() const {
465 | return get_extent() + initial_probes + overflow;
466 | }
467 |
468 | /// the overflow start
469 | size_type get_o_start() const {
470 | return get_extent() + initial_probes;
471 | }
472 |
473 | size_type get_segment_number(size_type pos) const {
474 | return (pos >> config.BITS_LOG2_SIZE);
475 | }
476 |
477 | _Bt get_segment_index(size_type pos) const {
478 | return (_Bt)(pos & (config.BITS_SIZE1));
479 | }
480 |
481 | _Segment &get_segment(size_type pos) {
482 | return clusters[pos >> config.BITS_LOG2_SIZE];
483 | }
484 |
485 | const _Segment &get_segment(size_type pos) const {
486 | return clusters[get_segment_number(pos)];
487 | }
488 |
489 | const _ElPair get_segment_pair(size_type pos) const {
490 | return std::make_pair(get_segment_key(pos), get_segment_value(pos));
491 |
492 | }
493 |
494 | const _K & get_segment_key(size_type pos) const {
495 | return keys[pos];
496 | }
497 |
498 | _ElPair get_segment_pair(size_type pos) {
499 | return _ElPair(get_segment_key(pos), get_segment_value(pos));
500 | }
501 |
502 | _K & get_segment_key(size_type pos) {
503 | return keys[pos];
504 | }
505 | const _V & get_segment_value(size_type pos) const {
506 | return values[pos];
507 | }
508 |
509 | _V & get_segment_value(size_type pos) {
510 | return values[pos];
511 | }
512 | void set_segment_key(size_type pos, const _K &k) {
513 | keys[pos] = k;
514 | }
515 | void destroy_segment_value(size_type pos) {
516 | values[pos].~_V();
517 | }
518 | _V* create_segment_value(size_type pos, const _V &v) {
519 | _V* r = &values[pos];
520 | new (r) _V(v);
521 | return r;
522 | }
523 | _V* create_segment_value(size_type pos) {
524 | _V* r = &values[pos];
525 | new (r) _V();
526 | return r;
527 | }
528 | void set_segment_value(size_type pos, const _V &v) {
529 | values[pos] = v;
530 | }
531 |
532 | void set_exists(size_type pos, bool f) {
533 | last_modified = pos;
534 | get_segment(pos).set_exists(get_segment_index(pos), f);
535 | }
536 |
537 | void set_overflows(size_type pos, bool f) {
538 | get_segment(pos).set_overflows(get_segment_index(pos), f);
539 | }
540 |
541 | inline bool exists_(size_type pos) const {
542 | return get_segment(pos).is_exists(get_segment_index(pos));
543 | }
544 |
545 | inline bool overflows_(size_type pos) const {
546 | return get_segment(pos).is_overflows(get_segment_index(pos));
547 | }
548 |
549 | inline bool overflowed_(size_type pos) const {
550 | return false;
551 | }
552 |
553 | inline size_type map_key(const _K& k) const {
554 | size_type h = (size_type)_H()(k);
555 |
556 | return key_mapper(h);
557 |
558 | }
559 | inline size_type map_rand_key(const _K& k) const {
560 | size_type h = (size_type)_H()(k);
561 | if (this->rand_probes)
562 | return key_mapper(randomize(h));
563 | return key_mapper(h); //
564 | }
565 | size_type get_e_size() const {
566 | return (size_type)(get_data_size() / config.BITS_SIZE) + 1;
567 | }
568 |
569 | void clear_data() {
570 | if (values) {
571 | for (size_type pos = 0; pos < get_data_size(); ++pos) {
572 | if (exists_(pos)) {
573 | get_value_allocator().destroy(&values[pos]);
574 | }
575 | }
576 | }
577 | }
578 |
579 | void free_values() {
580 | if (values) {
581 | clear_data();
582 | get_value_allocator().deallocate(values, get_data_size());
583 | values = nullptr;
584 | }
585 | }
586 | void free_data() {
587 | free_values();
588 | if (clusters) {
589 | size_type esize = get_e_size();
590 | for (size_type c = 0; c < esize; ++c) {
591 | get_segment_allocator().destroy(&clusters[c]);
592 | }
593 | get_segment_allocator().deallocate(clusters, get_e_size());
594 | }
595 | values = nullptr;
596 | clusters = nullptr;
597 | }
598 | double get_resize_factor() const {
599 | return key_mapper.resize_factor();
600 | }
601 | size_type get_probes() const {
602 | return this->probes;
603 | }
604 | void set_rand_probes() {
605 | this->rand_probes = this->probes;
606 | }
607 | void set_rand_probes(size_type rand_probes) {
608 | this->rand_probes = rand_probes;
609 | }
610 |
611 | /// clears all data and resizes the data vector to the given extent
612 | void resize_clear(size_type new_extent) {
613 | /// inverse of factor used to determine overflow list
614 | /// when overflow list is full rehash starts
615 | free_data();
616 |
617 | key_mapper = _Mapper(new_extent, config);
618 |
619 | mf = 1.0;
620 | assert(config.MAX_OVERFLOW_FACTOR > 0);
621 | if (is_logarithmic()) {
622 | probes = config.log2(new_extent)*logarithmic;
623 | overflow = config.log2(new_extent)*logarithmic;
624 | }
625 | else {
626 | probes = config.PROBES; //config.log2(new_extent); //-config.log2(config.MIN_EXTENT/2); /// start probes config.PROBES; //
627 | overflow = std::max(config.PROBES, new_extent / config.MAX_OVERFLOW_FACTOR); //config.log2(new_extent); // *16*
628 | }
629 |
630 | rand_probes = 0;
631 |
632 | initial_probes = probes;
633 | //std::cout << "rehash with overflow:" << overflow << std::endl;
634 | elements = 0;
635 | removed = 0;
636 | empty_key = _K();
637 | keys_overflowed = false;
638 | overflow_elements = get_o_start();
639 | size_type esize = get_e_size();
640 | keys.resize(get_data_size());
641 | clusters = get_segment_allocator().allocate(esize);
642 | values = get_value_allocator().allocate(get_data_size());
643 | _KeySegment ks;
644 | for (size_type c = 0; c < esize; ++c) {
645 | get_segment_allocator().construct(&clusters[c], ks);
646 | }
647 | set_exists(get_data_size(), true);
648 | buckets = 0;
649 |
650 | };
651 |
652 | void clear() {
653 | size_type esize = get_e_size();
654 | //clear_data();
655 | for (size_type c = 0; c < esize; ++c) {
656 | clusters[c].clear();
657 | }
658 | set_exists(get_data_size(), true);
659 | elements = 0;
660 | removed = 0;
661 | //resize_clear(config.MIN_EXTENT);
662 | }
663 |
664 | hash_kernel(const key_compare& compare, const allocator_type& allocator)
665 | : clusters(nullptr), values(nullptr), eq_f(compare), mf(1.0f), allocator(allocator), logarithmic(config.LOGARITHMIC) {
666 | resize_clear(config.MIN_EXTENT);
667 | }
668 |
669 | hash_kernel() : clusters(nullptr), values(nullptr), mf(1.0f), logarithmic(config.LOGARITHMIC) {
670 | resize_clear(config.MIN_EXTENT);
671 | }
672 |
673 | hash_kernel(const hash_kernel& right) : clusters(nullptr), values(nullptr), mf(1.0f), logarithmic(config.LOGARITHMIC) {
674 | *this = right;
675 | }
676 |
677 | ~hash_kernel() {
678 | free_data();
679 | }
680 | inline size_type get_extent() const {
681 | return key_mapper.extent;
682 | }
683 | void set_logarithmic(size_type loga) {
684 | logarithmic = loga;
685 | }
686 | size_type get_logarithmic() const {
687 | return this->logarithmic;
688 | }
689 | bool is_logarithmic() const {
690 | return this->logarithmic > 0;
691 | }
692 | hash_kernel& operator=(const hash_kernel& right) {
693 | config = right.config;
694 | key_mapper = right.key_mapper;
695 | free_data();
696 | buckets = right.buckets;
697 | removed = right.removed;
698 | mf = right.mf;
699 | elements = right.elements;
700 | size_type esize = get_e_size();
701 |
702 | clusters = get_segment_allocator().allocate(esize);
703 | values = get_value_allocator().allocate(get_data_size());
704 |
705 | std::copy(right.values, right.values + right.get_data_size(), values);
706 | keys = right.keys;
707 | std::copy(right.clusters, right.clusters + esize, clusters);
708 |
709 | return *this;
710 | }
711 | inline bool raw_equal_key(size_type pos, const _K& k) const {
712 | const _K& l = get_segment_key(pos); ///.key(get_segment_index(pos));
713 | return eq_f(l, k);
714 | }
715 | inline bool segment_equal_key_exists(size_type pos, const _K& k) const {
716 | _Bt index = get_segment_index(pos);
717 | const _Segment& s = get_segment(pos);
718 | return eq_f(get_segment_key(pos), k) && s.is_exists(index);
719 |
720 | }
721 |
722 | bool equal_key(size_type pos, const _K& k) const {
723 | const _K& l = get_segment_key(pos);
724 | return eq_f(l, k);
725 | }
726 |
727 | inline size_type randomize(size_type v) const {
728 | return key_mapper.randomize(v);
729 | }
730 |
731 | inline size_type hash_probe_incr(size_type i) const {
732 | return 1;
733 |
734 | }
735 | /// when all inputs to this function are unique relative to the current hash map (i.e. they don't exist in the hashmap)
736 | /// and there were no erasures; used for maximum fill rate in rehash
737 | _V* unique_subscript_rest(const _K& k, size_type origin) {
738 |
739 | size_type pos = map_rand_key(k);
740 |
741 | size_type start = 0;
742 | for (unsigned int i = 0; i < probes && pos < get_extent(); ++i) {
743 |
744 | if (!exists_(pos)) {
745 | set_exists(pos, true);
746 | set_segment_key(pos, k);
747 | set_overflows(origin, true);
748 | keys_overflowed = true;
749 | ++elements;
750 | last_modified = pos;
751 | return create_segment_value(pos);
752 | }
753 | pos += hash_probe_incr(i);
754 | }
755 |
756 | if (overflow_elements < end()) {
757 | pos = overflow_elements++;
758 |
759 | if (!exists_(pos)) {
760 | set_overflows(origin, true);
761 | keys_overflowed = true;
762 | set_exists(pos, true);
763 | set_segment_key(pos, k);
764 | ++elements;
765 |
766 | last_modified = pos;
767 | return create_segment_value(pos);
768 | }
769 | };
770 |
771 | return nullptr;
772 | }
773 | _V* unique_subscript(const _K& k) {
774 |
775 | /// eventually an out-of-memory (std::bad_alloc) exception will occur
776 | size_type pos = map_key(k);
777 | _Bt si = get_segment_index(pos);
778 | _Segment &s = clusters[pos >> config.BITS_LOG2_SIZE];/// get_segment(pos)
779 |
780 | if (!s.is_exists(si)) { //!s.is_overflows(si)
781 | s.toggle_exists(si);
782 | set_segment_key(pos, k);
783 | ++elements;
784 |
785 | return create_segment_value(pos);
786 | }
787 |
788 | return unique_subscript_rest(k, pos);
789 | }
790 |
791 | _V* subscript_rest(const _K& k, size_type origin) {
792 | size_type pos = map_rand_key(k);
793 | for (unsigned int i = 0; i < probes && pos < get_extent(); ++i) {
794 | _Bt si = get_segment_index(pos);
795 | _Segment& s = get_segment(pos);
796 | if (!s.is_exists(si)) {
797 | s.toggle_exists(si);
798 | //s.key(si)=k;
799 | set_segment_key(pos, k);
800 | ++elements;
801 | set_overflows(origin, true);
802 | keys_overflowed = true;
803 | return create_segment_value(pos);
804 | }
805 | pos += hash_probe_incr(i);
806 | }
807 |
808 | size_type at_empty = end();
809 |
810 | if (overflow_elements < end()) {
811 | if (!exists_(overflow_elements)) {
812 | at_empty = overflow_elements++;
813 |
814 | }
815 | }
816 | else if (removed) {
817 | size_type e = end();
818 | for (pos = get_o_start(); pos < e; ) {
819 | if (!exists_(pos)) {
820 |
821 | at_empty = pos; break;
822 | }
823 | ++pos;
824 | }
825 | }
826 |
827 | pos = at_empty;
828 | if (pos != end()) {
829 | set_overflows(origin, true);
830 | keys_overflowed = true;
831 | set_exists(pos, true);
832 | set_segment_key(pos, k);
833 | size_type os = (overflow_elements - (get_extent() + initial_probes));
834 | if (os == 1) {
835 | stats.start_elements = elements;
836 | //std::cout << "overflow start: hash table size " << elements << " elements in over flow:" << os << std::endl;
837 | }
838 |
839 | if (overflow_elements == end() && stats.start_elements) {
840 | stats.end_elements = elements;
841 | size_type saved = stats.end_elements - stats.start_elements - os;
842 | double percent_saved = (100.0*((double)saved / (double)elements));
843 |
844 | // std::cout << "overflow end: hash table size " << elements << " elements in over flow:" << os << " saved : " << saved <<
845 | // std::endl << " percent saved " << std::setprecision(4) << percent_saved <<
846 | // std::endl;
847 | }
848 |
849 | ++elements;
850 | return create_segment_value(pos);
851 |
852 | }
853 | return nullptr;
854 | }
855 | _V* subscript(const _K& k) {
856 | size_type pos = map_key(k);
857 | _Bt si = get_segment_index(pos);
858 | _Segment& s = get_segment(pos);
859 | bool key_exists = s.is_exists(si);
860 | //key_overflows = s.is_overflows(si);
861 | if (!key_exists) { //!key_overflows &&
862 | s.toggle_exists(si);
863 | set_segment_key(pos, k);
864 | ++elements;
865 | return create_segment_value(pos);
866 | }
867 | else if (key_exists && equal_key(pos, k)) {
868 | return &(get_segment_value(pos));
869 | }
870 | bool key_overflows = s.is_overflows(si);
871 | size_type h = pos;
872 | if (key_overflows) {
873 | pos = find_rest(k, h);
874 | if (pos != end()) {
875 | return &(get_segment_value(pos));
876 | }
877 | }
878 | return subscript_rest(k, h);
879 | }
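/// (added commentary, not in the original source) subscript() follows the scheme described
/// at the top of the file: try the home bucket, then up to `probes` linear steps via
/// subscript_rest(), then the shared overflow region. when every level is full it returns
/// nullptr, and the wrapper's operator[] (as in the map variant above) is expected to
/// rehash and retry.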
880 | size_type erase_rest(const _K& k, size_type origin)
881 | RABBIT_NOINLINE_ /// this function must never be inlined
882 | {
883 | size_type pos = find_rest(k, origin);
884 |
885 | if (pos != (*this).end()) {
886 | set_exists(pos, false);
887 | ++removed;
888 | set_segment_key(pos, empty_key);
889 | destroy_segment_value(pos);
890 | --elements;
891 | if (pos >= get_o_start()) {
892 | size_type c = get_o_start();
893 | for (; c < overflow_elements; ++c) {
894 | if (origin == map_key(get_segment_key(c))) {
895 | break;
896 | }
897 | }
898 | }
899 | return 1;
900 | }
901 | return 0;
902 | }
903 | size_type erase(const _K& k) {
904 |
905 | size_type pos = map_key(k);
906 |
907 | _Bt si = get_segment_index(pos);
908 | _Segment& s = get_segment(pos);
909 | if (s.is_exists(si) && equal_key(pos, k)) { ///get_segment(pos).exists == ALL_BITS_SET ||
910 | set_segment_key(pos, empty_key);
911 | s.toggle_exists(si);
912 | destroy_segment_value(pos);
913 | --elements;
914 | ++removed;
915 | return 1;
916 | }
917 | if (!s.is_overflows(si)) {
918 | return 0;
919 | }
920 | else
921 | return erase_rest(k, pos);
922 |
923 | }
924 | /// not used (could be used where hash table must actually shrink too)
925 | bool is_small() const {
926 | return (get_extent() > (config.MIN_EXTENT << 3)) && (elements < get_extent() / 8);
927 | }
928 |
929 | size_type count(const _K& k) const {
930 | size_type pos = (*this).find(k);
931 | if (pos == (*this).end()) {
932 | return 0;
933 | }
934 | else return 1;
935 | }
936 | const _V& at(const _K& k) const {
937 | size_type pos = find(k);
938 | if (pos != (*this).end()) {
939 | return get_segment_value(pos);
940 | }
941 | throw std::exception();
942 | }
943 | _V& at(const _K& k) {
944 | size_type pos = find(k);
945 | if (pos != (*this).end()) {
946 | return get_segment_value(pos);
947 | }
948 | throw std::exception();
949 | }
950 |
951 | bool get(const _K& k, _V& v) const {
952 | size_type pos = find(k);
953 | if (pos != (*this).end()) {
954 | v = get_segment_value(pos);
955 | return true;
956 | }
957 | return false;
958 | }
959 |
960 | size_type find_rest(const _K& k, size_type origin) const {
961 | /// randomization step for attack mitigation
962 | size_type pos = map_rand_key(k);
963 |
964 | for (unsigned int i = 0; i < probes && pos < get_extent();) {
965 | _Bt si = get_segment_index(pos);
966 | if (segment_equal_key_exists(pos, k)) {
967 | return pos;
968 | }
969 | pos += hash_probe_incr(i);
970 | ++i;
971 | }
972 |
973 | for (pos = get_o_start(); pos < overflow_elements; ) {
974 | if (equal_key(pos, k) && exists_(pos)) return pos;
975 | ++pos;
976 | }
977 |
978 | return end();
979 | }
980 | size_type find(const _K& k, size_type& pos) const {
981 | pos = map_key(k);
982 | bool is_empty = eq_f(empty_key, k);
983 |
984 | if (is_empty) {
985 | _Bt index = get_segment_index(pos);
986 | const _Segment& s = get_segment(pos);
987 | if (s.is_exists(index) && equal_key(pos, k)) { ///get_segment(pos).exists == ALL_BITS_SET ||
988 | return pos;
989 | }
990 | if (!s.is_overflows(index)) {
991 | return end();
992 | }
993 | }
994 | else {
995 | if (equal_key(pos, k)) return pos;
996 |
997 | }
998 | _Bt index = get_segment_index(pos);
999 | const _Segment& s = get_segment(pos);
1000 | if (!s.is_overflows(index)) {
1001 | return end();
1002 | }
1003 |
1004 | return find_rest(k, pos);
1005 | }
1006 | size_type find(const _K& k) const {
1007 |
1008 | size_type pos;
1009 | return find(k, pos);
1010 | }
1011 |
1012 | size_type begin() const {
1013 | if (elements == 0)
1014 | return end();
1015 | size_type pos = 0;
1016 | while (!exists_(pos)) {
1017 | ++pos;
1018 |
1019 | }
1020 | return pos;
1021 | }
1022 | size_type end() const {
1023 | return get_data_size();
1024 | }
1025 | size_type size() const {
1026 | return elements;
1027 | }
1028 | typedef std::shared_ptr<hash_kernel> ptr;
1029 | }; /// hash_kernel
1030 | typedef std::shared_ptr<hash_kernel> _KernelPtr;
1031 | typedef std::vector<_KernelPtr> _Kernels;
1032 | _Kernels versions;
1033 | public:
1034 |
1035 | struct iterator {
1036 | typedef hash_kernel* kernel_ptr;
1037 | const basic_unordered_set* h;
1038 | size_type pos;
1039 | mutable char rdata[sizeof(_ElPair)];
1040 | private:
1041 | _Bt index;
1042 | _Bt exists;
1043 | _Bt bsize;
1044 | const kernel_ptr get_kernel() const {
1045 | return h->pcurrent;
1046 |
1047 | }
1048 | kernel_ptr get_kernel() {
1049 | return h->pcurrent;
1050 | }
1051 | void set_index() {
1052 | if (h != nullptr && !is_end(*this)) {//
1053 | const _Segment& s = get_kernel()->get_segment(pos);
1054 | exists = s.exists;
1055 | index = get_kernel()->get_segment_index(pos);
1056 | bsize = get_kernel()->config.BITS_SIZE;
1057 | }
1058 | }
1059 | void check_index() {
1060 |
1061 | }
1062 | void increment() {
1063 | ++pos;
1064 | ++index;
1065 | if (index == bsize) {
1066 | set_index();
1067 | }
1068 |
1069 | }
1070 | public:
1071 | iterator() : h(nullptr), pos(0) {
1072 | }
1073 |
1074 | iterator(const end_iterator&) : h(nullptr), pos(end_pos) {
1075 | }
1076 | iterator(const basic_unordered_set* h, size_type pos) : pos(pos) {
1077 | this->h = h;
1078 | set_index();
1079 | }
1080 |
1081 | iterator(const iterator& r) {
1082 | (*this) = r;
1083 | }
1084 |
1085 | //~iterator() {
1086 | //}
1087 |
1088 | iterator& operator=(const iterator& r) {
1089 | pos = r.pos;
1090 | h = r.h;
1091 | set_index();
1092 |
1093 | return (*this);
1094 | }
1095 | inline iterator& operator++() {
1096 | do {
1097 | increment();
1098 | } while ((exists & (((_Bt)1) << index)) == (_Bt)0);
1099 | return (*this);
1100 | }
1101 | iterator operator++(int) {
1102 | iterator t = (*this);
1103 | ++(*this);
1104 | return t;
1105 | }
1106 | inline _V& get_value() {
1107 | return get_kernel()->get_segment_value((*this).pos);
1108 | }
1109 | inline const _V& get_value() const {
1110 | return get_kernel()->get_segment_value((*this).pos);
1111 | }
1112 | inline _K& get_key() {
1113 | return get_kernel()->get_segment_key((*this).pos);
1114 | }
1115 | inline const _K& get_key() const {
1116 | return get_kernel()->get_segment_key((*this).pos);
1117 | }
1118 | const _ElPair operator*() const {
1119 | return get_kernel()->get_segment_pair((*this).pos);
1120 | }
1121 | inline _ElPair operator*() {
1122 | return get_kernel()->get_segment_pair((*this).pos);
1123 | }
1124 | inline _ElPair* operator->() const {
1125 | /// can reconstruct multiple times on same memory because _ElPair is only references
1126 | _ElPair* ret = new ((void *)rdata) _ElPair(get_kernel()->get_segment_pair(pos));
1127 | return ret;
1128 | }
1129 | inline const _ElPair *operator->() {
1130 | /// can reconstruct multiple times on same memory because _ElPair is only references
1131 | _ElPair* ret = new ((void *)rdata) _ElPair(get_kernel()->get_segment_pair(pos));
1132 | return ret;
1133 | }
1134 | inline bool operator==(const iterator& r) const {
1135 | if (r.pos == end_pos) return is_end();
1136 | return (pos == r.pos);
1137 | }
1138 | bool operator!=(const iterator& r) const {
1139 | if (r.pos == end_pos) return !is_end();
1140 | return (pos != r.pos);
1141 | }
1142 | inline bool operator==(const end_iterator& r) const {
1143 | return is_end();
1144 | }
1145 | bool operator!=(const end_iterator& r) const {
1146 | return !is_end();
1147 |
1148 | }
1149 | bool is_end(const iterator& r) const {
1150 | if (h == nullptr) return pos == end_pos;
1151 | return r.pos >= get_kernel()->end();
1152 | }
1153 | bool is_end() const {
1154 | return is_end(*this);
1155 | }
1156 | size_type get_pos() const {
1157 | return pos;
1158 | }
1159 |
1160 | };
1161 |
1162 | struct const_iterator {
1163 | private:
1164 | typedef hash_kernel* kernel_ptr;
1165 | const basic_unordered_set* h;
1166 | //mutable kernel_ptr h;
1167 | _Bt index;
1168 | _Bt exists;
1169 | mutable char rdata[sizeof(_ElPair)];
1170 | inline const kernel_ptr get_kernel() const {
1171 |
1172 | return h->pcurrent; // current.get();
1173 | }
1174 | inline kernel_ptr get_kernel() {
1175 |
1176 | return const_cast<basic_unordered_set*>(h)->pcurrent; // current.get();
1177 | }
1178 | void set_index() {
1179 | if (get_kernel() != nullptr && !is_end(*this)) { ///
1180 | const _Segment& s = get_kernel()->get_segment(pos);
1181 | exists = s.exists;
1182 | index = get_kernel()->get_segment_index(pos);
1183 | }
1184 | }
1185 | void check_index() {
1186 |
1187 | }
1188 | void increment() {
1189 | ++pos;
1190 | ++index;
1191 | if (index == get_kernel()->config.BITS_SIZE) {
1192 | set_index();
1193 | }
1194 |
1195 | }
1196 | public:
1197 | size_type pos;
1198 |
1199 | const_iterator() : h(nullptr){
1200 |
1201 | }
1202 | const_iterator(const end_iterator&) : h(nullptr), pos(end_pos) {
1203 | }
1204 | //~const_iterator() {
1205 |
1206 | //}
1207 | const_iterator(const basic_unordered_set* h, size_type pos) : pos(pos) {
1208 | this->h = h; // ->current.get();
1209 | set_index();
1210 | }
1211 | const_iterator(const iterator& r) : h(nullptr){
1212 | (*this) = r;
1213 | }
1214 |
1215 | const_iterator& operator=(const iterator& r) {
1216 | pos = r.pos;
1217 | h = r.h;
1218 | set_index();
1219 | return (*this);
1220 | }
1221 |
1222 | const_iterator& operator=(const const_iterator& r) {
1223 | pos = r.pos;
1224 | h = r.h;
1225 | index = r.index; exists = r.exists; /// copy the cached exists bits too, otherwise operator++ reads stale state
1226 | return (*this);
1227 | }
1228 |
1229 | const_iterator& operator++() {
1230 | do {
1231 | increment();
1232 | } while ((exists & (((_Bt)1) << index)) == (_Bt)0);
1233 | //increment();
1234 | //while ((exists & (((_Bt)1) << index)) == (_Bt)0) {
1235 | // increment();
1236 | //}
1237 |
1238 |
1239 | return (*this);
1240 | }
1241 | const_iterator operator++(int) {
1242 | const_iterator t = (*this); ++(*this); return t; /// post-increment must advance and return the previous position
1243 | }
1244 | const _ElPair operator*() const {
1245 | return get_kernel()->get_segment_pair(pos);
1246 |
1247 | }
1248 | const _ElPair *operator->() const {
1249 | /// can reconstruct multiple times on same memory because _ElPair is only references
1250 | _ElPair* ret = new ((void *)rdata) _ElPair(get_kernel()->get_segment_pair(pos));
1251 | return ret;
1252 | }
1253 |
1254 | inline bool operator==(const const_iterator& r) const {
1255 | if (r.pos == end_pos) return is_end();
1256 | return (pos == r.pos);
1257 | }
1258 | bool operator!=(const const_iterator& r) const {
1259 | if (r.pos == end_pos) return !is_end();
1260 | return (pos != r.pos);
1261 | }
1262 | bool is_end(const const_iterator& r) const {
1263 | if (h == nullptr) return false;
1264 | return r.pos >= get_kernel()->end();
1265 | }
1266 | bool is_end() const {
1267 | return is_end(*this);
1268 | }
1269 | size_type get_pos() const {
1270 | return pos;
1271 | }
1272 |
1273 | };
1274 |
1275 | protected:
1276 | /// the default config for each hash instance
1277 | rabbit_config default_config;
1278 | key_compare key_c;
1279 | allocator_type alloc;
1280 |
1281 | void rehash() {
1282 | size_type to = current->key_mapper.next_size();
1283 | rehash(to);
1284 | }
1285 |
1286 | void set_current(typename hash_kernel::ptr c) {
1287 | pcurrent = c.get();
1288 | current = c;
1289 | }
1290 |
1291 | typename hash_kernel::ptr current;
1292 | hash_kernel* pcurrent;
1293 | inline void create_current() {
1294 | if (current == nullptr)
1295 |
1296 | set_current(std::allocate_shared<hash_kernel>(alloc, key_c, alloc));
1297 | }
1298 | public:
1299 | float load_factor() const {
1300 | if (current == nullptr) return 0;
1301 | return current->load_factor();
1302 | }
1303 | size_type bucket_count() const {
1304 | if (current == nullptr) return 0;
1305 | return current->bucket_count();
1306 | }
1307 | size_type bucket_size(size_type n) const {
1308 | if (current == nullptr) return 0;
1309 | return current->bucket_size(n);
1310 | }
1311 | float max_load_factor() const {
1312 | if (current == nullptr) return 1;
1313 | return current->max_load_factor();
1314 | }
1315 |
1316 | void max_load_factor(float z) {
1317 | create_current();
1318 | current->max_load_factor(z);
1319 | }
1320 | bool empty() const {
1321 | if (current == nullptr) return true;
1322 | return current->size() == 0;
1323 | }
1324 | void reserve(size_type atleast) {
1325 | create_current();
1326 | rehash((size_type)((double)atleast*current->get_resize_factor()));
1327 | }
1328 | void resize(size_type atleast) {
1329 | create_current();
1330 | rehash(current->key_mapper.nearest_larger(atleast));
1331 | }
1332 | void rehash(size_type to_) {
1333 | create_current();
1334 | rabbit_config config;
1335 | size_type to = std::max(to_, config.MIN_EXTENT);
1336 | /// can cause an out-of-memory error because of recursive rehashes
1337 |
1338 | typename hash_kernel::ptr rehashed = std::allocate_shared<hash_kernel>(alloc);
1339 | size_type extent = current->get_extent();
1340 | size_type new_extent = to;
1341 | size_type nrand_probes = 0;
1342 | hash_kernel * reh = rehashed.get();
1343 | hash_kernel * cur = current.get();
1344 | try {
1345 |
1346 | rehashed->set_logarithmic(current->get_logarithmic());
1347 | rehashed->resize_clear(new_extent);
1348 | rehashed->mf = (*this).current->mf;
1349 | //std::cout << " load factor " << current->load_factor() << std::endl;
1350 | if (current->load_factor() < 0.2) {
1351 | /// std::cout << "possible attack/bad hash detected : using random probes : " << current->get_probes() << std::endl;
1352 | nrand_probes = 1;
1353 | rehashed->set_rand_probes(nrand_probes);
1354 | }
1355 | using namespace std;
1356 |
1357 |
1358 | while (true) {
1359 | iterator e = end();
1360 | size_type ctr = 0;
1361 | bool rerehashed = false;
1362 |
1363 | //_K k;
1364 | for (iterator i = begin(); i != e; ++i) {
1365 | //std::swap(k,(*i).first);
1366 | _RV v = rehashed->unique_subscript((*i).first);
1367 | if (v != nullptr) {
1368 | *v = i->second;
1369 | /// a cheap consistency check that surfaces subtle bugs during development
1370 | if (++ctr != rehashed->elements) {
1371 | cout << "iterations " << ctr << " elements " << rehashed->elements << " extent " << rehashed->get_extent() << endl;
1372 | cout << "inside rehash " << rehashed->get_extent() << endl;
1373 | cout << "new " << rehashed->elements << " current size:" << current->elements << endl;
1374 | throw bad_alloc();
1375 | }
1376 |
1377 | }
1378 | else {
1379 | //std::cout << "rehashing in rehash " << ctr << " of " << current->elements << std::endl;
1380 | rerehashed = true;
1381 | new_extent = rehashed->key_mapper.next_size();
1382 | rehashed = std::allocate_shared<hash_kernel>(alloc);
1383 | rehashed->resize_clear(new_extent);
1384 | rehashed->mf = (*this).current->mf;
1385 | rehashed->set_rand_probes(nrand_probes);
1386 | // i = begin(); // start over
1387 | //ctr = 0;
1388 | break;
1389 |
1390 | }
1391 | }
1392 | if (rehashed->elements == current->elements) {
1393 | break;
1394 | }
1395 | else if (!rerehashed) {
1396 | cout << "hash error: unequal key count - retry rehash " << endl;
1397 | cout << "iterations " << ctr << " elements " << rehashed->elements << " extent " << rehashed->get_extent() << endl;
1398 | cout << "new " << rehashed->elements << " current size:" << current->elements << endl;
1399 | throw bad_alloc();
1400 | }
1401 | else {
1402 | //cout << "re-rehashing iterations " << ctr << " elements " << rehashed->elements << " extent " << rehashed->get_extent() << endl;
1403 | //rehashed->resize_clear(rehashed->get_extent());
1404 | //break;
1405 | }
1406 |
1407 | }/// for
1408 |
1409 | }
1410 | catch (std::bad_alloc &e) {
1411 | std::cout << "bad allocation: rehash failed in temp phase :" << new_extent << std::endl;
1412 | size_t t = 0;
1413 | std::cin >> t;
1414 | throw e;
1415 | }
1416 | set_current(rehashed);
1417 |
1418 | }
1419 | void clear() {
1420 | if (current != nullptr)
1421 | current->clear();
1422 | current = nullptr;
1423 | ///set_current(std::allocate_shared<hash_kernel>(alloc));
1424 | }
1425 |
1426 | void clear(const key_compare& compare, const allocator_type& allocator) {
1427 | set_current(std::allocate_shared<hash_kernel>(allocator, compare, allocator));
1428 | }
1429 |
1430 | basic_unordered_set() :current(nullptr) {
1431 | //
1432 | }
1433 |
1434 | basic_unordered_set(const key_compare& compare, const allocator_type& allocator) : key_c(compare), alloc(allocator) {
1435 |
1436 | }
1437 |
1438 | basic_unordered_set(const basic_unordered_set& right) {
1439 | *this = right;
1440 | }
1441 |
1442 | ~basic_unordered_set() {
1443 |
1444 | }
1445 |
1446 | void swap(basic_unordered_set& with) {
1447 | typename hash_kernel::ptr t = with.current;
1448 | with.set_current(this->current);
1449 | this->set_current(t);
1450 | }
1451 |
1452 | void move(basic_unordered_set& from) {
1453 | (*this).current = from.current;
1454 | from.current = nullptr;
1455 | }
1456 |
1457 | basic_unordered_set& operator=(const basic_unordered_set& right) {
1458 | (*this).set_current(std::allocate_shared<hash_kernel>(alloc));
1459 | (*this).reserve(right.size());
1460 | const_iterator e = right.end();
1461 | for (const_iterator c = right.begin(); c != e; ++c) {
1462 | (*this)[(*c).first] = (*c).second;
1463 | }
1464 |
1465 | return *this;
1466 | }
1467 |
1468 | hasher hash_function() const {
1469 | return (current != nullptr) ? this->current->hf : hasher();
1470 | }
1471 |
1472 | key_equal key_eq() const {
1473 | if (current != nullptr)
1474 | return (this->current->eq_f);
1475 | return key_equal();
1476 | }
1477 | iterator insert(const _K& k, const _V& v) {
1478 | create_current();
1479 | (*this)[k] = v;
1480 | return iterator(this, current->last_modified);
1481 | }
1482 |
1483 | iterator insert(const std::pair<_K, _V>& p) {
1484 |
1485 | return insert(p.first, p.second);
1486 | }
1487 | /// generic range insert: copies the pairs in [start, _afterLast) into the table
1488 | template<class _Iter>
1489 | iterator insert(_Iter start, _Iter _afterLast) {
1490 | create_current();
1491 | for (_Iter i = start; i != _afterLast; ++i) {
1492 | insert((*i).first, (*i).second);
1493 | }
1494 | return iterator(this, current->last_modified);
1495 | }
1496 | /// fast getter that doesn't use iterators and never silently modifies the table
1497 | bool get(const _K& k, _V& v) const {
1498 | if (current != nullptr)
1499 | return (*this).current->get(k, v);
1500 | return false;
1501 | }
1502 | /// throws an exception when no value matches the key
1503 | const _V& at(const _K& k) const {
1504 | if (current == nullptr) throw std::exception();
1505 | return (*this).current->at(k);
1506 | }
1507 | _V& at(const _K& k) {
1508 | create_current();
1509 | return (*this).current->at(k);
1510 | }
1511 |
1512 | bool error(const _K& k) {
1513 | _V *rv = current->subscript(k);
1514 | return rv == nullptr;
1515 | }
1516 |
1517 | _V& operator[](const _K& k) {
1518 | create_current();
1519 | _V *rv = current->subscript(k);
1520 | while (rv == nullptr) {
1521 | this->rehash();
1522 | rv = current->subscript(k);
1523 | }
1524 | return *rv;
1525 | }
1526 | size_type erase(const _K& k) {
1527 | if (current == nullptr) return size_type();
1528 | //if(current->is_small()){
1529 | // rehash(1);
1530 | //}
1531 | return current->erase(k);
1532 | }
1533 | size_type erase(iterator i) {
1534 | return erase((*i).first);
1535 | }
1536 | size_type erase(const_iterator i) {
1537 | return erase((*i).first);
1538 | }
1539 | size_type count(const _K& k) const {
1540 | if (current == nullptr)return size_type();
1541 | return current->count(k);
1542 | }
1543 | iterator find(const _K& k) const {
1544 | if (current == nullptr) return iterator(this, size_type());
1545 |
1546 | return iterator(this, current->find(k));
1547 | }
1548 | iterator begin() const {
1549 | if (current == nullptr)return iterator(this, size_type());
1550 | return iterator(this, current->begin());
1551 | }
1552 | end_iterator end() const {
1553 | return end_iterator(); // iterator(end_pos);
1554 | }
1555 | const_iterator cbegin() const {
1556 | if (current == nullptr)return const_iterator(this, size_type());
1557 | return const_iterator(this, current->begin());
1558 | }
1559 | const_iterator cend() const {
1560 | return iterator(end_pos);
1561 | }
1562 | size_type size() const {
1563 | if (current == nullptr)return size_type();
1564 | return current->size();
1565 | }
1566 | void set_logarithmic(size_type logarithmic) {
1567 | create_current();
1568 | this->current->set_logarithmic(logarithmic);
1569 | }
1570 | };
1571 |
1572 | /// the unordered set
1573 | template<typename _K, typename _H = std::hash<_K> >
1574 | class unordered_set : public basic_unordered_set<_K, char, _H> {
1575 | protected:
1576 | typedef basic_unordered_set<_K, char, _H> _Container;
1577 | public:
1578 |
1579 |
1580 | unordered_set() {
1581 | }
1582 |
1583 | ~unordered_set() {
1584 | }
1585 |
1586 | void insert(const _K& k) {
1587 | _Container::insert(k, '0');
1588 | }
1589 |
1590 | }; /// unordered set
1591 | }; // rab-bit
1592 |
1593 | #endif /// _RABBIT_H_CEP_20150303_
1594 |
--------------------------------------------------------------------------------
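The unordered_set wrapper above stores bare keys by pairing each one with a dummy char through basic_unordered_set. A minimal usage sketch follows; the include path ("rabbit/unordered_set"), the rabbit namespace, and the availability of a default hasher for int are assumptions taken from the directory layout and the closing namespace comment, and only members that actually appear in the header above are called.

// Hedged sketch: basic use of the set wrapper defined above.
// Assumptions: the repository root is on the include path, the containers live
// in namespace rabbit, and a default hasher exists for int keys.
#include <iostream>
#include "rabbit/unordered_set"

int main() {
    rabbit::unordered_set<int> s;
    for (int i = 0; i < 1000; ++i)
        s.insert(i);                  // insert(const _K&) stores the key with a dummy '0' value
    std::cout << "size     " << s.size() << "\n";
    std::cout << "count(7) " << s.count(7) << "\n";
    s.erase(7);                       // erase by key returns the number of keys removed
    std::cout << "count(7) " << s.count(7) << "\n";
    std::cout << "load     " << s.load_factor() << "\n";
    return 0;
}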
/rabbit/unordered_map:
--------------------------------------------------------------------------------
1 | /// defines a map
2 | #pragma once
3 | #include
4 | //#include
5 |
--------------------------------------------------------------------------------
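The one-line header above forwards to the map implementation in this directory. The map's exact class signature is not visible in this listing, so the sketch below assumes a rabbit::unordered_map<_K, _V> that mirrors the basic_unordered_set interface shown earlier: operator[] inserts on a miss (retrying through rehash() until the kernel accepts the key), while get() is the non-mutating fast path.

// Hedged sketch: map-style access, assuming rabbit::unordered_map<K, V> exposes
// the same members as basic_unordered_set above (operator[], get, erase, size).
#include <iostream>
#include <string>
#include "rabbit/unordered_map"

int main() {
    rabbit::unordered_map<std::string, int> m;
    m["apples"] = 3;                  // operator[] inserts the key if it is missing
    m["pears"]  = 2;
    int n = 0;
    if (m.get("apples", n))           // fast getter: no iterators, never modifies the table
        std::cout << "apples: " << n << "\n";
    m.erase("pears");
    std::cout << "size " << m.size() << "\n";
    return 0;
}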
/rabbit/unordered_set:
--------------------------------------------------------------------------------
1 | /// defines a set
2 | #pragma once
3 | #include
4 |
--------------------------------------------------------------------------------
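Both forwarding headers ultimately expose the sizing hooks defined on basic_unordered_set: max_load_factor() is forwarded to the kernel (which is created on demand), and reserve() pre-sizes the table through rehash(), scaled by the kernel's resize factor. A short tuning sketch follows, under the same include-path and namespace assumptions as above.

// Hedged sketch: pre-sizing and load-factor tuning with the members shown above.
#include <iostream>
#include "rabbit/unordered_set"

int main() {
    rabbit::unordered_set<int> s;
    s.max_load_factor(0.7f);   // forwarded to the current kernel, created on demand
    s.reserve(1 << 16);        // pre-sizes via rehash(), scaled by the kernel's resize factor
    for (int i = 0; i < 50000; ++i)
        s.insert(i);
    std::cout << "buckets " << s.bucket_count()
              << " load "   << s.load_factor() << "\n";
    return 0;
}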
/rabbit_tests/main.cpp:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include
4 | #include
5 | #include
6 | #include
7 | #include