├── .gitignore ├── .travis.yml ├── LICENSE ├── Makefile ├── README.md ├── include ├── concise.h └── conciseutil.h └── tests └── unit.cpp /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files 2 | *.slo 3 | *.lo 4 | *.o 5 | *.obj 6 | 7 | # Precompiled Headers 8 | *.gch 9 | *.pch 10 | 11 | # Compiled Dynamic libraries 12 | *.so 13 | *.dylib 14 | *.dll 15 | 16 | # Fortran module files 17 | *.mod 18 | 19 | # Compiled Static libraries 20 | *.lai 21 | *.la 22 | *.a 23 | *.lib 24 | 25 | # Executables 26 | *.exe 27 | *.out 28 | *.app 29 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | sudo: false 3 | compiler: 4 | - clang 5 | 6 | branches: 7 | only: 8 | - master 9 | 10 | script: make && ./unit 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # minimalist makefile 2 | .SUFFIXES: 3 | # 4 | .SUFFIXES: .cpp .o .c .h 5 | ifeq ($(DEBUG),1) 6 | CXXFLAGS = -fPIC -std=c++11 -ggdb -march=native -Wall -Wextra -Wshadow -fsanitize=undefined -fno-omit-frame-pointer -fsanitize=address 7 | else 8 | CXXFLAGS = -fPIC -std=c++11 -O3 -march=native -Wall -Wextra -Wshadow 9 | endif # debug 10 | all: unit 11 | HEADERS=./include/concise.h ./include/conciseutil.h 12 | 13 | unit: ./tests/unit.cpp $(HEADERS) 14 | $(CXX) $(CXXFLAGS) -o unit ./tests/unit.cpp -Iinclude 15 | clean: 16 | rm -f *.o unit 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Concise 2 | [![Build Status](https://travis-ci.org/lemire/Concise.png)](https://travis-ci.org/lemire/Concise) 3 | 4 | C++ implementation of CONCISE (COmpressed 'N' Composable Integer SEt) and WAH compressed bitsets. 
5 | The implementation is loosely based on Colantonio's original Java code. 6 | 7 | Pre-requisite: gcc-like compiler (with C++11 support). 8 | 9 | Usage : 10 | 11 | ```bash 12 | make 13 | ./unit 14 | ``` 15 | ## Other libraries 16 | - See CRoaring https://github.com/RoaringBitmap/CRoaring 17 | - See EWAHBoolArray https://github.com/lemire/EWAHBoolArray 18 | - Git itself uses compressed bitsets https://github.com/git/git/tree/master/ewah 19 | 20 | ## Warning 21 | 22 | Though this implementation is made freely available under the Apache License 2.0, WAH 23 | was patented by its authors and Concise is a derivative of WAH. I (Daniel Lemire) cannot guarantee that this 24 | code can be safely used in commercial projects. Consult a lawyer. 25 | -------------------------------------------------------------------------------- /include/concise.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include "conciseutil.h" 9 | 10 | template class WordIterator; 11 | 12 | template class ConciseSetBitForwardIterator; 13 | 14 | /** 15 | * wah_mode: 16 | * true for a WAH bitset, 17 | * false for a Concise bitset, 18 | */ 19 | template class ConciseSet { 20 | 21 | public: 22 | /** 23 | * Creates an empty integer set 24 | */ 25 | ConciseSet() : words(), last(-1), lastWordIndex(-1) {} 26 | 27 | ConciseSet(const ConciseSet &cs) 28 | : words(cs.words), last(cs.last), lastWordIndex(cs.lastWordIndex) {} 29 | 30 | bool isEmpty() const { return lastWordIndex == -1; } 31 | 32 | size_t sizeInBytes() const { return (words.size() + 1) * sizeof(uint32_t); } 33 | 34 | void compact() { words.shrink_to_fit(); } 35 | 36 | void swap(ConciseSet &other) { 37 | this->words.swap(other.words); 38 | uint32_t tmplast = this->last; 39 | this->last = other.last; 40 | other.last = tmplast; 41 | 42 | int32_t tmplwi = this->lastWordIndex; 43 | this->lastWordIndex = other.lastWordIndex; 44 | 
other.lastWordIndex = tmplwi; 45 | } 46 | 47 | ConciseSet logicaland(const ConciseSet &other) const { 48 | ConciseSet res; 49 | logicalandToContainer(other, res); 50 | return res; 51 | } 52 | 53 | ConciseSet operator&(const ConciseSet &o) const { 54 | return logicaland(o); 55 | } 56 | 57 | void logicalandToContainer(const ConciseSet &other, 58 | ConciseSet &res) const { 59 | if (isEmpty() || other.isEmpty()) { 60 | res.clear(); 61 | return; 62 | } 63 | res.words.resize(3 + this->lastWordIndex + other.lastWordIndex); 64 | 65 | // scan "this" and "other" 66 | WordIterator thisItr(*this); 67 | WordIterator otherItr(other); 68 | while (true) { 69 | if (!thisItr.IsLiteral) { 70 | if (!otherItr.IsLiteral) { 71 | int minCount = std::min(thisItr.count, otherItr.count); 72 | res.appendFill(minCount, thisItr.word & otherItr.word); 73 | if (!thisItr.prepareNext(minCount) | 74 | !otherItr.prepareNext(minCount)) // NOT || 75 | break; 76 | } else { 77 | res.appendLiteral(thisItr.toLiteral() & otherItr.word); 78 | thisItr.word--; 79 | if (!thisItr.prepareNext(1) | 80 | !otherItr.prepareNext()) // do NOT use "||" 81 | break; 82 | } 83 | } else if (!otherItr.IsLiteral) { 84 | res.appendLiteral(thisItr.word & otherItr.toLiteral()); 85 | otherItr.word--; 86 | if (!thisItr.prepareNext() | 87 | !otherItr.prepareNext(1)) // do NOT use "||" 88 | break; 89 | } else { 90 | // Java code simply does thisItr.word & otherItr.word below 91 | res.appendLiteral(concise_and(thisItr.word , otherItr.word)); 92 | if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" 93 | break; 94 | } 95 | } 96 | bool invalidLast = true; 97 | // remove trailing zeros 98 | res.trimZeros(); 99 | if (res.isEmpty()) 100 | return; 101 | 102 | // compute the greatest element 103 | if (invalidLast) 104 | res.updateLast(); 105 | 106 | return; 107 | } 108 | 109 | bool intersects(const ConciseSet &other) const { 110 | if (isEmpty() || other.isEmpty()) { 111 | return 0; 112 | } 113 | // scan "this" and 
"other" 114 | WordIterator thisItr(*this); 115 | WordIterator otherItr(other); 116 | while (true) { 117 | if (!thisItr.IsLiteral) { 118 | if (!otherItr.IsLiteral) { 119 | int minCount = std::min(thisItr.count, otherItr.count); 120 | if(concise_and(thisItr.word, otherItr.word) & SEQUENCE_BIT) 121 | if(minCount > 0 ) return true; 122 | if (!thisItr.prepareNext(minCount) | 123 | !otherItr.prepareNext(minCount)) // NOT || 124 | break; 125 | } else { 126 | if( !isLiteralZero(thisItr.toLiteral() & otherItr.word) ) return true; 127 | thisItr.word--; 128 | if (!thisItr.prepareNext(1) | 129 | !otherItr.prepareNext()) // do NOT use "||" 130 | break; 131 | } 132 | } else if (!otherItr.IsLiteral) { 133 | if( !isLiteralZero(thisItr.word & otherItr.toLiteral()) ) return true; 134 | otherItr.word--; 135 | if (!thisItr.prepareNext() | 136 | !otherItr.prepareNext(1)) // do NOT use "||" 137 | break; 138 | } else { 139 | // Java code simply does thisItr.word & otherItr.word below 140 | if ( !isLiteralZero(concise_and(thisItr.word , otherItr.word)) ) return true; 141 | if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" 142 | break; 143 | } 144 | } 145 | return false; 146 | } 147 | 148 | size_t logicalandCount(const ConciseSet &other) const { 149 | if (isEmpty() || other.isEmpty()) { 150 | return 0; 151 | } 152 | size_t answer = 0; 153 | // scan "this" and "other" 154 | WordIterator thisItr(*this); 155 | WordIterator otherItr(other); 156 | while (true) { 157 | if (!thisItr.IsLiteral) { 158 | if (!otherItr.IsLiteral) { 159 | int minCount = std::min(thisItr.count, otherItr.count); 160 | if(concise_and(thisItr.word, otherItr.word) & SEQUENCE_BIT) 161 | answer += 31 * minCount; 162 | if (!thisItr.prepareNext(minCount) | 163 | !otherItr.prepareNext(minCount)) // NOT || 164 | break; 165 | } else { 166 | answer += getLiteralBitCount(thisItr.toLiteral() & otherItr.word); 167 | thisItr.word--; 168 | if (!thisItr.prepareNext(1) | 169 | !otherItr.prepareNext()) // do NOT use 
"||" 170 | break; 171 | } 172 | } else if (!otherItr.IsLiteral) { 173 | answer += getLiteralBitCount(thisItr.word & otherItr.toLiteral()); 174 | otherItr.word--; 175 | if (!thisItr.prepareNext() | 176 | !otherItr.prepareNext(1)) // do NOT use "||" 177 | break; 178 | } else { 179 | // Java code simply does thisItr.word & otherItr.word below 180 | answer += getLiteralBitCount(concise_and(thisItr.word , otherItr.word)); 181 | if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" 182 | break; 183 | } 184 | } 185 | return answer; 186 | } 187 | 188 | ConciseSet logicalandnot(const ConciseSet &other) const { 189 | ConciseSet res; 190 | logicalandnotToContainer(other, res); 191 | return res; 192 | } 193 | 194 | ConciseSet operator-(const ConciseSet &o) const { 195 | return logicalandnot(o); 196 | } 197 | 198 | void logicalandnotToContainer(const ConciseSet &other, 199 | ConciseSet &res) const { 200 | if (isEmpty()) { 201 | res.clear(); 202 | return; 203 | } 204 | if (other.isEmpty()) { 205 | res = *this; 206 | return; 207 | } 208 | res.words.resize(3 + this->lastWordIndex + other.lastWordIndex); 209 | 210 | // scan "this" and "other" 211 | WordIterator thisItr(*this); 212 | WordIterator otherItr(other); 213 | while (true) { 214 | if (!thisItr.IsLiteral) { 215 | if (!otherItr.IsLiteral) { 216 | int minCount = std::min(thisItr.count, otherItr.count); 217 | res.appendFill(minCount, concise_andnot(thisItr.word, otherItr.word)); 218 | if (!thisItr.prepareNext(minCount) | 219 | !otherItr.prepareNext(minCount)) // NOT || 220 | break; 221 | } else { 222 | res.appendLiteral(concise_andnot(thisItr.toLiteral(), otherItr.word)); 223 | thisItr.word--; 224 | if (!thisItr.prepareNext(1) | 225 | !otherItr.prepareNext()) // do NOT use "||" 226 | break; 227 | } 228 | } else if (!otherItr.IsLiteral) { 229 | res.appendLiteral(concise_andnot(thisItr.word, otherItr.toLiteral())); 230 | otherItr.word--; 231 | if (!thisItr.prepareNext() | 232 | !otherItr.prepareNext(1)) // do 
NOT use "||" 233 | break; 234 | } else { 235 | res.appendLiteral(concise_andnot(thisItr.word, otherItr.word)); 236 | if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" 237 | break; 238 | } 239 | } 240 | bool invalidLast = true; 241 | invalidLast |= thisItr.flush(res); 242 | // remove trailing zeros 243 | res.trimZeros(); 244 | if (res.isEmpty()) 245 | return; 246 | 247 | // compute the greatest element 248 | if (invalidLast) 249 | res.updateLast(); 250 | return; 251 | } 252 | 253 | ConciseSet logicalor(const ConciseSet &other) const { 254 | ConciseSet res; 255 | logicalorToContainer(other, res); 256 | return res; 257 | } 258 | 259 | ConciseSet operator|(const ConciseSet &o) const { 260 | return logicalor(o); 261 | } 262 | 263 | void logicalorToContainer(const ConciseSet &other, 264 | ConciseSet &res) const { 265 | if (this->isEmpty()) { 266 | res = other; 267 | return; 268 | } 269 | if (other.isEmpty()) { 270 | res = *this; 271 | return; 272 | } 273 | res.words.resize(3 + this->lastWordIndex + other.lastWordIndex); 274 | // scan "this" and "other" 275 | WordIterator thisItr(*this); 276 | WordIterator otherItr(other); 277 | while (true) { 278 | if (!thisItr.IsLiteral) { 279 | if (!otherItr.IsLiteral) { 280 | int minCount = std::min(thisItr.count, otherItr.count); 281 | res.appendFill(minCount, thisItr.word | otherItr.word); 282 | if (!thisItr.prepareNext(minCount) | 283 | !otherItr.prepareNext(minCount)) // NOT || 284 | break; 285 | } else { 286 | res.appendLiteral(thisItr.toLiteral() | otherItr.word); 287 | thisItr.word--; 288 | if (!thisItr.prepareNext(1) | 289 | !otherItr.prepareNext()) // do NOT use "||" 290 | break; 291 | } 292 | } else if (!otherItr.IsLiteral) { 293 | res.appendLiteral(thisItr.word | otherItr.toLiteral()); 294 | otherItr.word--; 295 | if (!thisItr.prepareNext() | 296 | !otherItr.prepareNext(1)) // do NOT use "||" 297 | break; 298 | } else { 299 | res.appendLiteral(thisItr.word | otherItr.word); 300 | if 
(!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" 301 | break; 302 | } 303 | } 304 | bool invalidLast = true; 305 | res.last = std::max(this->last, other.last); 306 | invalidLast = false; 307 | invalidLast |= thisItr.flush(res); 308 | invalidLast |= otherItr.flush(res); 309 | // remove trailing zeros 310 | res.trimZeros(); 311 | if (res.isEmpty()) 312 | return; 313 | // compute the greatest element 314 | if (invalidLast) 315 | res.updateLast(); 316 | return; 317 | } 318 | 319 | ConciseSet logicalxor(const ConciseSet &other) const { 320 | ConciseSet res; 321 | logicalxorToContainer(other, res); 322 | return res; 323 | } 324 | 325 | ConciseSet operator^(const ConciseSet &o) const { 326 | return logicalxor(o); 327 | } 328 | 329 | void logicalxorToContainer(const ConciseSet &other, 330 | ConciseSet &res) const { 331 | if (this->isEmpty()) { 332 | res = other; 333 | return; 334 | } 335 | if (other.isEmpty()) { 336 | res = *this; 337 | return; 338 | } 339 | res.words.resize(3 + this->lastWordIndex + other.lastWordIndex); 340 | // scan "this" and "other" 341 | WordIterator thisItr(*this); 342 | WordIterator otherItr(other); 343 | while (true) { 344 | if (!thisItr.IsLiteral) { 345 | if (!otherItr.IsLiteral) { 346 | int minCount = std::min(thisItr.count, otherItr.count); 347 | res.appendFill(minCount, concise_xor(thisItr.word, otherItr.word)); 348 | if (!thisItr.prepareNext(minCount) | 349 | !otherItr.prepareNext(minCount)) // NOT || 350 | break; 351 | } else { 352 | res.appendLiteral(concise_xor(thisItr.toLiteral(), otherItr.word)); 353 | thisItr.word--; 354 | if (!thisItr.prepareNext(1) | 355 | !otherItr.prepareNext()) // do NOT use "||" 356 | break; 357 | } 358 | } else if (!otherItr.IsLiteral) { 359 | res.appendLiteral(concise_xor(thisItr.word, otherItr.toLiteral())); 360 | otherItr.word--; 361 | if (!thisItr.prepareNext() | 362 | !otherItr.prepareNext(1)) // do NOT use "||" 363 | break; 364 | } else { 365 | 
res.appendLiteral(concise_xor(thisItr.word, otherItr.word)); 366 | if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" 367 | break; 368 | } 369 | } 370 | bool invalidLast = true; 371 | res.last = std::max(this->last, other.last); 372 | invalidLast = false; 373 | invalidLast |= thisItr.flush(res); 374 | invalidLast |= otherItr.flush(res); 375 | // remove trailing zeros 376 | res.trimZeros(); 377 | if (res.isEmpty()) 378 | return; 379 | // compute the greatest element 380 | if (invalidLast) 381 | res.updateLast(); 382 | return; 383 | } 384 | 385 | bool equals(const ConciseSet &other) const { 386 | return logicalxorEmpty(other); 387 | } 388 | 389 | bool logicalxorEmpty(const ConciseSet &other) const { 390 | if (this->isEmpty()) { 391 | return other.isEmpty(); 392 | } 393 | if (other.isEmpty()) { 394 | return this->isEmpty(); 395 | } 396 | // scan "this" and "other" 397 | WordIterator thisItr(*this); 398 | WordIterator otherItr(other); 399 | while (true) { 400 | if (!thisItr.IsLiteral) { 401 | if (!otherItr.IsLiteral) { 402 | int minCount = std::min(thisItr.count, otherItr.count); 403 | if(concise_xor(thisItr.word, otherItr.word) & SEQUENCE_BIT) 404 | return false; 405 | if (!thisItr.prepareNext(minCount) | 406 | !otherItr.prepareNext(minCount)) // NOT || 407 | break; 408 | } else { 409 | if(!isLiteralZero(concise_xor(thisItr.toLiteral(), otherItr.word))) return false; 410 | thisItr.word--; 411 | if (!thisItr.prepareNext(1) | 412 | !otherItr.prepareNext()) // do NOT use "||" 413 | break; 414 | } 415 | } else if (!otherItr.IsLiteral) { 416 | if(!isLiteralZero(concise_xor(thisItr.word, otherItr.toLiteral()))) return false; 417 | otherItr.word--; 418 | if (!thisItr.prepareNext() | 419 | !otherItr.prepareNext(1)) // do NOT use "||" 420 | break; 421 | } else { 422 | if(!isLiteralZero(concise_xor(thisItr.word, otherItr.word))) return false; 423 | if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" 424 | break; 425 | } 426 | } 427 | 
if(thisItr.flushEmpty() && otherItr.flushEmpty()) return true; 428 | return false; 429 | } 430 | 431 | size_t logicalandnotCount(const ConciseSet &other) const { 432 | if (isEmpty()) { 433 | return 0; 434 | } 435 | if (other.isEmpty()) { 436 | return this->size(); 437 | } 438 | size_t answer = 0; 439 | // scan "this" and "other" 440 | WordIterator thisItr(*this); 441 | WordIterator otherItr(other); 442 | while (true) { 443 | if (!thisItr.IsLiteral) { 444 | if (!otherItr.IsLiteral) { 445 | int minCount = std::min(thisItr.count, otherItr.count); 446 | if(concise_andnot(thisItr.word, otherItr.word) & SEQUENCE_BIT) 447 | answer += 31 * minCount; 448 | if (!thisItr.prepareNext(minCount) | 449 | !otherItr.prepareNext(minCount)) // NOT || 450 | break; 451 | } else { 452 | answer += getLiteralBitCount(concise_andnot(thisItr.toLiteral(), otherItr.word)); 453 | thisItr.word--; 454 | if (!thisItr.prepareNext(1) | 455 | !otherItr.prepareNext()) // do NOT use "||" 456 | break; 457 | } 458 | } else if (!otherItr.IsLiteral) { 459 | answer += getLiteralBitCount(concise_andnot(thisItr.word, otherItr.toLiteral())); 460 | otherItr.word--; 461 | if (!thisItr.prepareNext() | 462 | !otherItr.prepareNext(1)) // do NOT use "||" 463 | break; 464 | } else { 465 | answer += getLiteralBitCount(concise_andnot(thisItr.word, otherItr.word)); 466 | if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" 467 | break; 468 | } 469 | } 470 | answer += thisItr.flushCount(); 471 | return answer; 472 | } 473 | 474 | size_t logicalxorCount(const ConciseSet &other) const { 475 | if (this->isEmpty()) { 476 | return other.size(); 477 | } 478 | if (other.isEmpty()) { 479 | return this->size(); 480 | } 481 | size_t answer = 0; 482 | // scan "this" and "other" 483 | WordIterator thisItr(*this); 484 | WordIterator otherItr(other); 485 | while (true) { 486 | if (!thisItr.IsLiteral) { 487 | if (!otherItr.IsLiteral) { 488 | int minCount = std::min(thisItr.count, otherItr.count); 489 | 
if(concise_xor(thisItr.word, otherItr.word) & SEQUENCE_BIT) 490 | answer += 31 * minCount; 491 | if (!thisItr.prepareNext(minCount) | 492 | !otherItr.prepareNext(minCount)) // NOT || 493 | break; 494 | } else { 495 | answer += getLiteralBitCount(concise_xor(thisItr.toLiteral(), otherItr.word)); 496 | thisItr.word--; 497 | if (!thisItr.prepareNext(1) | 498 | !otherItr.prepareNext()) // do NOT use "||" 499 | break; 500 | } 501 | } else if (!otherItr.IsLiteral) { 502 | answer += getLiteralBitCount(concise_xor(thisItr.word, otherItr.toLiteral())); 503 | otherItr.word--; 504 | if (!thisItr.prepareNext() | 505 | !otherItr.prepareNext(1)) // do NOT use "||" 506 | break; 507 | } else { 508 | answer += getLiteralBitCount(concise_xor(thisItr.word, otherItr.word)); 509 | if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" 510 | break; 511 | } 512 | } 513 | answer += thisItr.flushCount(); 514 | answer += otherItr.flushCount(); 515 | return answer; 516 | } 517 | 518 | size_t logicalorCount(const ConciseSet &other) const { 519 | if (this->isEmpty()) { 520 | return other.size(); 521 | } 522 | if (other.isEmpty()) { 523 | return this->size(); 524 | } 525 | size_t answer = 0; 526 | // scan "this" and "other" 527 | WordIterator thisItr(*this); 528 | WordIterator otherItr(other); 529 | while (true) { 530 | if (!thisItr.IsLiteral) { 531 | if (!otherItr.IsLiteral) { 532 | int minCount = std::min(thisItr.count, otherItr.count); 533 | if((thisItr.word | otherItr.word) & SEQUENCE_BIT) 534 | answer += 31 * minCount; 535 | if (!thisItr.prepareNext(minCount) | 536 | !otherItr.prepareNext(minCount)) // NOT || 537 | break; 538 | } else { 539 | answer += getLiteralBitCount(thisItr.toLiteral() | otherItr.word); 540 | thisItr.word--; 541 | if (!thisItr.prepareNext(1) | 542 | !otherItr.prepareNext()) // do NOT use "||" 543 | break; 544 | } 545 | } else if (!otherItr.IsLiteral) { 546 | answer += getLiteralBitCount(thisItr.word | otherItr.toLiteral()); 547 | otherItr.word--; 548 
| if (!thisItr.prepareNext() | 549 | !otherItr.prepareNext(1)) // do NOT use "||" 550 | break; 551 | } else { 552 | answer += getLiteralBitCount(thisItr.word | otherItr.word); 553 | if (!thisItr.prepareNext() | !otherItr.prepareNext()) // do NOT use "||" 554 | break; 555 | } 556 | } 557 | answer += thisItr.flushCount(); 558 | answer += otherItr.flushCount(); 559 | return answer; 560 | } 561 | 562 | void clear() { reset(); } 563 | 564 | void add(uint32_t e) { 565 | // range check 566 | if (e > MAX_ALLOWED_INTEGER) { 567 | std::cerr << "max integer allowed is " << MAX_ALLOWED_INTEGER 568 | << std::endl; 569 | throw std::runtime_error("out of bound value"); 570 | } 571 | // the element can be simply appended 572 | if ((int32_t)e > last) { 573 | append(e); 574 | return; 575 | } 576 | if ((int32_t)e == last) 577 | return; 578 | // check if the element can be put in a literal word 579 | uint32_t blockIndex = maxLiteralLengthDivision(e); 580 | uint32_t bitPosition = maxLiteralLengthModulus(e); 581 | for (int i = 0; i <= lastWordIndex && blockIndex >= 0; i++) { 582 | uint32_t w = words[i]; 583 | if (isLiteral(w)) { 584 | // check if the current literal word is the "right" one 585 | if (blockIndex == 0) { 586 | // bit already set 587 | if ((w & (UINT32_C(1) << bitPosition)) != 0) 588 | return; 589 | // By adding the bit we potentially create a sequence: 590 | // -- If the literal is made up of all zeros, it definitely 591 | // cannot be part of a sequence (otherwise it would not have 592 | // been created). 
Thus, we can create a 1-bit literal word 593 | // -- If there are MAX_LITERAL_LENGTH - 2 set bits, by adding 594 | // the new one we potentially allow for a 1's sequence 595 | // together with the successive word 596 | // -- If there are MAX_LITERAL_LENGTH - 1 set bits, by adding 597 | // the new one we potentially allow for a 1's sequence 598 | // together with the successive and/or the preceding words 599 | if (!wah_mode) { 600 | uint32_t bitCount = getLiteralBitCount(w); 601 | if (bitCount >= MAX_LITERAL_LENGTH - 2) 602 | break; 603 | } else { 604 | if (containsOnlyOneBit(~w) || w == ALL_ONES_LITERAL) 605 | break; 606 | } 607 | // set the bit 608 | words[i] |= UINT32_C(1) << bitPosition; 609 | return; 610 | } 611 | blockIndex--; 612 | } else { 613 | if (wah_mode) { 614 | if (isOneSequence(w) && (blockIndex <= getSequenceCount(w))) 615 | return; 616 | } else { 617 | // if we are at the beginning of a sequence, and it is 618 | // a set bit, the bit already exists 619 | if (blockIndex == 0 && (getLiteral(w) & (UINT32_C(1) << bitPosition)) != 0) 620 | return; 621 | 622 | // if we are in the middle of a sequence of 1's, the bit already exist 623 | if ((blockIndex > 0) && (blockIndex <= getSequenceCount(w)) && 624 | isOneSequence(w)) 625 | return; 626 | } 627 | // next word 628 | blockIndex -= getSequenceCount(w) + 1; 629 | } 630 | } 631 | // the bit is in the middle of a sequence or it may cause a literal to 632 | // become a sequence, thus the "easiest" way to add it is by ORing 633 | ConciseSet tmp; 634 | tmp.add(e); 635 | ConciseSet newbitmap = this->logicalor(tmp); 636 | this->swap(newbitmap); 637 | } 638 | 639 | void dump_buffer_content() const { 640 | printf("{buffer content \n"); 641 | for (int i = 0; i <= lastWordIndex; i++) { 642 | const uint32_t w = words[i]; 643 | std::cout << w << std::endl; 644 | } 645 | printf("}\n"); 646 | } 647 | 648 | void describe() const { 649 | printf("{cardinality = %d, \n", size()); 650 | for (int i = 0; i <= lastWordIndex; i++) 
/**
 * Tests whether the integer o belongs to the set.
 *
 * The words are walked from the start while "block" counts how many 31-bit
 * blocks remain before the block that would hold o; "bit" is the position of
 * o inside that block.
 *
 * @param o value to look up
 * @return true if o is present; false if the set is empty, o is greater than
 *         last or MAX_ALLOWED_INTEGER, or its bit is not set
 */
bool contains(uint32_t o) const {
  if (isEmpty() || ((int32_t)o > last) || (o > MAX_ALLOWED_INTEGER)) {
    return false;
  }

  // split o into a block index and a bit position within that block
  int32_t block = (int32_t)maxLiteralLengthDivision(o);
  uint32_t bit = maxLiteralLengthModulus(o);
  assert(block * 31 + bit == o);

  for (int i = 0; i <= lastWordIndex; i++) {

    const uint32_t w = words[i];
    const uint32_t t = w & UINT32_C(0xC0000000); // the two most significant bits select the word type
    switch (t) {
    case UINT32_C(0x80000000): // LITERAL
    case UINT32_C(0xC0000000): // LITERAL
      // a literal covers exactly one block: answer from its bit if it is
      // the target block, otherwise skip it
      if (block == 0)
        return (w & (UINT32_C(1) << bit)) != 0;
      block--;
      break;
    case UINT32_C(0x00000000): // ZERO SEQUENCE
      // outside WAH mode the first block may carry one set bit, stored as
      // (position + 1) in the bits above bit 24; when that field is 0 the
      // unsigned subtraction wraps and can never equal bit
      if (!wah_mode)
        if ((block == 0) && ((w >> 25) - 1) == bit)
          return true;
      block -= getSequenceCount(w) + 1;
      // the target block was inside this run of zeros
      if (block < 0)
        return false;
      break;
    case UINT32_C(0x40000000): // ONE SEQUENCE
      // outside WAH mode the first block may carry one cleared bit, stored
      // the same way in the 5 bits starting at bit 25
      if (!wah_mode)
        if ((block == 0) && (((UINT32_C(0x0000001F) & (w >> 25)) - 1)) == bit)
          return false;
      block -= getSequenceCount(w) + 1;
      // the target block was inside this run of ones
      if (block < 0)
        return true;
      break;
    }
  }
  // no more words
  return false;
}
pq.push(ConcisePtr(inputs[i], false)); 779 | } 780 | while (pq.size() > 2) { 781 | 782 | ConcisePtr x1 = pq.top(); 783 | pq.pop(); 784 | 785 | ConcisePtr x2 = pq.top(); 786 | pq.pop(); 787 | ConciseSet *buffer = new ConciseSet(); 788 | x1.ptr->logicalorToContainer(*(const ConciseSet *)x2.ptr, 789 | *buffer); 790 | if (x1.own) { 791 | delete x1.ptr; 792 | } 793 | if (x2.own) { 794 | delete x2.ptr; 795 | } 796 | pq.push(ConcisePtr(buffer, true)); 797 | } 798 | ConcisePtr x1 = pq.top(); 799 | pq.pop(); 800 | 801 | ConcisePtr x2 = pq.top(); 802 | pq.pop(); 803 | 804 | ConciseSet container = x1.ptr->logicalor(*x2.ptr); 805 | 806 | if (x1.own) { 807 | delete x1.ptr; 808 | } 809 | if (x2.own) { 810 | delete x2.ptr; 811 | } 812 | return container; 813 | } 814 | 815 | std::vector words; 816 | 817 | /** 818 | * Most significant set bit within the uncompressed bit string. 819 | */ 820 | int32_t last; 821 | 822 | /** 823 | * Index of the last word in words 824 | */ 825 | int32_t lastWordIndex; 826 | 827 | /** 828 | * Resets to an empty set 829 | */ 830 | void reset() { 831 | words.clear(); 832 | words.shrink_to_fit(); 833 | last = -1; 834 | lastWordIndex = -1; 835 | } 836 | 837 | uint32_t getLiteral(uint32_t word) { 838 | if (isLiteral(word)) 839 | return word; 840 | 841 | if (wah_mode) 842 | return isZeroSequence(word) ? ALL_ZEROS_LITERAL : ALL_ONES_LITERAL; 843 | 844 | // get bits from 30 to 26 and use them to set the corresponding bit 845 | // NOTE: "1 << (word >> 25)" and "1 << ((word >> 25) & 0x0000001F)" are 846 | // equivalent 847 | // NOTE: ">> 1" is required since 00000 represents no bits and 00001 the LSB 848 | // bit set 849 | uint32_t literal = (UINT32_C(1) << (word >> 25)) >> 1; 850 | return isZeroSequence(word) ? 
(ALL_ZEROS_LITERAL | literal) 851 | : (ALL_ONES_LITERAL & ~literal); 852 | } 853 | 854 | void clearBitsAfterInLastWord(int lastSetBit) { 855 | words[lastWordIndex] &= 856 | ALL_ZEROS_LITERAL | (UINT32_C(0xFFFFFFFF) >> (31 - lastSetBit)); 857 | } 858 | 859 | void ensureCapacity(size_t index) { 860 | if (words.size() > index) 861 | return; 862 | words.resize(index + 1); 863 | } 864 | 865 | void shrink_to_fit() { words.shrink_to_fit(); } 866 | 867 | void trimZeros() { 868 | // loop over ALL_ZEROS_LITERAL words 869 | uint32_t w; 870 | do { 871 | w = words[lastWordIndex]; 872 | if (w == ALL_ZEROS_LITERAL) { 873 | lastWordIndex--; 874 | } else if (isZeroSequence(w)) { 875 | if (wah_mode || isSequenceWithNoBits(w)) { 876 | lastWordIndex--; 877 | } else { 878 | // convert the sequence in a 1-bit literal word 879 | words[lastWordIndex] = getLiteral(w); 880 | return; 881 | } 882 | } else { 883 | // one sequence or literal 884 | return; 885 | } 886 | if (lastWordIndex < 0) { 887 | reset(); 888 | return; 889 | } 890 | } while (true); 891 | } 892 | 893 | void append(uint32_t i) { 894 | // special case of empty set 895 | if (isEmpty()) { 896 | uint32_t zeroBlocks = maxLiteralLengthDivision(i); 897 | if (zeroBlocks == 0) { 898 | words.resize(1); 899 | lastWordIndex = 0; 900 | } else if (zeroBlocks == 1) { 901 | words.resize(2); 902 | lastWordIndex = 1; 903 | words[0] = ALL_ZEROS_LITERAL; 904 | } else { 905 | words.resize(2); 906 | lastWordIndex = 1; 907 | words[0] = zeroBlocks - 1; 908 | } 909 | last = i; 910 | words[lastWordIndex] = 911 | ALL_ZEROS_LITERAL | (UINT32_C(1) << maxLiteralLengthModulus(i)); 912 | return; 913 | } 914 | 915 | // position of the next bit to set within the current literal 916 | uint32_t bit = maxLiteralLengthModulus(last) + i - last; 917 | 918 | // if we are outside the current literal, add zeros in 919 | // between the current word and the new 1-bit literal word 920 | if (bit >= MAX_LITERAL_LENGTH) { 921 | int zeroBlocks = maxLiteralLengthDivision(bit) 
/**
 * Appends one literal word after the current last word, merging it into the
 * last word when the pair can be expressed as a sequence.
 *
 * No resizing of "words" is performed here; the slot at lastWordIndex + 1
 * is written directly.
 *
 * @param word literal word to append
 */
void appendLiteral(uint32_t word) {
  // when we have a zero sequence of the maximum length (that is,
  // 00.00000.1111111111111111111111111 = 0x01FFFFFF), it could happen
  // that we try to append a zero literal because the result of the given
  // operation must be an
  // empty set. Without the following test, we would have increased the
  // counter of the zero sequence, thus obtaining 0x02000000 that
  // represents a sequence with the first bit set!
  if (lastWordIndex == 0 && word == ALL_ZEROS_LITERAL &&
      words[0] == UINT32_C(0x01FFFFFF))
    return;

  // first addition
  if (lastWordIndex < 0) {
    words[lastWordIndex = 0] = word;
    return;
  }

  const uint32_t lastWord = words[lastWordIndex];
  if (word == ALL_ZEROS_LITERAL) {
    if (lastWord == ALL_ZEROS_LITERAL)
      // two all-zero literals become a zero sequence with counter 1
      words[lastWordIndex] = 1;
    else if (isZeroSequence(lastWord))
      // extend the existing zero sequence by one block
      words[lastWordIndex]++;
    else if (!wah_mode && containsOnlyOneBit(getLiteralBits(lastWord)))
      // fold a single-bit literal into a zero sequence: the bit position
      // plus one is stored starting at bit 25
      words[lastWordIndex] = 1 | ((1 + __builtin_ctz(lastWord)) << 25);
    else
      words[++lastWordIndex] = word;
  } else if (word == ALL_ONES_LITERAL) {
    if (lastWord == ALL_ONES_LITERAL)
      // two all-one literals become a one sequence with counter 1
      words[lastWordIndex] = SEQUENCE_BIT | 1;
    else if (isOneSequence(lastWord))
      // extend the existing one sequence by one block
      words[lastWordIndex]++;
    else if (!wah_mode && containsOnlyOneBit(~lastWord))
      // fold a literal with a single cleared bit into a one sequence
      words[lastWordIndex] =
          SEQUENCE_BIT | 1 | ((1 + __builtin_ctz(~lastWord)) << 25);
    else
      words[++lastWordIndex] = word;
  } else {
    // mixed literal: always stored as a new word
    words[++lastWordIndex] = word;
  }
}
ALL_ZEROS_LITERAL : ALL_ONES_LITERAL); 992 | return; 993 | } 994 | // empty set 995 | if (lastWordIndex < 0) { 996 | words[lastWordIndex = 0] = fillType | (length - 1); 997 | return; 998 | } 999 | uint32_t lastWord = words[lastWordIndex]; 1000 | if (isLiteral(lastWord)) { 1001 | if (fillType == 0 && lastWord == ALL_ZEROS_LITERAL) { 1002 | words[lastWordIndex] = length; 1003 | } else if (fillType == SEQUENCE_BIT && lastWord == ALL_ONES_LITERAL) { 1004 | words[lastWordIndex] = SEQUENCE_BIT | length; 1005 | } else if (!wah_mode) { 1006 | if (fillType == 0 && containsOnlyOneBit(getLiteralBits(lastWord))) { 1007 | words[lastWordIndex] = length | ((1 + __builtin_ctz(lastWord)) << 25); 1008 | } else if (fillType == SEQUENCE_BIT && containsOnlyOneBit(~lastWord)) { 1009 | words[lastWordIndex] = 1010 | SEQUENCE_BIT | length | ((1 + __builtin_ctz(~lastWord)) << 25); 1011 | } else { 1012 | words[++lastWordIndex] = fillType | (length - 1); 1013 | } 1014 | } else { 1015 | words[++lastWordIndex] = fillType | (length - 1); 1016 | } 1017 | } else { 1018 | if ((lastWord & UINT32_C(0xC0000000)) == fillType) { 1019 | words[lastWordIndex] += length; 1020 | } else { 1021 | words[++lastWordIndex] = fillType | (length - 1); 1022 | } 1023 | } 1024 | } 1025 | 1026 | void updateLast() { 1027 | last = 0; 1028 | for (int32_t i = 0; i <= lastWordIndex; i++) { 1029 | uint32_t w = words[i]; 1030 | if (isLiteral(w)) 1031 | last += MAX_LITERAL_LENGTH; 1032 | else 1033 | last += maxLiteralLengthMultiplication(getSequenceCount(w) + 1); 1034 | } 1035 | 1036 | uint32_t w = words[lastWordIndex]; 1037 | if (isLiteral(w)) 1038 | last -= __builtin_clz(getLiteralBits(w)); 1039 | else 1040 | last--; 1041 | } 1042 | }; 1043 | 1044 | template class WordIterator { 1045 | public: 1046 | /** 1047 | * Initialize data 1048 | */ 1049 | WordIterator(const ConciseSet &p) 1050 | : IsLiteral(false), parent(p), index(-1), word(0), count(0) { 1051 | prepareNext(); 1052 | } 1053 | 1054 | /** 1055 | * @return true if there 
is no current word 1056 | */ 1057 | bool exhausted() { return index > parent.lastWordIndex; } 1058 | 1059 | void exhaust() { index = parent.lastWordIndex + 1; } 1060 | bool prepareNext(int c) { 1061 | count -= c; 1062 | if (count == 0) 1063 | return prepareNext(); 1064 | return true; 1065 | } 1066 | 1067 | bool prepareNext() { 1068 | if (!wah_mode && IsLiteral && count > 1) { 1069 | count--; 1070 | IsLiteral = false; 1071 | word = getSequenceWithNoBits(parent.words[index]) - 1; 1072 | return true; 1073 | } 1074 | 1075 | index++; 1076 | if (index > parent.lastWordIndex) 1077 | return false; 1078 | word = parent.words[index]; 1079 | IsLiteral = isLiteral(word); 1080 | if (!IsLiteral) { 1081 | count = getSequenceCount(word) + 1 ; 1082 | if (!wah_mode && !isSequenceWithNoBits(word)) { 1083 | IsLiteral = true; 1084 | int bit = (UINT32_C(1) << ((word >> 25) % 32)) >> 1; 1085 | word = isZeroSequence(word) ? (ALL_ZEROS_LITERAL | bit) 1086 | : (ALL_ONES_LITERAL & ~bit); 1087 | } 1088 | } else { 1089 | count = 1; 1090 | } 1091 | return true; 1092 | } 1093 | 1094 | uint32_t toLiteral() { 1095 | return ALL_ZEROS_LITERAL | 1096 | (uint32_t)(((int32_t)word << 1) >> MAX_LITERAL_LENGTH); 1097 | } 1098 | 1099 | /** true if {@link #word} is a literal */ 1100 | bool IsLiteral; 1101 | const ConciseSet &parent; 1102 | 1103 | /** current word index */ 1104 | int32_t index; 1105 | /** copy of the current word */ 1106 | uint32_t word; 1107 | 1108 | /** number of blocks in the current word (1 for literals, > 1 for sequences) 1109 | */ 1110 | uint32_t count; 1111 | 1112 | 1113 | 1114 | uint32_t flushCount() { 1115 | if(exhausted()) return 0; 1116 | uint32_t cardsize = 0; 1117 | do { 1118 | if (IsLiteral) { 1119 | cardsize += getLiteralBitCount(word); 1120 | } else { 1121 | if(word & SEQUENCE_BIT) { 1122 | cardsize += 31 * count; 1123 | } 1124 | } 1125 | } while (prepareNext()); 1126 | return cardsize; 1127 | } 1128 | 1129 | bool flushEmpty() { 1130 | if(exhausted()) return true; 1131 | do { 
/**
 * Moves everything this iterator has not consumed yet into s.
 *
 * The current word, and possibly a few following ones, go through
 * appendLiteral()/appendFill() so they can merge with the tail of s; the
 * rest of the parent's words are then copied verbatim. s.last is set to the
 * parent's last.
 *
 * No resizing of s.words happens in this method, so the destination slots
 * are written directly.
 *
 * @param s destination set
 * @return false when the iterator was already exhausted, true otherwise
 */
bool flush(ConciseSet &s) {
  // nothing to flush
  if (exhausted())
    return false;

  // try to "compress" the first few words
  do {
    if (IsLiteral) {
      s.appendLiteral(word);
    } else {
      s.appendFill(count, word);
    }
    // The condition runs after prepareNext(), so "word" is the newly loaded
    // word: the loop stops when the input ends or when that word equals the
    // last word stored in s.
    // NOTE(review): the intent of this equality test is not evident from
    // the code alone — confirm.
  } while (prepareNext() && s.words[s.lastWordIndex] != word);

  // copy remaining words "as-is"
  int32_t delta = parent.lastWordIndex - index + 1; // words from index to the end of parent
  for (int i = 0; i < delta; ++i) {
    s.words[s.lastWordIndex + 1 + i] = parent.words[index + i];
  }
  s.lastWordIndex += delta;
  s.last = parent.last;
  return true;
}
1178 | */ 1179 | value_type operator*() const { return current_value; } 1180 | 1181 | bool operator<(const type_of_iterator &o) { 1182 | if (!has_value) 1183 | return false; 1184 | if (!o.has_value) 1185 | return true; 1186 | return current_value < *o; 1187 | } 1188 | 1189 | bool operator<=(const type_of_iterator &o) { 1190 | if (!o.has_value) 1191 | return true; 1192 | if (!has_value) 1193 | return false; 1194 | return current_value <= *o; 1195 | } 1196 | 1197 | bool operator>(const type_of_iterator &o) { 1198 | if (!o.has_value) 1199 | return false; 1200 | if (!has_value) 1201 | return true; 1202 | return current_value > *o; 1203 | } 1204 | 1205 | bool operator>=(const type_of_iterator &o) { 1206 | if (!has_value) 1207 | return true; 1208 | if (!o.has_value) 1209 | return false; 1210 | return current_value >= *o; 1211 | } 1212 | 1213 | type_of_iterator &operator++() { // ++i, must returned inc. value 1214 | advanceToNextBit(); 1215 | return *this; 1216 | } 1217 | 1218 | type_of_iterator operator++(int) { // i++, must return orig. 
value 1219 | ConciseSetBitForwardIterator orig(*this); 1220 | advanceToNextBit(); 1221 | return orig; 1222 | } 1223 | 1224 | bool operator==(const ConciseSetBitForwardIterator &o) { 1225 | if ((has_value == false) && (o.has_value == false)) 1226 | return true; 1227 | return (current_value == *o) && (has_value == o.has_value); 1228 | } 1229 | 1230 | bool operator!=(const ConciseSetBitForwardIterator &o) { 1231 | return !(*this == o); 1232 | } 1233 | ConciseSetBitForwardIterator(const ConciseSet &parent, 1234 | bool exhausted = false) 1235 | : word_location(0), current_value(0), has_value(true), word_value(0), 1236 | i(parent) { 1237 | if (exhausted) { 1238 | i.exhaust(); 1239 | has_value = false; 1240 | } else { 1241 | word_location = -1; // we will automatically advance to zero 1242 | advanceToNextBit(); 1243 | } 1244 | } 1245 | 1246 | void advanceToNextBit() { 1247 | if (word_value == 0) { 1248 | 1249 | word_location++; // one word exhausted 1250 | while (!i.exhausted()) { 1251 | if (i.IsLiteral) { 1252 | word_value = getLiteralBits(i.word); 1253 | i.prepareNext(); 1254 | if (word_value == 0) { 1255 | word_location++; // one word exhausted 1256 | continue; 1257 | } 1258 | break; 1259 | } 1260 | word_value = getLiteralBits(i.toLiteral()); 1261 | 1262 | if (word_value == 0) { 1263 | 1264 | word_location += i.count; 1265 | i.prepareNext(i.count); 1266 | } else { 1267 | i.word--; 1268 | i.prepareNext(1); 1269 | break; 1270 | } 1271 | } 1272 | } 1273 | if (word_value != 0) { 1274 | uint32_t t = word_value & (-word_value); 1275 | has_value = true; 1276 | current_value = word_location * 31 + __builtin_popcount(t - 1); 1277 | word_value ^= t; 1278 | } else { 1279 | has_value = false; 1280 | } 1281 | } 1282 | 1283 | ConciseSetBitForwardIterator & 1284 | operator=(const ConciseSetBitForwardIterator &o) = default; 1285 | ConciseSetBitForwardIterator & 1286 | operator=(ConciseSetBitForwardIterator &&o) = default; 1287 | 1288 | ~ConciseSetBitForwardIterator() = default; 1289 | 
1290 | ConciseSetBitForwardIterator(const ConciseSetBitForwardIterator &o) 1291 | : i(o.i) {} 1292 | uint32_t word_location; 1293 | uint32_t current_value; 1294 | bool has_value; 1295 | uint32_t word_value; 1296 | WordIterator i; 1297 | }; 1298 | 1299 | template 1300 | inline ConciseSetBitForwardIterator 1301 | ConciseSet::begin() const { 1302 | return ConciseSetBitForwardIterator(*this); 1303 | } 1304 | 1305 | template 1306 | inline ConciseSetBitForwardIterator& 1307 | ConciseSet::end() const { 1308 | static ConciseSetBitForwardIterator endp(*this, true); 1309 | return endp; 1310 | } 1311 | -------------------------------------------------------------------------------- /include/conciseutil.h: -------------------------------------------------------------------------------- 1 | #ifndef CONCISEUTIL_H 2 | #define CONCISEUTIL_H 3 | #include 4 | 5 | /** 6 | * The highest representable integer. 7 | */ 8 | constexpr static uint32_t MAX_ALLOWED_INTEGER = 9 | 31 * (UINT32_C(1) << 25) + 30; // 1040187422 10 | 11 | /** 12 | * Maximum number of representable bits within a literal 13 | */ 14 | constexpr static uint32_t MAX_LITERAL_LENGTH = UINT32_C(31); 15 | 16 | /** 17 | * Literal that represents all bits set to 1 (and MSB = 1) 18 | */ 19 | constexpr static uint32_t ALL_ONES_LITERAL = UINT32_C(0xFFFFFFFF); 20 | 21 | /** 22 | * Literal that represents all bits set to 0 (and MSB = 1) 23 | */ 24 | constexpr static uint32_t ALL_ZEROS_LITERAL = UINT32_C(0x80000000); 25 | 26 | /** 27 | * All bits set to 1 and MSB = 0 28 | */ 29 | constexpr static uint32_t ALL_ONES_WITHOUT_MSB = UINT32_C(0x7FFFFFFF); 30 | 31 | /** 32 | * Sequence bit 33 | */ 34 | constexpr static uint32_t SEQUENCE_BIT = UINT32_C(0x40000000); 35 | 36 | /** 37 | * Calculates the modulus division by 31 in a faster way than using n % 31 38 | */ 39 | static inline uint32_t maxLiteralLengthModulus(uint32_t n) { 40 | return n % 31; 41 | // following code is a bad idea. 
/**
 * Multiplies n by 31, the number of bits carried by one literal word.
 * The arithmetic is unsigned, so the result wraps modulo 2^32.
 */
static inline uint32_t maxLiteralLengthMultiplication(uint32_t n) {
  const uint32_t bitsPerBlock = UINT32_C(31);
  return n * bitsPerBlock;
}
/**
 * Extracts the counter field stored in a sequence word.
 *
 * @tparam wah_mode when true the counter occupies the 30 least significant
 *         bits; when false it occupies the 25 least significant bits
 * @param word sequence word
 * @return the raw counter value, with all higher bits masked off
 */
template <bool wah_mode>
static inline uint32_t getSequenceCount(uint32_t word) {
  return word & (wah_mode ? UINT32_C(0x3FFFFFFF) : UINT32_C(0x01FFFFFF));
}
/**
 * Reads the 5-bit field stored at bits 25..29 of a sequence word and
 * converts it to a bit position.
 *
 * @param word sequence word
 * @return field value minus one, i.e. -1 when the field is 00000 and
 *         0..30 otherwise (00001 denotes the least significant bit)
 */
static inline int getFlippedBit(uint32_t word) {
  const uint32_t field = (word >> 25) % UINT32_C(32);
  return static_cast<int>(field) - 1;
}
4294950911, 15 | 3220963292, 4294967040, 1073741827, 4286545919, 1073741827, 4294963199, 16 | 1644167169, 4276084735, 1073741825, 4278190079, 1174405127, 1107296278, 17 | 4169072639, 2684289023, 1107296257, 3489660927, 1073741825, 1241513985, 18 | 4294967287, 3766484991, 2280131696, 2279606018, 201326594, 4093640704, 19 | 2415918081, 2680422384, 3091529696, 2406514692, 2147549564, 2147483648, 20 | 4161142784, 2814443519, 3089215115, 2147483648, 4229955520, 2240213951, 21 | 2365585408, 3758096416, 2147483648, 2147499008, 3724602370, 2147483692, 22 | 4294967295, 33554434, 4227858432, 4294705155, 2415919103, 3231715328, 23 | 2147512831, 2147483648, 2155871792, 2147483648, 2147548928, 2147552256, 24 | 2, 4294967288, 2147483711, 42, 4261412864, 2147491839, 25 | 3, 4294901760, 1073741829, 2148532223, 2, 3188489984, 26 | 2198083456, 2147483648, 2147516288, 5, 4294967040, 4294967295, 27 | 2214594559, 1107296314, 2147755391, 2802001856, 2147483648, 2161252320, 28 | 4197981184, 2612015615, 4028616012, 2147585087, 2147483648, 4294967232, 29 | 3221094407, 4035960832, 4275838855, 2281766776, 2384461861, 4294642816, 30 | 4294967295, 2147499246, 1, 4026531840, 2147876991, 2163729394, 31 | 2164260868, 2831155280, 2155626498, 2380267521, 2147483679, 2147516288, 32 | 2155868160, 2147483648, 2147483679, 23, 2952790016, 4286578688, 33 | 2147485695, 2147483648, 4294901760, 2147614719, 2147483648, 2147499904, 34 | 4290772992, 2147485695, 2147483772, 4294967168, 4294967295, 2214592511, 35 | 2147483648, 4278190080, 1073741825, 2147483663, 2214591488, 4278190080, 36 | 4294967295, 2147483775, 3498082307, 3893362688, 2385780971, 2348810240, 37 | 2218786304, 4294443013, 2153776127, 2147483648, 4294959104, 2147491839, 38 | 4278321024, 4261670943, 4286955567, 3257925631, 2147483679, 1, 39 | 4294901984, 4286775299, 2147680303, 2972715008, 4177535616, 2147483666, 40 | 2147484640, 2147484152, 14, 4294705152, 2147484159, 2147483648, 41 | 2147614592, 2147483648, 4294967288, 2181038079, 2147483648, 
4294967040, 42 | 3114287111, 2650800128, 2148598052, 2630811648, 2148401154, 2413864232, 43 | 2268065822, 2147486832, 3772792576, 2147484128, 2, 3221225472, 44 | 4294959105, 4294967295, 4278222847, 4294967295, 4160815047, 2216689663, 45 | 2277904564, 3791810048, 2178941184, 1, 4278190080, 2155872255, 46 | 4294937724, 1073741845, 3256877055, 2147488928, 1, 1107296550, 47 | 2684354559, 2149548100, 2, 2149580796, 3237994496, 2152726496, 48 | 4, 2181038048, 2147483648, 2164260848, 637534212, 4160847872, 49 | 3087090751, 2147484544, 3284140032, 4294739967, 4294967295, 2669641727, 50 | 2147483648, 2214461440, 2454715904, 2684354572, 4027027472, 3219382499, 51 | 2, 1107296258, 2214592511, 27, 4261412864, 1073741831, 52 | 2148007935, 10, 4290772992, 1073741827, 3221225471, 2, 53 | 3758096384, 3204939783, 2147516408, 4294965248, 3220430849, 2147483648, 54 | 2411724800, 2147485688, 3087008206, 2147483648, 4162813952, 4294967292, 55 | 1073741834, 4294967071, 1845493762, 2650800079, 2148401664, 4294836224, 56 | 2147484671, 26, 4227858432, 1073741825, 2147516415, 2, 57 | 4294443008, 4294967295, 2214592511, 2147483648, 2264924160, 2198208512, 58 | 2147991552, 14, 3758096384, 2148007935, 3, 4292870144, 59 | 3221225471, 2, 1040187395, 2147485568, 3758161919, 2147975184, 60 | 2, 4294967232, 2147485695, 4026531840, 2148925443, 4291817472, 61 | 2147484611, 2279620608, 2147483768, 13, 4294967280, 1073741826, 62 | 2147483655, 382, 3758096384, 1073741837, 2147549183, 64, 63 | 4294965248, 1073741858, 49, 2147499008, 1, 3758128128, 64 | 4161798143, 2, 2684354556, 2415902720, 2147516416, 2953838592, 65 | 2550136856, 5, 4294836224, 2285920259, 3372236800, 2147857347, 66 | 2151677951, 2147483648, 4294446080, 2445435391, 2273443312, 2174992384, 67 | 4227921920, 2348929540, 1, 4227858432, 2281701375, 30, 68 | 4290772992, 1073741831, 2147483651, 2147483648, 2153905152, 2147664944, 69 | 2277554412, 2147745856, 2147483774, 2180907008, 2684403792, 2149548032, 70 | 7, 4292870144, 2684354559, 
2282291200, 4278190080, 2147545103, 71 | 2151678232, 1, 2157968896, 3229351944, 2348810273, 2147643896, 72 | 3221848256, 2157969601, 2148548608, 2148015104, 3187671041, 2147483648, 73 | 436207628, 301989889, 4290772992, 4294967295, 2415919103, 1, 74 | 2147498012, 2149056528, 2147483652, 2683830272, 2147483779, 3221062080, 75 | 4027056127, 4294966911, 4294967295, 4194287615, 4294967295, 3489644543, 76 | 4227858431, 4294606799, 4260364287, 4294967232, 1073741825, 3756523503, 77 | 4294837247, 2681208831, 2147483711, 2147483648, 4294967280, 1073741830, 78 | 4294967280, 1073741834, 4225732607, 4294442975, 4270284787, 4026531840, 79 | 1107296262, 1442840578, 2113929217, 4294832127, 2147549183}; 80 | const int N = sizeof(data) / sizeof(uint32_t); 81 | ConciseSet c; 82 | c.words.resize(N); 83 | for (int i = 0; i < N; ++i) 84 | c.words[++c.lastWordIndex] = data[i]; 85 | ConciseSet res; 86 | res.words.resize(c.words.size()); 87 | WordIterator thisItr(c); 88 | thisItr.flush(res); 89 | assert(c.lastWordIndex == res.lastWordIndex); 90 | assert(c.size() == res.size()); 91 | } 92 | 93 | template void heaportest() { 94 | std::cout << "[[[" << __PRETTY_FUNCTION__ << "]]]" << std::endl; 95 | ConciseSet *test[3]; 96 | ConciseSet test1; 97 | for (int k = 0; k < 100; k += 7) 98 | test1.add(k); 99 | ConciseSet test2; 100 | for (int k = 0; k < 100; k += 15) 101 | test2.add(k); 102 | ConciseSet test3; 103 | for (int k = 0; k < 100; k += 2) 104 | test3.add(k); 105 | test[0] = &test1; 106 | test[1] = &test2; 107 | test[2] = &test3; 108 | ConciseSet answer = ConciseSet::fast_logicalor( 109 | 3, (const ConciseSet **)&test[0]); 110 | assert(answer.size() == 60); 111 | size_t longcounter = 0; 112 | for (auto i = answer.begin(); i != answer.end(); ++i) 113 | longcounter++; 114 | assert(longcounter == 60); 115 | ConciseSet tmp; 116 | size_t expectedandsize1 = answer.logicalandCount(test1); 117 | tmp = answer.logicaland(test1); 118 | assert(expectedandsize1 == tmp.size()); 119 | assert(tmp.size() 
== test1.size()); 120 | size_t expectedandsize2 = answer.logicalandCount(test2); 121 | tmp = answer.logicaland(test2); 122 | assert(expectedandsize2 == tmp.size()); 123 | assert(tmp.size() == test2.size()); 124 | size_t expectedandsize3 = answer.logicalandCount(test3); 125 | tmp = answer.logicaland(test3); 126 | assert(expectedandsize3 == tmp.size()); 127 | assert(tmp.size() == test3.size()); 128 | } 129 | 130 | template void basictest() { 131 | std::cout << "[[[" << __PRETTY_FUNCTION__ << "]]]" << std::endl; 132 | ConciseSet test1; 133 | test1.add(1); 134 | assert(test1.contains(1)); 135 | test1.add(2); 136 | assert(test1.contains(2)); 137 | test1.add(3); 138 | assert(test1.contains(3)); 139 | test1.add(100); 140 | assert(test1.contains(100)); 141 | test1.add(1000); 142 | assert(test1.contains(1000)); 143 | assert(test1.size() == 5); 144 | ConciseSet test2; 145 | test2.add(0); 146 | assert(test2.contains(0)); 147 | test2.add(2); 148 | assert(test2.contains(2)); 149 | test2.add(3); 150 | assert(test2.contains(3)); 151 | test2.add(100); 152 | assert(test2.contains(100)); 153 | test2.add(3000); 154 | assert(test2.contains(3000)); 155 | assert(test2.size() == 5); 156 | ConciseSet tmp; 157 | assert(test1.logicalorCount(test2) == 7); 158 | tmp = test1.logicalor(test2); 159 | assert(tmp.size() == 7); 160 | assert(test1.logicalandCount(test2) == 3); 161 | tmp = test1.logicaland(test2); 162 | assert(tmp.size() == 3); 163 | tmp.add(100000); 164 | assert(tmp.size() == 4); 165 | tmp.shrink_to_fit(); 166 | } 167 | 168 | template void longtest() { 169 | std::cout << "[[[" << __PRETTY_FUNCTION__ << "]]]" << std::endl; 170 | 171 | ConciseSet testc; 172 | for (int k = 0; k < 1000; ++k) { 173 | testc.add(k * 2); 174 | testc.add(k * 2 + 1); 175 | } 176 | assert(testc.size() == 2000); 177 | for (int k = 0; k < 1000; ++k) { 178 | assert(testc.contains(k * 2)); 179 | assert(testc.contains(k * 2 + 1)); 180 | } 181 | ConciseSet test1; 182 | for (int k = 0; k < 1000; ++k) { 183 | 
test1.add(k * 2); 184 | } 185 | for (int k = 0; k < 1000; ++k) { 186 | assert(test1.contains(k * 2)); 187 | assert(!test1.contains(k * 2 + 1)); 188 | } 189 | assert(test1.size() == 1000); 190 | ConciseSet shouldbetest1; 191 | assert(testc.logicalandCount(test1) == 1000); 192 | shouldbetest1 = testc.logicaland(test1); 193 | assert(shouldbetest1.size() == 1000); 194 | for (int k = 0; k < 1000; ++k) { 195 | assert(shouldbetest1.contains(k * 2)); 196 | assert(!shouldbetest1.contains(k * 2 + 1)); 197 | } 198 | ConciseSet test2; 199 | for (int k = 0; k < 1000; ++k) { 200 | test2.add(k * 2 + 1); 201 | } 202 | for (int k = 0; k < 1000; ++k) { 203 | assert(!test2.contains(k * 2)); 204 | assert(test2.contains(k * 2 + 1)); 205 | } 206 | assert(test2.size() == 1000); 207 | 208 | ConciseSet tmp; 209 | assert(test1.logicalorCount(test2) == 2000); 210 | tmp = test1.logicalor(test2); 211 | assert(tmp.size() == 2000); 212 | for (int k = 0; k < 1000; ++k) { 213 | assert(tmp.contains(k * 2)); 214 | assert(tmp.contains(k * 2 + 1)); 215 | } 216 | assert(tmp.intersects(test2)); 217 | assert(tmp.logicalandCount(test2) == 1000); 218 | tmp = tmp.logicaland(test2); 219 | assert(tmp.size() == 1000); 220 | assert(test1.intersects(test2) == false); 221 | assert(test1.logicalandCount(test2) == 0); 222 | tmp = test1.logicaland(test2); 223 | assert(tmp.size() == 0); 224 | } 225 | 226 | static std::set subtract(std::set h1, 227 | std::set h2) { 228 | std::set answer; 229 | answer.clear(); 230 | for (std::set::iterator i = h1.begin(); i != h1.end(); i++) { 231 | if (h2.find(*i) == h2.end()) 232 | answer.insert(*i); 233 | } 234 | return answer; 235 | } 236 | 237 | static std::set symmetrically_subtract(std::set h1, 238 | std::set h2) { 239 | std::set answer; 240 | answer.insert(h1.begin(), h1.end()); 241 | for (std::set::iterator i = h2.begin(); i != h2.end(); i++) { 242 | auto x = answer.find(*i); 243 | if (x == answer.end()) 244 | answer.insert(*i); 245 | else 246 | answer.erase(x); 247 | } 248 | 
return answer; 249 | } 250 | 251 | static std::set unite(std::set s1, std::set s2) { 252 | std::set answer; 253 | for (std::set::iterator i = s1.begin(); i != s1.end(); i++) { 254 | answer.insert(*i); 255 | } 256 | for (std::set::iterator i = s2.begin(); i != s2.end(); i++) { 257 | answer.insert(*i); 258 | } 259 | return answer; 260 | } 261 | 262 | static std::set intersect(std::set s1, 263 | std::set s2) { 264 | std::set answer; 265 | for (std::set::iterator i = s1.begin(); i != s1.end(); i++) { 266 | if (s2.find(*i) != s2.end()) { 267 | answer.insert(*i); 268 | } 269 | } 270 | return answer; 271 | } 272 | 273 | template 274 | static bool equals(std::set s, ConciseSet c) { 275 | if (s.size() != c.size()) 276 | return false; 277 | // we go one way 278 | for (std::set::iterator i = s.begin(); i != s.end(); i++) { 279 | if (!c.contains(*i)) 280 | return false; 281 | } 282 | // we go both ways 283 | std::set::iterator a = s.begin(); 284 | auto b = c.begin(); 285 | for (; (a != s.end()) && (b != c.end()); a++, b++) { 286 | if (*a != *b) { 287 | return false; 288 | } 289 | } 290 | 291 | // we go another way 292 | for (auto i = c.begin(); i != c.end(); i++) { 293 | if (s.find(*i) == s.end()) { 294 | std::cout << " There is probably an issue with the ConciseSet iterators? 
" 295 | << std::endl; 296 | return false; 297 | } 298 | } 299 | return true; 300 | } 301 | 302 | template void toytest() { 303 | 304 | std::cout << "[[[" << __PRETTY_FUNCTION__ << "]]]" << std::endl; 305 | 306 | ConciseSet test1; 307 | std::set set1; 308 | 309 | for (int k = 0; k < 30; k += 3) { 310 | test1.add(k); 311 | set1.insert(k); 312 | } 313 | 314 | ConciseSet test2; 315 | std::set set2; 316 | for (int k = 0; k < 30; k += 5) { 317 | test2.add(k); 318 | set2.insert(k); 319 | } 320 | std::set trueunion = unite(set1, set2); 321 | std::set trueinter = intersect(set1, set2); 322 | std::set truesubtract = subtract(set1, set2); 323 | std::set truesymsubtract = symmetrically_subtract(set1, set2); 324 | ConciseSet union1; 325 | ConciseSet union2; 326 | size_t expunion1 = test1.logicalorCount(test2); 327 | union1 = test1.logicalor(test2); 328 | assert(union1.size() == expunion1); 329 | union2 = test1.logicalor(test2); 330 | assert(equals(trueunion, union1)); 331 | assert(equals(trueunion, union2)); 332 | ConciseSet intersect1; 333 | ConciseSet intersect2; 334 | size_t expinter1 = test1.logicalandCount(test2); 335 | intersect1 = test1.logicaland(test2); 336 | assert(expinter1 == intersect1.size()); 337 | intersect2 = test1.logicaland(test2); 338 | assert(equals(trueinter, intersect1)); 339 | assert(equals(trueinter, intersect2)); 340 | ConciseSet symsubtract1; 341 | ConciseSet symsubtract2; 342 | symsubtract1 = test1.logicalxor(test2); 343 | symsubtract2 = test1.logicalxor(test2); 344 | assert(equals(truesymsubtract, symsubtract1)); 345 | assert(equals(truesymsubtract, symsubtract2)); 346 | ConciseSet subtract1; 347 | ConciseSet subtract2; 348 | subtract1 = test1.logicalandnot(test2); 349 | subtract2 = test1.logicalandnot(test2); 350 | assert(equals(truesubtract, subtract1)); 351 | assert(equals(truesubtract, subtract2)); 352 | } 353 | template void iteratortest() { 354 | std::cout << "[[[" << __PRETTY_FUNCTION__ << "]]]" << std::endl; 355 | 356 | uint32_t data[] = 
{3515, 5185, 7796, 33347, 45641, 51779, 53188, 357 | 60664, 68454, 68574, 74234, 78260, 82877, 100026, 358 | 111016, 116633, 117789, 119044, 119103, 146771, 159597, 359 | 163210, 181124, 182343, 187302, 187876, 191494}; 360 | const int N = sizeof(data) / sizeof(uint32_t); 361 | ConciseSet test1; 362 | for (int k = 0; k < N; ++k) { 363 | test1.add(data[k]); 364 | } 365 | assert(test1.size() == N); 366 | int c = 0; 367 | for (auto i = test1.begin(); i != test1.end(); ++i) { 368 | assert(c < N); 369 | assert(*i == data[c]); 370 | c++; 371 | } 372 | assert(c == N); 373 | } 374 | 375 | template void variedtest() { 376 | 377 | std::cout << "[[[" << __PRETTY_FUNCTION__ << "]]]" << std::endl; 378 | 379 | ConciseSet test1; 380 | std::set set1; 381 | 382 | for (int k = 0; k < 1000; ++k) { 383 | test1.add(k); 384 | set1.insert(k); 385 | } 386 | for (int k = 1000; k < 2000; k += 2) { 387 | test1.add(k); 388 | set1.insert(k); 389 | } 390 | for (int k = 5000; k < 6000; k += 1) { 391 | test1.add(k); 392 | set1.insert(k); 393 | } 394 | for (int k = 6500; k < 8000; k += 1) { 395 | test1.add(k); 396 | set1.insert(k); 397 | } 398 | 399 | ConciseSet test2; 400 | std::set set2; 401 | for (int k = 0; k < 1100; k += 3) { 402 | test2.add(k); 403 | set2.insert(k); 404 | } 405 | for (int k = 1100; k < 2000; ++k) { 406 | test2.add(k); 407 | set2.insert(k); 408 | } 409 | for (int k = 5500; k < 6500; ++k) { 410 | test2.add(k); 411 | set2.insert(k); 412 | } 413 | for (int k = 7500; k < 8100; ++k) { 414 | test2.add(k); 415 | set2.insert(k); 416 | } 417 | std::set trueunion = unite(set1, set2); 418 | std::set trueinter = intersect(set1, set2); 419 | std::set truesubtract = subtract(set1, set2); 420 | std::set truesymsubtract = symmetrically_subtract(set1, set2); 421 | 422 | ConciseSet union1; 423 | ConciseSet union2; 424 | size_t expunion1 = test1.logicalorCount(test2); 425 | union1 = test1.logicalor(test2); 426 | assert(union1.size() == expunion1); 427 | union2 = test1.logicalor(test2); 428 | 
assert(equals(trueunion, union1)); 429 | assert(equals(trueunion, union2)); 430 | ConciseSet intersect1; 431 | ConciseSet intersect2; 432 | size_t expinter1 = test1.logicalandCount(test2); 433 | intersect1 = test1.logicaland(test2); 434 | assert(expinter1 == intersect1.size()); 435 | intersect2 = test1.logicaland(test2); 436 | assert(equals(trueinter, intersect1)); 437 | assert(equals(trueinter, intersect2)); 438 | ConciseSet symsubtract1; 439 | ConciseSet symsubtract2; 440 | symsubtract1 = test1.logicalxor(test2); 441 | symsubtract2 = test1.logicalxor(test2); 442 | assert(equals(truesymsubtract, symsubtract1)); 443 | assert(equals(truesymsubtract, symsubtract2)); 444 | ConciseSet subtract1; 445 | ConciseSet subtract2; 446 | subtract1 = test1.logicalandnot(test2); 447 | subtract2 = test1.logicalandnot(test2); 448 | assert(equals(truesubtract, subtract1)); 449 | assert(equals(truesubtract, subtract2)); 450 | } 451 | 452 | template void realtest() { 453 | 454 | std::cout << "[[[" << __PRETTY_FUNCTION__ << "]]]" << std::endl; 455 | 456 | uint32_t data1[] = { 457 | 1, 13, 62, 120, 132, 133, 134, 136, 143, 159, 170, 175, 458 | 191, 222, 233, 255, 274, 317, 342, 346, 375, 388, 419, 453, 459 | 455, 470, 485, 503, 506, 523, 536, 542, 548, 556, 570, 574, 460 | 587, 600, 603, 622, 634, 674, 703, 714, 757, 764, 765, 768, 461 | 796, 836, 841, 844, 851, 867, 881, 942, 946, 947, 960, 985, 462 | 1035, 1045, 1106, 1126, 1128, 1135, 1165, 1191, 1201, 1239, 1248, 1267, 463 | 1286, 1357, 1362, 1374, 1375, 1388, 1392, 1401, 1405, 1475, 1477, 1494, 464 | 1509, 1517, 1518, 1535, 1544, 1548, 1553, 1564, 1573, 1577, 1584, 1589, 465 | 1610, 1615, 1629, 1643, 1649, 1673, 1677, 1702, 1718, 1723, 1730, 1734, 466 | 1749, 1758, 1774, 1846, 1847, 1878, 1880, 1912, 1913, 1915, 1919, 1946, 467 | 1962, 1984, 2034, 2038, 2040, 2082, 2083, 2094, 2101, 2102, 2110, 2125, 468 | 2129, 2134, 2136, 2151, 2155, 2165, 2176, 2204, 2209, 2229, 2237, 2258, 469 | 2276, 2283, 2284, 2292, 2295, 2300, 2306, 2308, 
2339, 2343, 2347, 2370, 470 | 2386, 2420, 2430, 2450, 2451, 2454, 2459, 2462, 2463, 2489, 2496, 2511, 471 | 2521, 2522, 2525, 2535, 2537, 2563, 2564, 2571, 2576, 2609, 2619, 2626, 472 | 2638, 2647, 2654, 2672, 2702, 2703, 2719, 2743, 2744, 2757, 2798, 2816, 473 | 2836, 2864, 2898, 2907, 2916, 2923, 2934, 2935, 2941, 3032, 3074, 3083, 474 | 3113, 3126, 3130, 3137, 3160, 3169, 3192, 3196, 3198, 3215, 3223, 3258, 475 | 3259, 3293, 3309, 3350, 3394, 3416, 3427, 3442, 3455, 3521, 3525, 3533, 476 | 3536, 3553, 3561, 3593, 3615, 3630, 3659, 3677, 3698, 3707, 3718, 3722, 477 | 3742, 3752, 3753, 3757, 3759, 3832, 3834, 3856, 3857, 3860, 3862, 3866, 478 | 3886, 3891, 3896, 3897, 3945, 3962, 3964, 3977, 3986, 3995, 4006, 4011, 479 | 4015, 4022, 4030, 4058, 4065, 4086, 4100, 4133, 4134, 4152, 4182, 4198, 480 | 4207, 4214, 4246, 4275, 4280, 4301, 4303, 4308, 4311, 4312, 4339, 4344, 481 | 4357, 4361, 4372, 4384, 4387, 4395, 4412, 4420, 4449, 4459, 4462, 4504, 482 | 4509, 4523, 4531, 4536, 4545, 4565, 4616, 4627, 4677, 4679, 4699, 4702, 483 | 4725, 4728, 4740, 4746, 4750, 4777, 4787, 4805, 4812, 4821, 4828, 4837, 484 | 4844, 4860, 4873, 4876, 4878, 4908, 4920, 4926, 4928, 4939, 4958, 4963, 485 | 4970, 4990, 4994, 4995, 5007, 5021, 5029, 5050, 5051, 5062, 5063, 5076, 486 | 5095, 5132, 5168, 5191, 5195, 5218, 5243, 5258, 5304, 5326, 5341, 5347, 487 | 5373, 5379, 5407, 5431, 5446, 5458, 5465, 5477, 5486, 5501, 5522, 5549, 488 | 5578, 5603, 5604, 5626, 5640, 5643, 5663, 5664, 5679, 5686, 5687, 5692, 489 | 5701, 5710, 5723, 5724, 5759, 5760, 5761, 5767, 5784, 5788, 5796, 5807, 490 | 5828, 5839, 5846, 5851, 5881, 5909, 5910, 5911, 5923, 5954, 5989, 5993, 491 | 6002, 6003, 6020, 6022, 6034, 6070, 6083, 6087, 6118, 6123, 6127, 6134, 492 | 6140, 6174, 6180, 6197, 6218, 6227, 6237, 6250, 6260, 6275, 6280, 6304, 493 | 6325, 6338, 6352, 6366, 6384, 6397, 6417, 6425, 6446, 6460, 6464, 6495, 494 | 6504, 6526, 6532, 6543, 6564, 6583, 6643, 6650, 6651, 6701, 6704, 6730, 495 | 6743, 6750, 6776, 
6777, 6779, 6784, 6798, 6807, 6846, 6850, 6871, 6922, 496 | 6924, 6952, 6955, 6956, 6968, 6974, 6998, 7003, 7029, 7038, 7090, 7106, 497 | 7107, 7108, 7133, 7193, 7209, 7210, 7212, 7228, 7240, 7242, 7289, 7301, 498 | 7325, 7351, 7391, 7393, 7404, 7429, 7453, 7490, 7491, 7495, 7545, 7580, 499 | 7601, 7611, 7660, 7670, 7682, 7713, 7782, 7790, 7803, 7823, 7828, 7830, 500 | 7843, 7846, 7876, 7885, 7913, 7922, 7932, 7942, 7947, 7964, 7983, 7996, 501 | 7998, 8003, 8007, 8011, 8016, 8034, 8054, 8076, 8086, 8118, 8123, 8134, 502 | 8143, 8149, 8158, 8160, 8208, 8212, 8230, 8245, 8252, 8253, 8257, 8260, 503 | 8295, 8303, 8331, 8350, 8362, 8365, 8367, 8370, 8374, 8378, 8409, 8441, 504 | 8510, 8541, 8571, 8587, 8592, 8621, 8664, 8681, 8692, 8695, 8699, 8719, 505 | 8729, 8776, 8783, 8804, 8812, 8827, 8857, 8904, 8928, 8938, 8961, 8969, 506 | 8974, 8995, 8998, 9002, 9004, 9017, 9028, 9029, 9048, 9068, 9079, 9098}; 507 | const int N1 = sizeof(data1) / sizeof(uint32_t); 508 | 509 | uint32_t data2[] = { 510 | 0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 511 | 15, 16, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 512 | 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 513 | 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 514 | 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 515 | 72, 73, 74, 75, 76, 77, 78, 80, 81, 82, 83, 84, 85, 86, 516 | 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 100, 101, 517 | 102, 103, 104, 105, 106, 107, 108, 110, 111, 112, 113, 114, 115, 116, 518 | 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 128, 130, 131, 132, 519 | 133, 134, 135, 136, 137, 138, 139, 140, 141, 143, 144, 145, 146, 148, 520 | 149, 150, 151, 152, 153, 154, 155, 157, 158, 159, 160, 161, 162, 164, 521 | 165, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 522 | 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 523 | 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 524 | 208, 209, 210, 211, 212, 213, 
214, 215, 217, 219, 220, 221, 222, 223, 525 | 224, 225, 226, 227, 228, 229, 231, 233, 234, 235, 236, 237, 238, 239, 526 | 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 527 | 254, 255, 256, 257, 258, 259, 260, 261, 263, 264, 265, 266, 267, 268, 528 | 270, 271, 272, 273, 274, 275, 276, 277, 278, 279, 280, 281, 282, 283, 529 | 284, 285, 286, 287, 288, 289, 290, 291, 292, 294, 295, 296, 297, 298, 530 | 299, 300, 301, 302, 303, 304, 305, 306, 307, 308, 309, 310, 311, 313, 531 | 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, 325, 326, 327, 328, 532 | 329, 330, 331, 332, 333, 334, 335, 336, 337, 338, 339, 340, 341, 342, 533 | 343, 344, 345, 346, 347, 348, 349, 350, 351, 352, 353, 355, 356, 357, 534 | 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 535 | 372, 373, 374, 375, 376, 377, 379, 380, 381, 382, 383, 385, 386, 387, 536 | 388, 389, 390, 391, 392, 393, 394, 395, 396, 397, 398, 400, 401, 402, 537 | 403, 405, 406, 407, 408, 409, 410, 411, 412, 413, 414, 415, 416, 417, 538 | 418, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 430, 431, 539 | 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 443, 444, 445, 540 | 446, 447, 448, 449, 450, 451, 452, 453, 454, 455, 456, 457, 458, 459, 541 | 460, 461, 462, 463, 465, 466, 467, 468, 469, 470, 471, 472, 473, 474, 542 | 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 543 | 489, 490, 491, 492, 493, 494, 495, 496, 497, 498, 499, 500, 501, 502, 544 | 503, 504, 505, 506, 508, 509, 510, 511, 512, 513, 514, 515, 516, 517, 545 | 518, 519, 520, 521, 522, 523, 524, 525, 526, 527, 528, 529, 530, 531, 546 | 532, 533, 534, 535, 536, 537, 538, 539, 540, 541, 542, 543, 544, 545, 547 | 546, 548, 549, 550, 551, 552, 553, 554, 556, 557, 558, 559, 560, 561, 548 | 562, 563, 564, 565, 566, 567, 568, 569, 570, 572, 573, 574, 575, 576, 549 | 577, 578, 579, 580, 581, 582, 584, 585, 586, 587, 588, 589, 590, 591, 550 | 592, 593, 594, 596, 597, 598, 599, 600, 601, 602, 
603, 604, 605, 606, 551 | 607, 608, 609, 610, 611, 613, 614, 615, 616, 617, 618, 619, 620, 621, 552 | 622, 623, 625, 626, 628, 629, 630, 631, 632, 634, 635, 636}; 553 | const int N2 = sizeof(data2) / sizeof(uint32_t); 554 | 555 | ConciseSet test1; 556 | std::set set1; 557 | 558 | for (int k = 0; k < N1; ++k) { 559 | test1.add(data1[k]); 560 | set1.insert(data1[k]); 561 | } 562 | 563 | ConciseSet test2; 564 | std::set set2; 565 | for (int k = 0; k < N2; ++k) { 566 | test2.add(data2[k]); 567 | set2.insert(data2[k]); 568 | } 569 | 570 | 571 | assert(test1.equals(test1)); 572 | assert(test2.equals(test2)); 573 | assert(!test2.equals(test1)); 574 | assert(!test1.equals(test2)); 575 | 576 | std::set trueunion = unite(set1, set2); 577 | std::set trueinter = intersect(set1, set2); 578 | std::set truesubtract = subtract(set1, set2); 579 | std::set truesymsubtract = symmetrically_subtract(set1, set2); 580 | 581 | ConciseSet union1; 582 | ConciseSet union2; 583 | 584 | size_t expunion1 = test1.logicalorCount(test2); 585 | union1 = test1.logicalor(test2); 586 | assert(union1.size() == expunion1); 587 | union2 = test1.logicalor(test2); 588 | assert(equals(trueunion, union1)); 589 | assert(equals(trueunion, union2)); 590 | ConciseSet intersect1; 591 | ConciseSet intersect2; 592 | size_t expinter1 = test1.logicalandCount(test2); 593 | intersect1 = test1.logicaland(test2); 594 | assert(expinter1 == intersect1.size()); 595 | intersect2 = test1.logicaland(test2); 596 | assert(equals(trueinter, intersect1)); 597 | assert(equals(trueinter, intersect2)); 598 | ConciseSet symsubtract1; 599 | ConciseSet symsubtract2; 600 | symsubtract1 = test1.logicalxor(test2); 601 | symsubtract2 = test1.logicalxor(test2); 602 | assert(equals(truesymsubtract, symsubtract1)); 603 | assert(equals(truesymsubtract, symsubtract2)); 604 | ConciseSet subtract1; 605 | ConciseSet subtract2; 606 | subtract1 = test1.logicalandnot(test2); 607 | subtract2 = test1.logicalandnot(test2); 608 | 
assert(equals(truesubtract, subtract1)); 609 | assert(equals(truesubtract, subtract2)); 610 | } 611 | 612 | int main() { 613 | checkflush(); 614 | // checkflush();// not actually safe (limitation in original code) 615 | iteratortest(); 616 | iteratortest(); 617 | heaportest(); 618 | heaportest(); 619 | basictest(); 620 | basictest(); 621 | longtest(); 622 | longtest(); 623 | toytest(); 624 | toytest(); 625 | variedtest(); 626 | variedtest(); 627 | realtest(); 628 | realtest(); 629 | 630 | std::cout << "code might be ok" << std::endl; 631 | } 632 | --------------------------------------------------------------------------------