├── .travis.yml ├── Makefile ├── bloom_filter.hpp ├── bloom_filter_example01.cpp ├── bloom_filter_example02.cpp ├── bloom_filter_example03.cpp ├── random-list.txt ├── readme.md ├── word-list-extra-large.txt ├── word-list-large.txt └── word-list.txt /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: bionic 2 | 3 | language: cpp 4 | 5 | sudo: required 6 | 7 | compiler: 8 | - gcc 9 | 10 | script: 11 | - make clean all 12 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # 2 | # Open Bloom Filter MakeFile 3 | # By Arash Partow - 2000 4 | # 5 | # URL: http://www.partow.net/programming/hashfunctions/index.html 6 | # 7 | # Copyright Notice: 8 | # Free use of this library is permitted under the guidelines 9 | # and in accordance with the most current version of the 10 | # MIT License. 11 | # http://www.opensource.org/licenses/MIT 12 | # 13 | 14 | COMPILER = -c++ 15 | #COMPILER = -clang 16 | OPTIMIZATION_OPT = -O3 17 | OPTIONS = -pedantic-errors -ansi -Wall -Wextra -Werror -Wno-long-long $(OPTIMIZATION_OPT) 18 | LINKER_OPT = -L/usr/lib -lstdc++ -lm 19 | 20 | BUILD_LIST+=bloom_filter_example01 21 | BUILD_LIST+=bloom_filter_example02 22 | BUILD_LIST+=bloom_filter_example03 23 | 24 | all: $(BUILD_LIST) 25 | 26 | $(BUILD_LIST) : %: %.cpp bloom_filter.hpp 27 | $(COMPILER) $(OPTIONS) -o $@ $@.cpp $(LINKER_OPT) 28 | 29 | strip_bin : 30 | @for f in $(BUILD_LIST); do if [ -f $$f ]; then strip -s $$f; echo $$f; fi done; 31 | 32 | valgrind : 33 | @for f in $(BUILD_LIST); do \ 34 | if [ -f $$f ]; then \ 35 | cmd="valgrind --leak-check=full --show-reachable=yes --track-origins=yes --log-file=$$f.log -v ./$$f"; \ 36 | echo $$cmd; \ 37 | $$cmd; \ 38 | fi done; 39 | 40 | clean: 41 | rm -f core *.o *.bak *stackdump *# 42 | 43 | # 44 | # The End ! 
/*
 *********************************************************************
 *                                                                   *
 *                           Open Bloom Filter                       *
 *                                                                   *
 * Author: Arash Partow - 2000                                       *
 * URL: http://www.partow.net                                        *
 * URL: http://www.partow.net/programming/hashfunctions/index.html   *
 *                                                                   *
 * Copyright notice:                                                 *
 * Free use of the Open Bloom Filter Library is permitted under the  *
 * guidelines and in accordance with the MIT License.                *
 * http://www.opensource.org/licenses/MIT                            *
 *                                                                   *
 *********************************************************************
*/


#ifndef INCLUDE_BLOOM_FILTER_HPP
#define INCLUDE_BLOOM_FILTER_HPP

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <limits>
#include <string>
#include <vector>


static const std::size_t bits_per_char = 0x08;    // 8 bits in 1 char(unsigned)

// bit_mask[n] selects bit n (least significant first) of a table cell.
static const unsigned char bit_mask[bits_per_char] = {
                                                       0x01,  //00000001
                                                       0x02,  //00000010
                                                       0x04,  //00000100
                                                       0x08,  //00001000
                                                       0x10,  //00010000
                                                       0x20,  //00100000
                                                       0x40,  //01000000
                                                       0x80   //10000000
                                                     };

/*
   User supplied constraints for a bloom filter together with the
   derived (optimal) table size and hash function count.

   Typical usage: set the public fields, verify the object with
   operator!, call compute_optimal_parameters() and then hand the
   object to a bloom_filter constructor.
*/
class bloom_parameters
{
public:

   bloom_parameters()
   : minimum_size(1),
     maximum_size(std::numeric_limits<unsigned long long int>::max()),
     minimum_number_of_hashes(1),
     maximum_number_of_hashes(std::numeric_limits<unsigned int>::max()),
     projected_element_count(10000),
     false_positive_probability(1.0 / projected_element_count),
     random_seed(0xA5A5A5A55A5A5A5AULL)
   {}

   virtual ~bloom_parameters()
   {}

   /*
      Returns true when the parameter set is INVALID.

      The probability tests are written in negated form so that a NaN
      is rejected as well. A probability of zero (or less) and a
      probability above one have no finite table size, and a
      maximum_size of zero would produce a filter with no bits, hence
      all of them are treated as invalid.
   */
   inline bool operator!() const
   {
      return (minimum_size > maximum_size)                         ||
             (0 == maximum_size)                                   ||
             (minimum_number_of_hashes > maximum_number_of_hashes) ||
             (minimum_number_of_hashes < 1)                        ||
             (0 == maximum_number_of_hashes)                       ||
             (0 == projected_element_count)                        ||
             !(false_positive_probability >  0.0)                  ||
             !(false_positive_probability <= 1.0)                  ||
             (0 == random_seed)                                    ||
             (0xFFFFFFFFFFFFFFFFULL == random_seed);
   }

   // Allowable min/max size of the bloom filter in bits
   unsigned long long int minimum_size;
   unsigned long long int maximum_size;

   // Allowable min/max number of hash functions
   unsigned int minimum_number_of_hashes;
   unsigned int maximum_number_of_hashes;

   // The approximate number of elements to be inserted
   // into the bloom filter, should be within one order
   // of magnitude. The default is 10000.
   unsigned long long int projected_element_count;

   // The approximate false positive probability expected
   // from the bloom filter. The default is assumed to be
   // the reciprocal of the projected_element_count.
   double false_positive_probability;

   // Seed mixed into the hash salts so that distinct filter
   // instances can be generated. Must be neither 0 nor all ones.
   unsigned long long int random_seed;

   struct optimal_parameters_t
   {
      optimal_parameters_t()
      : number_of_hashes(0),
        table_size(0)
      {}

      unsigned int number_of_hashes;
      unsigned long long int table_size;
   };

   optimal_parameters_t optimal_parameters;

   /*
      Attempts to find the number of hash functions and the minimum
      amount of storage bits required to construct a bloom filter
      consistent with the user defined false positive probability and
      estimated element insertion count.

      Returns false (leaving optimal_parameters untouched) when the
      parameter set is invalid or no hash count yields a finite table
      size; returns true otherwise.
   */
   virtual bool compute_optimal_parameters()
   {
      if (!(*this))
         return false;

      double min_m = std::numeric_limits<double>::infinity();
      double min_k = 0.0;

      for (double k = 1.0; k < 1000.0; k += 1.0)
      {
         const double numerator   = (- k * projected_element_count);
         const double denominator = std::log(1.0 - std::pow(false_positive_probability, 1.0 / k));

         // log(1 - p^(1/k)) must be strictly negative. Zero (p^(1/k)
         // rounded to 0) or NaN would turn the quotient into -inf/NaN.
         if (!(denominator < 0.0))
            continue;

         const double curr_m = numerator / denominator;

         if (curr_m < min_m)
         {
            min_m = curr_m;
            min_k = k;
         }
      }

      if (min_k < 1.0)
         return false;

      const unsigned long long int max_bits = std::numeric_limits<unsigned long long int>::max();

      // Converting a double that exceeds the destination range is
      // undefined, so saturate explicitly.
      unsigned long long int bits =
         (min_m >= static_cast<double>(max_bits)) ? max_bits
                                                  : static_cast<unsigned long long int>(min_m);

      // Round up to a whole number of cells (down if rounding up would wrap).
      const unsigned long long int remainder = bits % bits_per_char;

      if (0 != remainder)
      {
         const unsigned long long int padding = bits_per_char - remainder;
         bits = (bits <= (max_bits - padding)) ? (bits + padding) : (bits - remainder);
      }

      // A zero bit table cannot be indexed (modulo by zero).
      if (0 == bits)
         bits = bits_per_char;

      optimal_parameters_t& optp = optimal_parameters;

      optp.number_of_hashes = static_cast<unsigned int>(min_k);
      optp.table_size       = bits;

      if (optp.number_of_hashes < minimum_number_of_hashes)
         optp.number_of_hashes = minimum_number_of_hashes;
      else if (optp.number_of_hashes > maximum_number_of_hashes)
         optp.number_of_hashes = maximum_number_of_hashes;

      // Note: clamping may leave a size that is not a multiple of
      // bits_per_char; bloom_filter rounds its storage up accordingly.
      if (optp.table_size < minimum_size)
         optp.table_size = minimum_size;
      else if (optp.table_size > maximum_size)
         optp.table_size = maximum_size;

      return true;
   }

};

/*
   A bloom filter over arbitrary byte sequences.

   Each key is hashed once per salt; the resulting bit positions are set
   on insert and tested on lookup. Lookups may report false positives
   but never false negatives.
*/
class bloom_filter
{
protected:

   typedef unsigned int bloom_type;
   typedef unsigned char cell_type;
   typedef std::vector<unsigned char> table_type;

public:

   // Constructs an empty, unusable filter (operator! returns true).
   bloom_filter()
   : salt_count_(0),
     table_size_(0),
     projected_element_count_(0),
     inserted_element_count_ (0),
     random_seed_(0),
     desired_false_positive_probability_(0.0)
   {}

   // Constructs a filter from parameters on which
   // compute_optimal_parameters() has already been called.
   bloom_filter(const bloom_parameters& p)
   : salt_count_(p.optimal_parameters.number_of_hashes),
     table_size_(p.optimal_parameters.table_size),
     projected_element_count_(p.projected_element_count),
     inserted_element_count_(0),
     random_seed_((p.random_seed * 0xA5A5A5A5) + 1),
     desired_false_positive_probability_(p.false_positive_probability)
   {
      generate_unique_salt();

      // Round the storage up so that every bit index below table_size_
      // has a backing cell even when table_size_ is not a multiple of
      // bits_per_char.
      bit_table_.resize(cells_for_bits(table_size_), static_cast<cell_type>(0x00));
   }

   bloom_filter(const bloom_filter& filter)
   : salt_(filter.salt_),
     bit_table_(filter.bit_table_),
     salt_count_(filter.salt_count_),
     table_size_(filter.table_size_),
     projected_element_count_(filter.projected_element_count_),
     inserted_element_count_(filter.inserted_element_count_),
     random_seed_(filter.random_seed_),
     desired_false_positive_probability_(filter.desired_false_positive_probability_)
   {}

   inline bool operator == (const bloom_filter& f) const
   {
      if (this == &f)
         return true;

      return
         (salt_count_                         == f.salt_count_                        ) &&
         (table_size_                         == f.table_size_                        ) &&
         (bit_table_.size()                   == f.bit_table_.size()                  ) &&
         (projected_element_count_            == f.projected_element_count_           ) &&
         (inserted_element_count_             == f.inserted_element_count_            ) &&
         (random_seed_                        == f.random_seed_                       ) &&
         (desired_false_positive_probability_ == f.desired_false_positive_probability_) &&
         (salt_                               == f.salt_                              ) &&
         (bit_table_                          == f.bit_table_                         ) ;
   }

   inline bool operator != (const bloom_filter& f) const
   {
      return !operator==(f);
   }

   inline bloom_filter& operator = (const bloom_filter& f)
   {
      if (this != &f)
      {
         salt_count_ = f.salt_count_;
         table_size_ = f.table_size_;
         bit_table_  = f.bit_table_;
         salt_       = f.salt_;

         projected_element_count_ = f.projected_element_count_;
         inserted_element_count_  = f.inserted_element_count_;

         random_seed_ = f.random_seed_;

         desired_false_positive_probability_ = f.desired_false_positive_probability_;
      }

      return *this;
   }

   virtual ~bloom_filter()
   {}

   // Returns true when the filter has no table (i.e. is unusable).
   inline bool operator!() const
   {
      return (0 == table_size_);
   }

   // Resets every bit and the inserted element counter.
   inline void clear()
   {
      std::fill(bit_table_.begin(), bit_table_.end(), static_cast<cell_type>(0x00));
      inserted_element_count_ = 0;
   }

   // Inserts the byte sequence [key_begin, key_begin + length).
   inline void insert(const unsigned char* key_begin, const std::size_t& length)
   {
      std::size_t bit_index = 0;
      std::size_t bit       = 0;

      for (std::size_t i = 0; i < salt_.size(); ++i)
      {
         compute_indices(hash_ap(key_begin, length, salt_[i]), bit_index, bit);

         bit_table_[bit_index / bits_per_char] |= bit_mask[bit];
      }

      ++inserted_element_count_;
   }

   template <typename T>
   inline void insert(const T& t)
   {
      // Note: T must be a C++ POD type.
      insert(reinterpret_cast<const unsigned char*>(&t),sizeof(T));
   }

   inline void insert(const std::string& key)
   {
      insert(reinterpret_cast<const unsigned char*>(key.data()),key.size());
   }

   inline void insert(const char* data, const std::size_t& length)
   {
      insert(reinterpret_cast<const unsigned char*>(data),length);
   }

   // Inserts every element of the range [begin, end).
   template <typename InputIterator>
   inline void insert(const InputIterator begin, const InputIterator end)
   {
      InputIterator itr = begin;

      while (end != itr)
      {
         insert(*(itr++));
      }
   }

   // Returns false when the key is definitely absent, true when it is
   // possibly present.
   inline virtual bool contains(const unsigned char* key_begin, const std::size_t length) const
   {
      std::size_t bit_index = 0;
      std::size_t bit       = 0;

      for (std::size_t i = 0; i < salt_.size(); ++i)
      {
         compute_indices(hash_ap(key_begin, length, salt_[i]), bit_index, bit);

         if ((bit_table_[bit_index / bits_per_char] & bit_mask[bit]) != bit_mask[bit])
         {
            return false;
         }
      }

      return true;
   }

   template <typename T>
   inline bool contains(const T& t) const
   {
      return contains(reinterpret_cast<const unsigned char*>(&t),static_cast<std::size_t>(sizeof(T)));
   }

   inline bool contains(const std::string& key) const
   {
      return contains(reinterpret_cast<const unsigned char*>(key.c_str()),key.size());
   }

   inline bool contains(const char* data, const std::size_t& length) const
   {
      return contains(reinterpret_cast<const unsigned char*>(data),length);
   }

   // Returns an iterator to the first element of [begin, end) that is
   // NOT contained, or end when all of them are.
   template <typename InputIterator>
   inline InputIterator contains_all(const InputIterator begin, const InputIterator end) const
   {
      InputIterator itr = begin;

      while (end != itr)
      {
         if (!contains(*itr))
         {
            return itr;
         }

         ++itr;
      }

      return end;
   }

   // Returns an iterator to the first element of [begin, end) that IS
   // contained, or end when none of them are.
   template <typename InputIterator>
   inline InputIterator contains_none(const InputIterator begin, const InputIterator end) const
   {
      InputIterator itr = begin;

      while (end != itr)
      {
         if (contains(*itr))
         {
            return itr;
         }

         ++itr;
      }

      return end;
   }

   // Size of the filter in bits.
   inline virtual unsigned long long int size() const
   {
      return table_size_;
   }

   // Number of insert operations performed since construction/clear.
   inline unsigned long long int element_count() const
   {
      return inserted_element_count_;
   }

   inline double effective_fpp() const
   {
      /*
        Note:
        The effective false positive probability is calculated using the
        designated table size and hash function count in conjunction with
        the current number of inserted elements - not the user defined
        predicted/expected number of inserted elements.
      */
      return std::pow(1.0 - std::exp(-1.0 * salt_.size() * inserted_element_count_ / size()), 1.0 * salt_.size());
   }

   inline bloom_filter& operator &= (const bloom_filter& f)
   {
      /* intersection */
      if (is_compatible_with(f))
      {
         for (std::size_t i = 0; i < bit_table_.size(); ++i)
         {
            bit_table_[i] &= f.bit_table_[i];
         }
      }

      return *this;
   }

   inline bloom_filter& operator |= (const bloom_filter& f)
   {
      /* union */
      if (is_compatible_with(f))
      {
         for (std::size_t i = 0; i < bit_table_.size(); ++i)
         {
            bit_table_[i] |= f.bit_table_[i];
         }
      }

      return *this;
   }

   inline bloom_filter& operator ^= (const bloom_filter& f)
   {
      /* difference */
      if (is_compatible_with(f))
      {
         for (std::size_t i = 0; i < bit_table_.size(); ++i)
         {
            bit_table_[i] ^= f.bit_table_[i];
         }
      }

      return *this;
   }

   // Read-only access to the raw bit table.
   inline const cell_type* table() const
   {
      return bit_table_.data();
   }

   // Number of hash functions (salts) in use.
   inline std::size_t hash_count() const
   {
      return salt_.size();
   }

protected:

   // Number of cells needed to hold the given number of bits (rounded up).
   static inline std::size_t cells_for_bits(const unsigned long long int bits)
   {
      return static_cast<std::size_t>((bits / bits_per_char) + ((0 != (bits % bits_per_char)) ? 1 : 0));
   }

   // Two filters can be combined bitwise only when they hash identically
   // and their tables have the same length (the length may differ after
   // a compressible filter has been compressed).
   inline bool is_compatible_with(const bloom_filter& f) const
   {
      return (salt_count_       == f.salt_count_      ) &&
             (table_size_       == f.table_size_      ) &&
             (random_seed_      == f.random_seed_     ) &&
             (bit_table_.size() == f.bit_table_.size());
   }

   // Maps a hash value onto a bit position in the table (bit_index) and
   // the position of that bit inside its cell (bit).
   inline virtual void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const
   {
      bit_index = hash % table_size_;
      bit       = bit_index % bits_per_char;
   }

   void generate_unique_salt()
   {
      /*
        Note:
        A distinct hash function need not be implementation-wise
        distinct. In the current implementation "seeding" a common
        hash function with different values seems to be adequate.
      */
      const unsigned int predef_salt_count = 128;

      static const bloom_type predef_salt[predef_salt_count] =
                                 {
                                    0xAAAAAAAA, 0x55555555, 0x33333333, 0xCCCCCCCC,
                                    0x66666666, 0x99999999, 0xB5B5B5B5, 0x4B4B4B4B,
                                    0xAA55AA55, 0x55335533, 0x33CC33CC, 0xCC66CC66,
                                    0x66996699, 0x99B599B5, 0xB54BB54B, 0x4BAA4BAA,
                                    0xAA33AA33, 0x55CC55CC, 0x33663366, 0xCC99CC99,
                                    0x66B566B5, 0x994B994B, 0xB5AAB5AA, 0xAAAAAA33,
                                    0x555555CC, 0x33333366, 0xCCCCCC99, 0x666666B5,
                                    0x9999994B, 0xB5B5B5AA, 0xFFFFFFFF, 0xFFFF0000,
                                    0xB823D5EB, 0xC1191CDF, 0xF623AEB3, 0xDB58499F,
                                    0xC8D42E70, 0xB173F616, 0xA91A5967, 0xDA427D63,
                                    0xB1E8A2EA, 0xF6C0D155, 0x4909FEA3, 0xA68CC6A7,
                                    0xC395E782, 0xA26057EB, 0x0CD5DA28, 0x467C5492,
                                    0xF15E6982, 0x61C6FAD3, 0x9615E352, 0x6E9E355A,
                                    0x689B563E, 0x0C9831A8, 0x6753C18B, 0xA622689B,
                                    0x8CA63C47, 0x42CC2884, 0x8E89919B, 0x6EDBD7D3,
                                    0x15B6796C, 0x1D6FDFE4, 0x63FF9092, 0xE7401432,
                                    0xEFFE9412, 0xAEAEDF79, 0x9F245A31, 0x83C136FC,
                                    0xC3DA4A8C, 0xA5112C8C, 0x5271F491, 0x9A948DAB,
                                    0xCEE59A8D, 0xB5F525AB, 0x59D13217, 0x24E7C331,
                                    0x697C2103, 0x84B0A460, 0x86156DA9, 0xAEF2AC68,
                                    0x23243DA5, 0x3F649643, 0x5FA495A8, 0x67710DF8,
                                    0x9A6C499E, 0xDCFB0227, 0x46A43433, 0x1832B07A,
                                    0xC46AFF3C, 0xB9C8FFF0, 0xC9500467, 0x34431BDF,
                                    0xB652432B, 0xE367F12B, 0x427F4C1B, 0x224C006E,
                                    0x2E7E5A89, 0x96F99AA5, 0x0BEB452A, 0x2FD87C39,
                                    0x74B2E1FB, 0x222EFD24, 0xF357F60C, 0x440FCB1E,
                                    0x8BBE030F, 0x6704DC29, 0x1144D12F, 0x948B1355,
                                    0x6D8FD7E9, 0x1C11A014, 0xADD1592F, 0xFB3C712E,
                                    0xFC77642F, 0xF9C4CE8C, 0x31312FB9, 0x08B0DD79,
                                    0x318FA6E7, 0xC040D23D, 0xC0589AA7, 0x0CA5C075,
                                    0xF874B172, 0x0CF914D5, 0x784D3280, 0x4E8CFEBC,
                                    0xC569F575, 0xCDB2A091, 0x2CC016B4, 0x5C5F4421
                                 };

      if (salt_count_ <= predef_salt_count)
      {
         std::copy(predef_salt,
                   predef_salt + salt_count_,
                   std::back_inserter(salt_));

         for (std::size_t i = 0; i < salt_.size(); ++i)
         {
            /*
               Note:
               This is done to integrate the user defined random seed,
               so as to allow for the generation of unique bloom filter
               instances.
            */
            salt_[i] = salt_[i] * salt_[(i + 3) % salt_.size()] + static_cast<bloom_type>(random_seed_);
         }
      }
      else
      {
         std::copy(predef_salt, predef_salt + predef_salt_count, std::back_inserter(salt_));

         // Note: this reseeds the process wide C random number generator.
         std::srand(static_cast<unsigned int>(random_seed_));

         while (salt_.size() < salt_count_)
         {
            bloom_type current_salt = static_cast<bloom_type>(std::rand()) * static_cast<bloom_type>(std::rand());

            if (0 == current_salt)
               continue;

            // Only distinct salts yield distinct hash functions.
            if (salt_.end() == std::find(salt_.begin(), salt_.end(), current_salt))
            {
               salt_.push_back(current_salt);
            }
         }
      }
   }

   // Loads a Word from a possibly unaligned byte address. Going through
   // memcpy avoids the alignment and aliasing hazards of dereferencing a
   // reinterpret_cast'ed pointer while yielding the same value.
   template <typename Word>
   static inline Word read_unaligned(const unsigned char* address)
   {
      Word word = Word();
      std::memcpy(&word, address, sizeof(Word));
      return word;
   }

   // Salted hash of the byte sequence [begin, begin + remaining_length).
   // The 'hash' parameter carries the salt in and the running value out.
   inline bloom_type hash_ap(const unsigned char* begin, std::size_t remaining_length, bloom_type hash) const
   {
      const unsigned char* itr = begin;
      unsigned int loop        = 0;

      // Main loop: consume the key two 32-bit words at a time.
      while (remaining_length >= 8)
      {
         const unsigned int i1 = read_unaligned<unsigned int>(itr); itr += sizeof(unsigned int);
         const unsigned int i2 = read_unaligned<unsigned int>(itr); itr += sizeof(unsigned int);

         hash ^= (hash <<  7) ^  i1 * (hash >> 3) ^
              (~((hash << 11) + (i2 ^ (hash >> 5))));

         remaining_length -= 8;
      }

      // Tail: at most one 4 byte, one 2 byte and one 1 byte chunk remain.
      if (remaining_length)
      {
         if (remaining_length >= 4)
         {
            const unsigned int i = read_unaligned<unsigned int>(itr);

            if (loop & 0x01)
               hash ^=    (hash <<  7) ^  i * (hash >> 3);
            else
               hash ^= (~((hash << 11) + (i ^ (hash >> 5))));

            ++loop;

            remaining_length -= 4;

            itr += sizeof(unsigned int);
         }

         if (remaining_length >= 2)
         {
            const unsigned short i = read_unaligned<unsigned short>(itr);

            if (loop & 0x01)
               hash ^=    (hash <<  7) ^  i * (hash >> 3);
            else
               hash ^= (~((hash << 11) + (i ^ (hash >> 5))));

            ++loop;

            remaining_length -= 2;

            itr += sizeof(unsigned short);
         }

         if (remaining_length)
         {
            hash += ((*itr) ^ (hash * 0xA5A5A5A5)) + loop;
         }
      }

      return hash;
   }

   std::vector<bloom_type>    salt_;
   std::vector<unsigned char> bit_table_;
   unsigned int               salt_count_;
   unsigned long long int     table_size_;
   unsigned long long int     projected_element_count_;
   unsigned long long int     inserted_element_count_;
   unsigned long long int     random_seed_;
   double                     desired_false_positive_probability_;
};

// Intersection of two filters (a copy of 'a' when they are incompatible).
inline bloom_filter operator & (const bloom_filter& a, const bloom_filter& b)
{
   bloom_filter result = a;
   result &= b;
   return result;
}

// Union of two filters (a copy of 'a' when they are incompatible).
inline bloom_filter operator | (const bloom_filter& a, const bloom_filter& b)
{
   bloom_filter result = a;
   result |= b;
   return result;
}

// Difference of two filters (a copy of 'a' when they are incompatible).
inline bloom_filter operator ^ (const bloom_filter& a, const bloom_filter& b)
{
   bloom_filter result = a;
   result ^= b;
   return result;
}

/*
   A bloom filter whose table can be shrunk after elements have been
   inserted, trading storage for a higher false positive probability.

   Every size the table has had is recorded in size_list; a hash is
   reduced modulo each recorded size in turn, so bits set before a
   compression are still found afterwards.
*/
class compressible_bloom_filter : public bloom_filter
{
public:

   compressible_bloom_filter(const bloom_parameters& p)
   : bloom_filter(p)
   {
      size_list.push_back(table_size_);
   }

   // Current (possibly compressed) size of the filter in bits.
   inline unsigned long long int size() const
   {
      return size_list.back();
   }

   /*
      Shrinks the table by the given percentage of its current size.

      percentage must lie in [0,100). Returns false, leaving the filter
      untouched, when the percentage is out of range (or NaN) or when the
      resulting size would not be a smaller, non-empty whole number of
      cells.
   */
   inline bool compress(const double& percentage)
   {
      // Negated comparisons so that NaN is rejected too.
      if (!(percentage >= 0.0) || !(percentage < 100.0))
      {
         return false;
      }

      const unsigned long long int original_table_size = size_list.back();
      unsigned long long int new_table_size = static_cast<unsigned long long int>(original_table_size * (1.0 - (percentage / 100.0)));

      new_table_size -= new_table_size % bits_per_char;

      if (
           (bits_per_char  >  new_table_size) ||
           (new_table_size >= original_table_size)
         )
      {
         return false;
      }

      desired_false_positive_probability_ = effective_fpp();

      const std::size_t new_cell_count = static_cast<std::size_t>(new_table_size / bits_per_char);

      table_type folded(new_cell_count, static_cast<cell_type>(0x00));

      // Fold the old table onto the new one. new_table_size is a multiple
      // of bits_per_char, so bit b of the old table lands on bit
      // (b % new_table_size), i.e. cell ((b / 8) % new_cell_count) with the
      // same in-cell offset - exactly where compute_indices looks after
      // the new size has been recorded. The modulo also keeps the write
      // inside the new table when more than half of it is removed.
      for (std::size_t i = 0; i < bit_table_.size(); ++i)
      {
         folded[i % new_cell_count] |= bit_table_[i];
      }

      bit_table_.swap(folded);

      size_list.push_back(new_table_size);

      return true;
   }

private:

   // Reduces the hash modulo every size the table has had, oldest first.
   inline void compute_indices(const bloom_type& hash, std::size_t& bit_index, std::size_t& bit) const
   {
      bit_index = hash;

      for (std::size_t i = 0; i < size_list.size(); ++i)
      {
         bit_index %= size_list[i];
      }

      bit = bit_index % bits_per_char;
   }

   std::vector<unsigned long long int> size_list;
};

#endif


/*
  Note 1:
  If it can be guaranteed that bits_per_char will be of the form 2^n then
  the following optimization can be used:

  bit_table_[bit_index >> n] |= bit_mask[bit_index & (bits_per_char - 1)];

  Note 2:
  For performance reasons where possible when allocating memory it should
  be aligned (aligned_alloc) according to the architecture being used.
*/
735 | */ 736 | -------------------------------------------------------------------------------- /bloom_filter_example01.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ********************************************************************* 3 | * * 4 | * Open Bloom Filter * 5 | * * 6 | * Description: Basic Bloom Filter Usage * 7 | * Author: Arash Partow - 2000 * 8 | * URL: http://www.partow.net * 9 | * URL: http://www.partow.net/programming/hashfunctions/index.html * 10 | * * 11 | * Copyright notice: * 12 | * Free use of the Open Bloom Filter Library is permitted under the * 13 | * guidelines and in accordance with the MIT License. * 14 | * http://www.opensource.org/licenses/MIT * 15 | * * 16 | ********************************************************************* 17 | */ 18 | 19 | 20 | /* 21 | Description: This example demonstrates basic usage of the Bloom filter. 22 | Initially some values are inserted then they are subsequently 23 | queried, noting any false positives or errors. 24 | */ 25 | 26 | 27 | #include 28 | #include 29 | 30 | #include "bloom_filter.hpp" 31 | 32 | int main() 33 | { 34 | bloom_parameters parameters; 35 | 36 | // How many elements roughly do we expect to insert? 37 | parameters.projected_element_count = 1000; 38 | 39 | // Maximum tolerable false positive probability? (0,1) 40 | parameters.false_positive_probability = 0.0001; // 1 in 10000 41 | 42 | // Simple randomizer (optional) 43 | parameters.random_seed = 0xA5A5A5A5; 44 | 45 | if (!parameters) 46 | { 47 | std::cout << "Error - Invalid set of bloom filter parameters!" 
<< std::endl; 48 | return 1; 49 | } 50 | 51 | parameters.compute_optimal_parameters(); 52 | 53 | //Instantiate Bloom Filter 54 | bloom_filter filter(parameters); 55 | 56 | std::string str_list[] = { "AbC", "iJk", "XYZ" }; 57 | 58 | // Insert into Bloom Filter 59 | { 60 | // Insert some strings 61 | for (std::size_t i = 0; i < (sizeof(str_list) / sizeof(std::string)); ++i) 62 | { 63 | filter.insert(str_list[i]); 64 | } 65 | 66 | // Insert some numbers 67 | for (std::size_t i = 0; i < 100; ++i) 68 | { 69 | filter.insert(i); 70 | } 71 | } 72 | 73 | // Query Bloom Filter 74 | { 75 | // Query the existence of strings 76 | for (std::size_t i = 0; i < (sizeof(str_list) / sizeof(std::string)); ++i) 77 | { 78 | if (filter.contains(str_list[i])) 79 | { 80 | std::cout << "BF contains: " << str_list[i] << std::endl; 81 | } 82 | } 83 | 84 | // Query the existence of numbers 85 | for (std::size_t i = 0; i < 100; ++i) 86 | { 87 | if (filter.contains(i)) 88 | { 89 | std::cout << "BF contains: " << i << std::endl; 90 | } 91 | } 92 | 93 | std::string invalid_str_list[] = { "AbCX", "iJkX", "XYZX" }; 94 | 95 | // Query the existence of invalid strings 96 | for (std::size_t i = 0; i < (sizeof(invalid_str_list) / sizeof(std::string)); ++i) 97 | { 98 | if (filter.contains(invalid_str_list[i])) 99 | { 100 | std::cout << "BF falsely contains: " << invalid_str_list[i] << std::endl; 101 | } 102 | } 103 | 104 | // Query the existence of invalid numbers 105 | for (int i = -1; i > -100; --i) 106 | { 107 | if (filter.contains(i)) 108 | { 109 | std::cout << "BF falsely contains: " << i << std::endl; 110 | } 111 | } 112 | } 113 | 114 | return 0; 115 | } 116 | -------------------------------------------------------------------------------- /bloom_filter_example02.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | ********************************************************************* 3 | * * 4 | * Open Bloom Filter * 5 | * * 6 | * Description: 
Demonstration of a Bloom Filter * 7 | * Author: Arash Partow - 2000 * 8 | * URL: http://www.partow.net * 9 | * URL: http://www.partow.net/programming/hashfunctions/index.html * 10 | * * 11 | * Copyright notice: * 12 | * Free use of the Open Bloom Filter Library is permitted under the * 13 | * guidelines and in accordance with the MIT License. * 14 | * http://www.opensource.org/licenses/MIT * 15 | * * 16 | ********************************************************************* 17 | */ 18 | 19 | 20 | /* 21 | Description: This example will demonstrate how to instantiate a Bloom filter, 22 | insert strings and then query the inserted strings and a set of 23 | outlier strings for membership status within the Bloom filter. 24 | Furthermore this process will be carried out upon 1000 unique 25 | instances of Bloom filter. The objective is to empirically 26 | determine which "random" seed that when used to construct a 27 | Bloom filter will provide the smallest observed false positive 28 | probability for the given sets of data. The optimal seed will 29 | be the one associated with the round that has the smallest 30 | difference percentage of false positive probability against 31 | the user specified false positive probability. 
32 | */ 33 | 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | #include 45 | 46 | #include "bloom_filter.hpp" 47 | 48 | bool load_word_list(int argc, char* argv[], std::vector& word_list); 49 | 50 | template class Container> 53 | bool read_file(const std::string& file_name, Container& c); 54 | 55 | std::string uppercase(std::string str); 56 | std::string reverse(std::string str); 57 | 58 | void generate_outliers(const std::vector& word_list, std::deque& outliers); 59 | void purify_outliers(const std::vector& word_list,std::deque& outliers); 60 | 61 | int main(int argc, char* argv[]) 62 | { 63 | std::vector word_list; 64 | std::deque outliers; 65 | 66 | if (!load_word_list(argc,argv,word_list)) 67 | { 68 | return 1; 69 | } 70 | 71 | generate_outliers(word_list,outliers); 72 | 73 | unsigned int random_seed = 0; 74 | std::size_t word_list_storage_size = 0; 75 | 76 | for (unsigned int i = 0; i < word_list.size(); ++i) 77 | { 78 | word_list_storage_size += word_list[i].size(); 79 | } 80 | 81 | std::size_t total_number_of_queries = 0; 82 | 83 | const double desired_probability_of_false_positive = 1.0 / word_list.size(); 84 | 85 | printf("Round\t Queries\t FPQ\t IPFP\t PFP\t DPFP\t TvD\n"); 86 | 87 | unsigned int max_false_positive_count = 0; 88 | unsigned int min_false_positive_count = std::numeric_limits::max(); 89 | unsigned int total_false_positive = 0; 90 | unsigned int total_zero_fp = 0; 91 | unsigned long long int bloom_filter_size = 0; 92 | 93 | 94 | static const unsigned int rounds = 1000; 95 | 96 | while (random_seed < rounds) 97 | { 98 | bloom_parameters parameters; 99 | 100 | parameters.projected_element_count = word_list.size(); 101 | parameters.false_positive_probability = desired_probability_of_false_positive; 102 | parameters.random_seed = ++random_seed; 103 | 104 | if (!parameters) 105 | { 106 | std::cout << "Error - Invalid set of bloom filter parameters!" 
<< std::endl; 107 | return 1; 108 | } 109 | 110 | parameters.compute_optimal_parameters(); 111 | 112 | bloom_filter filter(parameters); 113 | 114 | filter.insert(word_list.begin(),word_list.end()); 115 | 116 | std::vector::iterator it = filter.contains_all(word_list.begin(),word_list.end()); 117 | 118 | if (word_list.end() != it) 119 | { 120 | std::cout << "ERROR: key not found in bloom filter! =>" << (*it) << std::endl; 121 | return 1; 122 | } 123 | 124 | unsigned int current_total_false_positive = 0; 125 | 126 | for (std::deque::iterator itr = outliers.begin(); itr != outliers.end(); ++itr) 127 | { 128 | if (filter.contains(*itr)) 129 | { 130 | ++current_total_false_positive; 131 | } 132 | } 133 | 134 | total_number_of_queries += (outliers.size() + word_list.size()); 135 | 136 | // Overall false positive probability 137 | double pfp = current_total_false_positive / (1.0 * outliers.size()); 138 | 139 | printf("%6llu\t%10llu\t%6d\t%8.7f\t%8.7f\t%9.3f%%\t%8.6f\n", 140 | static_cast(random_seed), 141 | static_cast(total_number_of_queries), 142 | current_total_false_positive, 143 | desired_probability_of_false_positive, 144 | pfp, 145 | (100.0 * pfp) / desired_probability_of_false_positive, 146 | (100.0 * filter.size()) / (bits_per_char * word_list_storage_size)); 147 | 148 | if (current_total_false_positive < min_false_positive_count) 149 | min_false_positive_count = current_total_false_positive; 150 | else if (current_total_false_positive > max_false_positive_count) 151 | max_false_positive_count = current_total_false_positive; 152 | 153 | total_false_positive += current_total_false_positive; 154 | 155 | if (0 == current_total_false_positive) 156 | ++total_zero_fp; 157 | 158 | bloom_filter_size = filter.size(); 159 | } 160 | 161 | double average_fpc = (1.0 * total_false_positive) / rounds; 162 | double average_fpp = average_fpc / (outliers.size() + word_list.size()); 163 | 164 | printf("Bloom Filter Statistics\n" 165 | "MinFPC: %d\tMaxFPC: %d\tAverageFPC: 
%8.5f\tAverageFPP: %9.8f Zero-FPC:%d\n" 166 | "Filter Size: %lluKB\tData Size: %dKB\n", 167 | min_false_positive_count, 168 | max_false_positive_count, 169 | average_fpc, 170 | average_fpp, 171 | total_zero_fp, 172 | bloom_filter_size / (8 * 1024), 173 | static_cast(word_list_storage_size / 1024)); 174 | 175 | /* 176 | Terminology 177 | MinFPC : Minimum (smallest) False Positive Count 178 | MaxFPC : Maximum (largest) False Positive Count 179 | AverageFPC : Average False Positive Count 180 | AverageFPP : Average False Positive Probability 181 | 182 | FPQ : False Positive Queries 183 | IPFP : Indicative (desired) False Positive Probability 184 | PFP : Probability of a False Positive (based on the FPQ) 185 | DPFP : Difference as a percentage between IPFP and PFP 186 | TvD : percentage of the filter size versus the raw data size 187 | */ 188 | 189 | return 0; 190 | } 191 | 192 | bool load_word_list(int argc, char* argv[], std::vector& word_list) 193 | { 194 | // Note: The word-lists can be obtained from: 195 | // https://github.com/ArashPartow/bloom 196 | static const std::string wl_list[] = 197 | { "word-list.txt", 198 | "word-list-large.txt", 199 | "word-list-extra-large.txt", 200 | "random-list.txt" 201 | }; 202 | 203 | std::size_t index = 0; 204 | 205 | if (2 == argc) 206 | { 207 | index = ::atoi(argv[1]); 208 | 209 | const std::size_t wl_list_size = sizeof(wl_list) / sizeof(std::string); 210 | 211 | if (index >= wl_list_size) 212 | { 213 | std::cout << "Invalid world list index: " << index << std::endl; 214 | return false; 215 | } 216 | } 217 | 218 | std::cout << "Loading list " << wl_list[index] << "....."; 219 | 220 | if (!read_file(wl_list[index],word_list)) 221 | { 222 | return false; 223 | } 224 | 225 | if (word_list.empty()) 226 | { 227 | std::cout << "No word list - Either none requested, or desired word list could not be loaded." << std::endl; 228 | return false; 229 | } 230 | else 231 | std::cout << " Complete." 
/*
   Returns a copy of str with every character converted by toupper().
*/
std::string uppercase(std::string str)
{
   for (std::string::iterator ch = str.begin(); ch != str.end(); ++ch)
   {
      // toupper() is only defined for EOF and for values representable as
      // unsigned char, so a plain (possibly negative) char is widened
      // through unsigned char before the call.
      *ch = static_cast<char>(toupper(static_cast<unsigned char>(*ch)));
   }

   return str;
}

/*
   Reads file_name line by line and appends, for every line, the line
   itself followed by its upper-cased form to the container c.

   Returns false (after printing an error) when the file cannot be opened,
   true otherwise.
*/
template <class T,
          class Allocator,
          template <class,class> class Container>
bool read_file(const std::string& file_name, Container<T, Allocator>& c)
{
   std::ifstream stream(file_name.c_str());

   if (!stream)
   {
      std::cout << "Error: Failed to open file '" << file_name << "'" << std::endl;
      return false;
   }

   std::string buffer;

   while (std::getline(stream,buffer))
   {
      c.push_back(buffer);
      c.push_back(uppercase(buffer));
   }

   return true;
}

/*
   Returns str with its characters in reverse order.
*/
std::string reverse(std::string str)
{
   // The argument is taken by value, so it can be reversed in place.
   std::reverse(str.begin(),str.end());
   return str;
}

/*
   Removes from outliers every string that also occurs in word_list.
   The relative order of the remaining outliers (and any duplicates among
   them) is preserved.
*/
void purify_outliers(const std::vector<std::string>& word_list, std::deque<std::string>& outliers)
{
   if (word_list.empty() || outliers.empty())
   {
      return;
   }

   // One ordered set of the known words is enough: each outlier is looked
   // up directly, instead of building a second set, intersecting the two
   // and then binary-searching the intersection.
   const std::set<std::string> known_words(word_list.begin(),word_list.end());

   std::deque<std::string> kept;

   for (std::deque<std::string>::const_iterator it = outliers.begin(); it != outliers.end(); ++it)
   {
      if (known_words.end() == known_words.find(*it))
      {
         kept.push_back(*it);
      }
   }

   outliers.swap(kept);
}

/*
   Appends to outliers a set of strings that are expected NOT to be members
   of word_list, then sorts the outliers and removes any that turned out to
   be in word_list after all (see purify_outliers).

   Two sources are used:
     1. Variations of every word: word+reversed, word+word,
        reversed+word+reversed (skipped for palindromes) and a copy of the
        word with every odd-indexed character bit-inverted.
     2. A fixed table of random strings, combined with their neighbours in
        the table, both forwards and reversed.
*/
void generate_outliers(const std::vector<std::string>& word_list, std::deque<std::string>& outliers)
{
   std::cout << "Generating outliers..... ";

   for (std::vector<std::string>::const_iterator it = word_list.begin(); it != word_list.end(); ++it)
   {
      const std::string& word = *it;
      const std::string  reversed_word(word.rbegin(),word.rend());

      if (word != reversed_word)
      {
         outliers.push_back(word + reversed_word);
         outliers.push_back(word + word);
         outliers.push_back(reversed_word + word + reversed_word);
      }

      std::string ns = word;

      for (std::size_t i = 0; i < ns.size(); ++i)
      {
         // Invert every odd-indexed character. The mask used to be 0x00,
         // which made the test always false and left ns equal to the word.
         if (0 != (i & 0x01)) ns[i] = static_cast<char>(~ns[i]);
      }

      outliers.push_back(ns);
   }

   static const std::string rand_str[] =
                  {
                     "oD5l", "pccW", "5yHt", "ndaN", "OaJh", "tWPc", "Cr9C", "a9zE",
                     "H1wL", "yo1V", "16D7", "f2WR", "0MVQ", "PkKn", "PlVa", "MvzL",
                     "9Csl", "JQTv", "IveD", "FDVS", "Q7HE", "QgcF", "Q9Vo", "V8zJ",
                     "EJWT", "GuLC", "rM3d", "PJF4", "HXPW", "qKx3", "ztRP", "t4KP",
                     "m1zV", "fn12", "B1QP", "Jr4I", "Mf8M", "4jBd", "anGR", "Pipt",
                     "QHon", "GNlc", "UeXM", "mVM5", "ABI8", "RhB3", "5h2s", "hOYo",
                     "gaId", "DX40", "THMu", "EwlP", "n9Mz", "oC1S", "BfMl", "uCZ1",
                     "G2bA", "MOH9", "zZ0O", "PKDO", "3nRU", "Z6ie", "4cso", "LnQO",
                     "MJTtT","td3rC","A5JNR","1yL5B","rQnJk","jNKYF","CD0XD","pFLSG",
                     "fxO1a","CAjBE","ORk4e","0LERI","R7d0x","Qqd7v","6Kih5","9tTCB",
                     "yCg9U","D2Tv7","XpNHn","6zeFQ","BT2cs","WGhKW","zTv6B","TTPFk",
                     "XjNVX","pg9yW","4pKiZ","mQUhL","xrXzR","kVRm5","NSyC4","olXm9",
                     "UWkYy","8Ys6r","yd4Fl","5L4mB","nP3nH","f0DFb","glnQa","DlXQa",
                     "cQdH6","eBmIN","fDj6F","ezLow","C15vu","I2Z2j","BQgzg","eVBid",
                     "hn5TO","WZyQN","xXgsE","sL6nK","8DKD8","jcrbp","AcRak","h8N5o",
                     "LViwC","ThEKf","O7fd5","oN0Id","OM1m0","4OLiR","VIa8N","bJZFG",
                     "9j3rL","SzW0N","7m7pY","mY9bg","k1p3e","3OFm1","r45se","VYwz3",
                     "pDjXt","ZcqcJ","npPHx","hA3bw","w7lSO","jEmZL","1x3AZ","FN47G",
                     "kThNf","aC4fq","rzDwi","CYRNG","gCeuG","wCVqO","d1R60","bEauW",
                     "KeUwW","lIKhO","RfPv3","dK5wE","1X7qu","tRwEn","1c03P","GwHCl",
                     "CsJaO","zl4j1","e0aEc","Uskgi","rgTGR","jyR4g","Tt6l4","lRoaw",
                     "94ult","qZwBX","eYW8S","Qf6UH","AbV56","N1hJq","JIaVe","8LHEx",
                     "DeNbS","30I0a","hm6qw","3jcaO","4WkuA","mQ219","Gb81C","yx4HM",
                     "Chlqfi9S1y", "BMwUgVFu2X", "ZmpEGOVrVe", "13ggJxrPkC", "fcJJpyMGjm", "9T00Dv4ZAb",
                     "p3YRcP7M2o", "sR0qNUXCHv", "gCxWZbJ6rb", "R4YtzRXXUl", "vwyYz5j6pY", "XPWUvLXhJ7",
                     "7PwfnVVb7U", "1f34Q6hOYz", "1EM2abZY61", "0a6Ivi4S0a", "Teq2LrQs2T", "dWXLCgWHc8",
                     "LawMv7ujn4", "N8VFgbZQx5", "tfvHHxoDgi", "ImwYgXA2tf", "KkIES9NqZO", "ajcz0qjjda",
                     "6Vz28vlGs9", "VMCc5W8cCt", "BiQB8BRJ98", "43CpOJSMpA", "jfBJdqwXcU", "ecHR9EO2ib",
                     "LH7CcXyCZ7", "JntqGSgSpa", "0MbTMpZPFW", "5FJSdiCXzR", "5gda2AhA2x", "lrDFc1lnXk",
                     "zrEwECHvjs", "B0JldDxFa1", "6DYal4QxKa", "Hsqx6kP2S4", "zZwnALSuFh", "Shh4ISZcKW",
                     "P9VDaNSk7Z", "mEI2PLSCO6", "WyTyrQORtu", "IvJyMMRgh3", "Q6pgJq8Nkv", "dhOgR3tDAD",
                     "Y9h6bVgbxO", "wA15tiOPTm", "8TaIKf1zCO", "z75dzabHBs", "AS6OPnwoJI", "2DSZka9Auj",
                     "QLzUjV2CWs", "KZSN2SVhia", "7ttYKWF2ue", "1Zxfu7B2ST", "RnkpmwjsCi", "YpcSIzaqx5",
                     "RDEwFD9gmX", "Nlx3V4Cjw4", "9ZdvITOj8M", "httUPWMNXO", "Ypv9PjxGwa", "LlwyNolNnH",
                     "6xpJOht47a", "tbmz4WIdcG", "OwzuVDlb7D", "PBQKJxo8DQ", "uVnMQn7hK6", "rlnZINuDUa",
                     "2feyyYukPa", "teOlpKuDBn", "LxBSWh0dL1", "Onyb7r4Jp0", "bZxXE6xOXg", "d9NSvNTunQ",
                     "ONerLBic32", "8mar4rKmFk", "5cCN9uwaCg", "ElVrYOHHMv", "YF6Og8DX40", "OgiCwpCQ5a",
                     "K6nSRZVxdR", "gqyXXXoVFW", "ulyRYizcBP", "khUx31K5UR", "qZFRzVthju", "pQBh0vnB20",
                     "dk8NIN7ajy", "XP7ed1OjZx", "IRYNwA5iFR", "hiSEBhTukC", "Ns4jJ3jzGo", "dYoCSxjIvM",
                     "HzGLbl5i1g", "baizENd4ko", "6rCqGBO8t1", "QWGfC8UaA7", "JFhRfxQe4K", "8R4W6IWANz",
                     "2TnWf1w7JH", "0z69e0wcoG", "8SN1mRHCY7", "oFGCYHHwGX", "G8xqnBgxjO", "6B3SAOayHt",
                     "XRW3ZSG1gw", "WcIjTxMxOM", "wNqCAIaTb2", "gO4em4HW8H", "TgGFSMEtbG", "WiwmbEw3QA",
                     "D2xshYUgpu", "xRUZCQVzBs", "nCnUmMgIjE", "p4Ewt1yCJr", "MeOjDcaMY5", "1XelMeXiiI"
                  };

   static const std::size_t rand_str_size = sizeof(rand_str) / sizeof(rand_str[0]);

   for (std::size_t i = 0; i < rand_str_size; ++i)
   {
      // The table is treated as circular: s1..s6 are the six entries that
      // follow s0, wrapping around at the end.
      const std::string& s0 = rand_str[i];
      const std::string& s1 = rand_str[(i + 1) % rand_str_size];
      const std::string& s2 = rand_str[(i + 2) % rand_str_size];
      const std::string& s3 = rand_str[(i + 3) % rand_str_size];
      const std::string& s4 = rand_str[(i + 4) % rand_str_size];
      const std::string& s5 = rand_str[(i + 5) % rand_str_size];
      const std::string& s6 = rand_str[(i + 6) % rand_str_size];

      // Each combination is built once and then emitted forwards and
      // reversed (all forward forms first, then all reversed forms).
      const std::string combos[] =
                  {
                     s0,
                     s0 + s1,
                     s0 + s2 + s4,
                     s0 + s1 + s3,
                     s0 + s1 + s2 + s3 + s4 + s5,
                     s0 + s1 + s2 + s3 + s4 + s5 + s6
                  };

      const std::size_t combo_count = sizeof(combos) / sizeof(combos[0]);

      for (std::size_t j = 0; j < combo_count; ++j)
      {
         outliers.push_back(combos[j]);
      }

      for (std::size_t j = 0; j < combo_count; ++j)
      {
         outliers.push_back(std::string(combos[j].rbegin(),combos[j].rend()));
      }
   }

   std::sort(outliers.begin(),outliers.end());

   purify_outliers(word_list,outliers);

   std::cout << "Complete." << std::endl;
}
* 14 | * http://www.opensource.org/licenses/MIT * 15 | * * 16 | ********************************************************************* 17 | */ 18 | 19 | 20 | /* 21 | Description: This example will demonstrate how to instantiate a compressible 22 | Bloom filter, insert strings and then query the inserted strings 23 | and a set of outlier strings for membership status in the filter. 24 | Furthermore on each round the size of the Bloom filter will be 25 | reduced/compressed by 5%. In theory this should cause the 26 | effective/theoretical false positive probability to increase. 27 | The objective of this exercise is to track the observed false 28 | positive probability against the effective false positive 29 | probability as the filter's size is gradually reduced. 30 | */ 31 | 32 | 33 | #include 34 | #include 35 | #include 36 | #include 37 | #include 38 | #include 39 | #include 40 | #include 41 | #include 42 | #include 43 | 44 | #include "bloom_filter.hpp" 45 | 46 | bool load_word_list(int argc, char* argv[], std::vector& word_list); 47 | 48 | template class Container> 51 | bool read_file(const std::string& file_name, Container& c); 52 | 53 | void generate_outliers(const std::vector& word_list, std::deque& outliers); 54 | void purify_outliers(const std::vector& word_list,std::deque& outliers); 55 | 56 | int main(int argc, char* argv[]) 57 | { 58 | std::vector word_list; 59 | std::deque outliers; 60 | 61 | if (!load_word_list(argc,argv,word_list)) 62 | { 63 | return 1; 64 | } 65 | 66 | generate_outliers(word_list,outliers); 67 | 68 | unsigned int random_seed = 0xA57EC3B2; 69 | 70 | const double desired_probability_of_false_positive = 1.0 / word_list.size(); 71 | 72 | bloom_parameters parameters; 73 | parameters.projected_element_count = word_list.size(); 74 | parameters.false_positive_probability = desired_probability_of_false_positive; 75 | parameters.random_seed = random_seed++; 76 | parameters.maximum_number_of_hashes = 7; 77 | 78 | if (!parameters) 79 | { 80 | 
std::cout << "Error - Invalid set of bloom filter parameters!" << std::endl; 81 | return 1; 82 | } 83 | 84 | parameters.compute_optimal_parameters(); 85 | 86 | compressible_bloom_filter filter(parameters); 87 | 88 | filter.insert(word_list.begin(),word_list.end()); 89 | 90 | std::cout << "Filter Size\tEFPP \tOFPP \tDiff" << std::endl; 91 | 92 | while (filter.size() > 1) 93 | { 94 | std::vector::iterator it = filter.contains_all(word_list.begin(),word_list.end()); 95 | 96 | if (word_list.end() != it) 97 | { 98 | std::cout << "ERROR: key not found in bloom filter! =>" << (*it) << std::endl; 99 | return 1; 100 | } 101 | 102 | std::size_t total_false_positive = 0; 103 | 104 | for (std::deque::iterator itr = outliers.begin(); itr != outliers.end(); ++itr) 105 | { 106 | if (filter.contains(*itr)) ++total_false_positive; 107 | } 108 | 109 | double pfp = total_false_positive / (1.0 * outliers.size()); 110 | 111 | printf("%11llu\t%8.7f\t%8.7f\t%8.6f\n", 112 | static_cast(filter.size()), 113 | filter.effective_fpp(), 114 | pfp, 115 | 100.0 * (pfp / filter.effective_fpp())); 116 | 117 | if (!filter.compress(5.0)) 118 | { 119 | std::cout << "Filter cannot be compressed any further." 
// Declared here so that generate_outliers can be read (and compiled) on its
// own; the function is defined elsewhere in this file.
void purify_outliers(const std::vector<std::string>& word_list, std::deque<std::string>& outliers);

/*
   Reads file_name line by line and appends every line to the container c.

   Returns false (after printing an error) when the file cannot be opened,
   true otherwise.
*/
template <class T,
          class Allocator,
          template <class,class> class Container>
bool read_file(const std::string& file_name, Container<T, Allocator>& c)
{
   std::ifstream stream(file_name.c_str());

   if (!stream)
   {
      std::cout << "Error: Failed to open file '" << file_name << "'" << std::endl;
      return false;
   }

   std::string buffer;

   while (std::getline(stream,buffer))
   {
      c.push_back(buffer);
   }

   return true;
}

/*
   Returns str with its characters in reverse order.
*/
std::string reverse(std::string str)
{
   // The argument is taken by value, so it can be reversed in place.
   std::reverse(str.begin(),str.end());
   return str;
}

/*
   Loads one of the bundled word lists into word_list.

   argc/argv : the program's command line. When exactly one argument is
               given it is interpreted as a zero-based index into the
               table of word-list file names below; otherwise index 0 is
               used.
   word_list : receives the entries read by read_file().

   Returns true when the selected file was read and produced at least one
   entry. Returns false when the index is not a valid number in range,
   when the file cannot be read, or when it yields no entries.
*/
bool load_word_list(int argc, char* argv[], std::vector<std::string>& word_list)
{
   // Note: The word-lists can be obtained from:
   // https://github.com/ArashPartow/bloom
   static const std::string wl_list[] =
                     { "word-list.txt",
                       "word-list-large.txt",
                       "word-list-extra-large.txt",
                       "random-list.txt"
                     };

   static const std::size_t wl_list_size = sizeof(wl_list) / sizeof(wl_list[0]);

   std::size_t index = 0;

   if (2 == argc)
   {
      // atoi() would silently turn "abc" into 0 and "-1" into a huge
      // unsigned value, so parse strictly and reject anything that is not
      // a complete, non-negative, in-range decimal number.
      char* parse_end = 0;
      const long requested = ::strtol(argv[1], &parse_end, 10);

      const bool parsed_fully = (parse_end != argv[1]) && ('\0' == *parse_end);

      if (
           !parsed_fully   ||
           (requested < 0) ||
           (static_cast<unsigned long>(requested) >= wl_list_size)
         )
      {
         std::cout << "Invalid word list index: " << argv[1] << std::endl;
         return false;
      }

      index = static_cast<std::size_t>(requested);
   }

   std::cout << "Loading list " << wl_list[index] << ".....";

   if (!read_file(wl_list[index],word_list))
   {
      return false;
   }

   if (word_list.empty())
   {
      std::cout << "No word list - Either none requested, or desired word list could not be loaded." << std::endl;
      return false;
   }

   std::cout << " Complete." << std::endl;

   return true;
}

/*
   Appends to outliers a set of strings that are expected NOT to be members
   of word_list, then sorts the outliers and passes them to
   purify_outliers() together with word_list.

   Two sources are used:
     1. Variations of every word: word+reversed, word+word,
        reversed+word+reversed (skipped for palindromes) and a copy of the
        word with every odd-indexed character bit-inverted.
     2. A fixed table of random strings, combined with their neighbours in
        the table, both forwards and reversed.
*/
void generate_outliers(const std::vector<std::string>& word_list, std::deque<std::string>& outliers)
{
   std::cout << "Generating outliers..... ";

   for (std::vector<std::string>::const_iterator it = word_list.begin(); it != word_list.end(); ++it)
   {
      const std::string& word = *it;
      const std::string  reversed_word(word.rbegin(),word.rend());

      if (word != reversed_word)
      {
         outliers.push_back(word + reversed_word);
         outliers.push_back(word + word);
         outliers.push_back(reversed_word + word + reversed_word);
      }

      std::string ns = word;

      for (std::size_t i = 0; i < ns.size(); ++i)
      {
         // Invert every odd-indexed character. The mask used to be 0x00,
         // which made the test always false and left ns equal to the word.
         if (0 != (i & 0x01)) ns[i] = static_cast<char>(~ns[i]);
      }

      outliers.push_back(ns);
   }

   static const std::string rand_str[] =
                  {
                     "oD5l", "pccW", "5yHt", "ndaN", "OaJh", "tWPc", "Cr9C", "a9zE",
                     "H1wL", "yo1V", "16D7", "f2WR", "0MVQ", "PkKn", "PlVa", "MvzL",
                     "9Csl", "JQTv", "IveD", "FDVS", "Q7HE", "QgcF", "Q9Vo", "V8zJ",
                     "EJWT", "GuLC", "rM3d", "PJF4", "HXPW", "qKx3", "ztRP", "t4KP",
                     "m1zV", "fn12", "B1QP", "Jr4I", "Mf8M", "4jBd", "anGR", "Pipt",
                     "QHon", "GNlc", "UeXM", "mVM5", "ABI8", "RhB3", "5h2s", "hOYo",
                     "gaId", "DX40", "THMu", "EwlP", "n9Mz", "oC1S", "BfMl", "uCZ1",
                     "G2bA", "MOH9", "zZ0O", "PKDO", "3nRU", "Z6ie", "4cso", "LnQO",
                     "MJTtT","td3rC","A5JNR","1yL5B","rQnJk","jNKYF","CD0XD","pFLSG",
                     "fxO1a","CAjBE","ORk4e","0LERI","R7d0x","Qqd7v","6Kih5","9tTCB",
                     "yCg9U","D2Tv7","XpNHn","6zeFQ","BT2cs","WGhKW","zTv6B","TTPFk",
                     "XjNVX","pg9yW","4pKiZ","mQUhL","xrXzR","kVRm5","NSyC4","olXm9",
                     "UWkYy","8Ys6r","yd4Fl","5L4mB","nP3nH","f0DFb","glnQa","DlXQa",
                     "cQdH6","eBmIN","fDj6F","ezLow","C15vu","I2Z2j","BQgzg","eVBid",
                     "hn5TO","WZyQN","xXgsE","sL6nK","8DKD8","jcrbp","AcRak","h8N5o",
                     "LViwC","ThEKf","O7fd5","oN0Id","OM1m0","4OLiR","VIa8N","bJZFG",
                     "9j3rL","SzW0N","7m7pY","mY9bg","k1p3e","3OFm1","r45se","VYwz3",
                     "pDjXt","ZcqcJ","npPHx","hA3bw","w7lSO","jEmZL","1x3AZ","FN47G",
                     "kThNf","aC4fq","rzDwi","CYRNG","gCeuG","wCVqO","d1R60","bEauW",
                     "KeUwW","lIKhO","RfPv3","dK5wE","1X7qu","tRwEn","1c03P","GwHCl",
                     "CsJaO","zl4j1","e0aEc","Uskgi","rgTGR","jyR4g","Tt6l4","lRoaw",
                     "94ult","qZwBX","eYW8S","Qf6UH","AbV56","N1hJq","JIaVe","8LHEx",
                     "DeNbS","30I0a","hm6qw","3jcaO","4WkuA","mQ219","Gb81C","yx4HM",
                     "Chlqfi9S1y", "BMwUgVFu2X", "ZmpEGOVrVe", "13ggJxrPkC", "fcJJpyMGjm", "9T00Dv4ZAb",
                     "p3YRcP7M2o", "sR0qNUXCHv", "gCxWZbJ6rb", "R4YtzRXXUl", "vwyYz5j6pY", "XPWUvLXhJ7",
                     "7PwfnVVb7U", "1f34Q6hOYz", "1EM2abZY61", "0a6Ivi4S0a", "Teq2LrQs2T", "dWXLCgWHc8",
                     "LawMv7ujn4", "N8VFgbZQx5", "tfvHHxoDgi", "ImwYgXA2tf", "KkIES9NqZO", "ajcz0qjjda",
                     "6Vz28vlGs9", "VMCc5W8cCt", "BiQB8BRJ98", "43CpOJSMpA", "jfBJdqwXcU", "ecHR9EO2ib",
                     "LH7CcXyCZ7", "JntqGSgSpa", "0MbTMpZPFW", "5FJSdiCXzR", "5gda2AhA2x", "lrDFc1lnXk",
                     "zrEwECHvjs", "B0JldDxFa1", "6DYal4QxKa", "Hsqx6kP2S4", "zZwnALSuFh", "Shh4ISZcKW",
                     "P9VDaNSk7Z", "mEI2PLSCO6", "WyTyrQORtu", "IvJyMMRgh3", "Q6pgJq8Nkv", "dhOgR3tDAD",
                     "Y9h6bVgbxO", "wA15tiOPTm", "8TaIKf1zCO", "z75dzabHBs", "AS6OPnwoJI", "2DSZka9Auj",
                     "QLzUjV2CWs", "KZSN2SVhia", "7ttYKWF2ue", "1Zxfu7B2ST", "RnkpmwjsCi", "YpcSIzaqx5",
                     "RDEwFD9gmX", "Nlx3V4Cjw4", "9ZdvITOj8M", "httUPWMNXO", "Ypv9PjxGwa", "LlwyNolNnH",
                     "6xpJOht47a", "tbmz4WIdcG", "OwzuVDlb7D", "PBQKJxo8DQ", "uVnMQn7hK6", "rlnZINuDUa",
                     "2feyyYukPa", "teOlpKuDBn", "LxBSWh0dL1", "Onyb7r4Jp0", "bZxXE6xOXg", "d9NSvNTunQ",
                     "ONerLBic32", "8mar4rKmFk", "5cCN9uwaCg", "ElVrYOHHMv", "YF6Og8DX40", "OgiCwpCQ5a",
                     "K6nSRZVxdR", "gqyXXXoVFW", "ulyRYizcBP", "khUx31K5UR", "qZFRzVthju", "pQBh0vnB20",
                     "dk8NIN7ajy", "XP7ed1OjZx", "IRYNwA5iFR", "hiSEBhTukC", "Ns4jJ3jzGo", "dYoCSxjIvM",
                     "HzGLbl5i1g", "baizENd4ko", "6rCqGBO8t1", "QWGfC8UaA7", "JFhRfxQe4K", "8R4W6IWANz",
                     "2TnWf1w7JH", "0z69e0wcoG", "8SN1mRHCY7", "oFGCYHHwGX", "G8xqnBgxjO", "6B3SAOayHt",
                     "XRW3ZSG1gw", "WcIjTxMxOM", "wNqCAIaTb2", "gO4em4HW8H", "TgGFSMEtbG", "WiwmbEw3QA",
                     "D2xshYUgpu", "xRUZCQVzBs", "nCnUmMgIjE", "p4Ewt1yCJr", "MeOjDcaMY5", "1XelMeXiiI"
                  };

   static const std::size_t rand_str_size = sizeof(rand_str) / sizeof(rand_str[0]);

   for (std::size_t i = 0; i < rand_str_size; ++i)
   {
      // The table is treated as circular: s1..s6 are the six entries that
      // follow s0, wrapping around at the end.
      const std::string& s0 = rand_str[i];
      const std::string& s1 = rand_str[(i + 1) % rand_str_size];
      const std::string& s2 = rand_str[(i + 2) % rand_str_size];
      const std::string& s3 = rand_str[(i + 3) % rand_str_size];
      const std::string& s4 = rand_str[(i + 4) % rand_str_size];
      const std::string& s5 = rand_str[(i + 5) % rand_str_size];
      const std::string& s6 = rand_str[(i + 6) % rand_str_size];

      // Each combination is built once and then emitted forwards and
      // reversed (all forward forms first, then all reversed forms).
      const std::string combos[] =
                  {
                     s0,
                     s0 + s1,
                     s0 + s2 + s4,
                     s0 + s1 + s3,
                     s0 + s1 + s2 + s3 + s4 + s5,
                     s0 + s1 + s2 + s3 + s4 + s5 + s6
                  };

      const std::size_t combo_count = sizeof(combos) / sizeof(combos[0]);

      for (std::size_t j = 0; j < combo_count; ++j)
      {
         outliers.push_back(combos[j]);
      }

      for (std::size_t j = 0; j < combo_count; ++j)
      {
         outliers.push_back(std::string(combos[j].rbegin(),combos[j].rend()));
      }
   }

   std::sort(outliers.begin(),outliers.end());

   purify_outliers(word_list,outliers);

   std::cout << "Complete." << std::endl;
}
<< std::endl; 302 | } 303 | 304 | void purify_outliers(const std::vector& word_list, std::deque& outliers) 305 | { 306 | std::set set1; 307 | std::set set2; 308 | 309 | std::copy(word_list.begin(), word_list.end(), std::inserter(set1,set1.begin())); 310 | std::copy(outliers .begin(), outliers .end(), std::inserter(set2,set2.begin())); 311 | 312 | std::deque intersect_list; 313 | 314 | std::set_intersection(set1.begin(),set1.end(), 315 | set2.begin(),set2.end(), 316 | std::back_inserter(intersect_list)); 317 | 318 | std::sort(intersect_list.begin(),intersect_list.end()); 319 | 320 | if (!intersect_list.empty()) 321 | { 322 | std::deque new_outliers; 323 | 324 | for (std::deque::iterator it = outliers.begin(); it != outliers.end(); ++it) 325 | { 326 | if (!std::binary_search(intersect_list.begin(),intersect_list.end(),*it)) 327 | { 328 | new_outliers.push_back(*it); 329 | } 330 | } 331 | 332 | outliers.swap(new_outliers); 333 | } 334 | } 335 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ArashPartow/bloom/7309d713e0f02189e059a997bdb5e5dd3ea4a6a1/readme.md -------------------------------------------------------------------------------- /word-list.txt: -------------------------------------------------------------------------------- 1 | above 2 | abuse 3 | accident 4 | accord 5 | account 6 | accounting 7 | acoustic 8 | action 9 | activex 10 | adams 11 | adderss 12 | adolescent 13 | adult 14 | adults 15 | advantage 16 | advertising 17 | affirmative 18 | african 19 | agco 20 | agency 21 | aging 22 | agreement 23 | agricultural 24 | agrresiv 25 | ahmanson 26 | air 27 | airfare 28 | airfares 29 | airline 30 | airlines 31 | airplane 32 | album 33 | alcatel 34 | alcohol 35 | alexis 36 | alicia 37 | allan 38 | allergen 39 | allergens 40 | allergic 41 | allergies 42 | allergy 43 | alley 44 | alliance 45 | allysa 46 | 
alone 47 | alta 48 | alternate 49 | aluminum 50 | amateur 51 | amatuer 52 | amazonflodens 53 | america 54 | american 55 | ancient 56 | and 57 | aneima 58 | angel 59 | angeles 60 | animal 61 | animals 62 | animated 63 | animation 64 | anime 65 | aniston 66 | answering 67 | antihistamine 68 | antihistamines 69 | antonio 70 | anzwers 71 | apartment 72 | aplastic 73 | arabic 74 | arawak 75 | arco 76 | are 77 | area 78 | aristotle 79 | aristotles 80 | aristotless 81 | arizona 82 | army 83 | arquitectura 84 | art 85 | arthur 86 | articles 87 | artist 88 | artschools 89 | asheville 90 | ashland 91 | ashley 92 | asian 93 | atapi 94 | atlantic 95 | aubrie 96 | audit 97 | aunt 98 | auntie 99 | austin 100 | australia 101 | australian 102 | authorized 103 | auto 104 | autoimmune 105 | automation 106 | automobile 107 | average 108 | aviation 109 | awards 110 | babies 111 | back 112 | bacteria 113 | badbad 114 | baggett 115 | bahamas 116 | baja 117 | ball 118 | ballet 119 | ballston 120 | bands 121 | bangkok 122 | bank 123 | banking 124 | bar 125 | barbi 126 | barco 127 | bargain 128 | barreled 129 | barrymore 130 | baseball 131 | baselga 132 | bazar 133 | bc 134 | bdsm 135 | beanie 136 | beannie 137 | bearded 138 | beautiful 139 | before 140 | bell 141 | bellcore 142 | belle 143 | ben 144 | bender 145 | bennington 146 | bernoulies 147 | better 148 | betty 149 | beyond 150 | bible 151 | bikini 152 | billet 153 | binary 154 | biography 155 | biological 156 | biology 157 | birdie 158 | birth 159 | black 160 | blake 161 | blaspheme 162 | blasting 163 | bleeth 164 | blockers 165 | blood 166 | bloodhound 167 | bloody 168 | blue 169 | board 170 | boarders 171 | boat 172 | bob 173 | boca 174 | bocasxzip 175 | bode 176 | body 177 | boise 178 | bokhandel 179 | bold 180 | bollywood 181 | bombs 182 | bone 183 | book 184 | books 185 | boone 186 | borland 187 | boston 188 | boulder 189 | boulser 190 | bowls 191 | box 192 | boy 193 | bradstreet 194 | brain 195 | brand 196 | brasil 197 | brian 
198 | britannica 199 | britannicas 200 | british 201 | brittanica 202 | brokerage 203 | bromptons 204 | brother 205 | brothers 206 | brown 207 | bryan 208 | building 209 | bullwinkles 210 | bureau 211 | business 212 | cable 213 | cadillac 214 | cahiers 215 | cahsbuilder 216 | caisse 217 | caisses 218 | calaveras 219 | califorina 220 | california 221 | call 222 | calvinism 223 | camelion 224 | camping 225 | canada 226 | canadian 227 | canaveral 228 | candy 229 | cannons 230 | canterbury 231 | canveral 232 | cape 233 | capitals 234 | capitol 235 | caps 236 | card 237 | cardiovascular 238 | cards 239 | cardston 240 | career 241 | caribbean 242 | carl 243 | carles 244 | carling 245 | carol 246 | carolina 247 | carribean 248 | carrington 249 | cashbuilder 250 | casino 251 | castle 252 | catering 253 | catholic 254 | cave 255 | cdrom 256 | cedar 257 | celebs 258 | cellular 259 | celtic 260 | cement 261 | center 262 | centers 263 | centraal 264 | central 265 | century 266 | cerebrospinal 267 | certificate 268 | cervical 269 | ceverett 270 | chambers 271 | chanelst 272 | changers 273 | channel 274 | chaplin 275 | charlie 276 | charter 277 | chase 278 | chasey 279 | chat 280 | cheap 281 | cheating 282 | cheese 283 | chemical 284 | chemiosmotic 285 | chemistry 286 | cherokee 287 | chicago 288 | china 289 | chinamath 290 | chinese 291 | chipset 292 | chisa 293 | chlb 294 | chords 295 | chris 296 | christian 297 | christina 298 | christopher 299 | christy 300 | chromosomal 301 | churchs 302 | cidade 303 | cigars 304 | cindy 305 | circle 306 | civ 307 | clan 308 | class 309 | classification 310 | claudia 311 | cliff 312 | cliffs 313 | clinique 314 | clock 315 | cloister 316 | club 317 | clubscamelionthe 318 | clyde 319 | cmdlinestartjava 320 | cnn 321 | cntraveler 322 | code 323 | collectables 324 | college 325 | colleges 326 | collegiate 327 | collie 328 | colonial 329 | color 330 | colorado 331 | coloradp 332 | colubia 333 | columbia 334 | columbus 335 | comdex 336 | come 337 
| comedian 338 | comedians 339 | comedy 340 | comics 341 | communitcations 342 | companies 343 | company 344 | compiler 345 | compliance 346 | composer 347 | composers 348 | computer 349 | computers 350 | computing 351 | comsumer 352 | concerning 353 | congo 354 | congress 355 | congressional 356 | connecticut 357 | connection 358 | conservative 359 | consolidator 360 | consolidators 361 | conspiercy 362 | conspiracy 363 | construction 364 | consturction 365 | consulate 366 | contact 367 | conte 368 | contra 369 | control 370 | contry 371 | convention 372 | convert 373 | converting 374 | cookie 375 | copanys 376 | corns 377 | cornus 378 | cosmetic 379 | cost 380 | costumes 381 | countries 382 | country 383 | county 384 | cove 385 | cover 386 | crack 387 | cracks 388 | crank 389 | crash 390 | creative 391 | creativelabs 392 | credit 393 | creflo 394 | crestar 395 | crime 396 | critics 397 | cross 398 | cruise 399 | crusie 400 | cruz 401 | crysler 402 | crystal 403 | cuba 404 | cup 405 | cursing 406 | curves 407 | custom 408 | cutoff 409 | cyrex 410 | cyrilic 411 | czip 412 | daily 413 | dali 414 | damen 415 | damon 416 | dance 417 | dancing 418 | danni 419 | darby 420 | dark 421 | darts 422 | data 423 | database 424 | dating 425 | david 426 | davis 427 | days 428 | dbase 429 | death 430 | decision 431 | decription 432 | deer 433 | definition 434 | degree 435 | delta 436 | demo 437 | democracy 438 | demonic 439 | demons 440 | dennis 441 | design 442 | desjardins 443 | development 444 | devices 445 | devil 446 | deyo 447 | diamond 448 | diane 449 | diaper 450 | diapers 451 | diapper 452 | diesel 453 | different 454 | diocese 455 | dirty 456 | disabilities 457 | discounted 458 | discussion 459 | disgusting 460 | disk 461 | disney 462 | distance 463 | dive 464 | dividend 465 | division 466 | doctrines 467 | dodge 468 | dog 469 | doggy 470 | doppler 471 | douglas 472 | dover 473 | down 474 | downer 475 | download 476 | downloadable 477 | downloading 478 | dragan 479 | 
dragon 480 | dresdner 481 | drew 482 | drilling 483 | drinking 484 | drive 485 | driver 486 | drivers 487 | dsp 488 | dublin 489 | dubuque 490 | duetzallis 491 | duke 492 | earth 493 | earthquake 494 | easily 495 | east 496 | eastern 497 | easterwood 498 | ecave 499 | economic 500 | edie 501 | editing 502 | edmonton 503 | education 504 | educational 505 | egypt 506 | egyptens 507 | egyptian 508 | elder 509 | electric 510 | electronic 511 | elizabeth 512 | elle 513 | elliptic 514 | ellis 515 | email 516 | embroidery 517 | emeryville 518 | employment 519 | emulation 520 | emulator 521 | emulators 522 | encyclopidia 523 | end 524 | endangered 525 | energy 526 | engineering 527 | england 528 | english 529 | entertainment 530 | entrepreneurs 531 | episcopal 532 | ericsson 533 | error 534 | escape 535 | escort 536 | espncom 537 | essay 538 | estadistica 539 | estate 540 | etheopian 541 | evening 542 | event 543 | evil 544 | evolution 545 | examples 546 | exchange 547 | executive 548 | exercise 549 | exhibitionist 550 | expires 551 | express 552 | fact 553 | faculdade 554 | faculty 555 | fajitas 556 | falcons 557 | false 558 | family 559 | fan 560 | fantasy 561 | farm 562 | farmaceutica 563 | farmacia 564 | farmacie 565 | farming 566 | farrah 567 | farrier 568 | federal 569 | female 570 | females 571 | fetch 572 | fiat 573 | fidelity 574 | fifa 575 | files 576 | fille 577 | film 578 | final 579 | find 580 | finders 581 | finding 582 | findings 583 | fire 584 | firearms 585 | firewall 586 | firms 587 | first 588 | fish 589 | fitniss 590 | five 591 | fkk 592 | flavors 593 | fleetwood 594 | fletcher 595 | flight 596 | floorplan 597 | florida 598 | flow 599 | fluid 600 | flute 601 | fly 602 | flying 603 | fmidizip 604 | folks 605 | followup 606 | football 607 | footballand 608 | for 609 | fordham 610 | foreign 611 | forest 612 | form 613 | formula 614 | foster 615 | foundation 616 | fourteeners 617 | foxwood 618 | france 619 | frank 620 | franklin 621 | free 622 | freebies 
623 | freelance 624 | freewarez 625 | froeign 626 | ftp 627 | fugitive 628 | furniture 629 | gabriella 630 | galleries 631 | gallery 632 | game 633 | gameboy 634 | games 635 | garden 636 | gardening 637 | garnet 638 | garry 639 | garters 640 | gary 641 | gender 642 | general 643 | geneseo 644 | genesis 645 | genetic 646 | geneva 647 | genital 648 | genrel 649 | geologic 650 | georgina 651 | geriatric 652 | gigabit 653 | girl 654 | girls 655 | glass 656 | gleaner 657 | global 658 | gloria 659 | glory 660 | gmbh 661 | go 662 | golden 663 | golf 664 | good 665 | goodman 666 | gopher 667 | goth 668 | gotu 669 | government 670 | graduate 671 | grand 672 | graphical 673 | graphics 674 | grateful 675 | greeces 676 | greenbay 677 | greensburg 678 | grendal 679 | grill 680 | grit 681 | gross 682 | ground 683 | group 684 | growth 685 | guardian 686 | guide 687 | guiding 688 | guitar 689 | gun 690 | haitian 691 | ham 692 | handbook 693 | handmade 694 | hardrock 695 | harrah 696 | harvard 697 | haven 698 | hawaii 699 | headphone 700 | headphones 701 | health 702 | heart 703 | heat 704 | heather 705 | heathrow 706 | height 707 | heights 708 | hellfire 709 | help 710 | her 711 | herbology 712 | hexeh 713 | hexen 714 | high 715 | highlander 716 | hinduism 717 | hints 718 | histology 719 | historic 720 | historical 721 | history 722 | hit 723 | holiday 724 | holy 725 | home 726 | homes 727 | hong 728 | horseback 729 | horticulture 730 | horton 731 | hot 732 | hotel 733 | hotels 734 | hott 735 | house 736 | houston 737 | how 738 | howard 739 | human 740 | hunting 741 | hurricane 742 | ian 743 | ibizan 744 | ibm 745 | idaho 746 | iici 747 | ilands 748 | image 749 | immigration 750 | immortal 751 | immortals 752 | implant 753 | importer 754 | importers 755 | in 756 | incognito 757 | indentured 758 | india 759 | indian 760 | industrial 761 | industries 762 | industry 763 | inflight 764 | information 765 | informix 766 | ingalls 767 | inquirer 768 | instant 769 | integrity 770 | intel 
771 | international 772 | internet 773 | introduction 774 | inventory 775 | investigator 776 | investigators 777 | investing 778 | iradation 779 | iran 780 | irish 781 | iron 782 | ironing 783 | irradiated 784 | is 785 | iso 786 | isp 787 | issues 788 | italian 789 | item 790 | jackie 791 | jacobs 792 | jacques 793 | jamboree 794 | jameson 795 | jane 796 | janin 797 | janine 798 | japan 799 | japanese 800 | japenese 801 | jason 802 | jenna 803 | jenny 804 | jerky 805 | jerkys 806 | jerry 807 | jersey 808 | jessica 809 | jim 810 | jimi 811 | jo 812 | joann 813 | job 814 | jobe 815 | jobs 816 | john 817 | johnny 818 | jokes 819 | jose 820 | journal 821 | journalism 822 | jr 823 | js 824 | juan 825 | julian 826 | julie 827 | juluis 828 | jump 829 | jungle 830 | junior 831 | jurassic 832 | kalman 833 | kama 834 | kansas 835 | kart 836 | kelley 837 | kelly 838 | kemble 839 | kenneth 840 | kenwood 841 | key 842 | keys 843 | kidman 844 | killerserieal 845 | killington 846 | kings 847 | kins 848 | kiosk 849 | kirin 850 | kirk 851 | kitty 852 | kittycom 853 | klinefelter 854 | knave 855 | knitting 856 | knowledge 857 | kong 858 | korn 859 | kruger 860 | ksfytv 861 | kunai 862 | labor 863 | lack 864 | ladies 865 | lady 866 | lake 867 | lakeland 868 | lambda 869 | languages 870 | lantronix 871 | large 872 | larry 873 | las 874 | late 875 | latvia 876 | laura 877 | learning 878 | leasure 879 | leeann 880 | legault 881 | legg 882 | legs 883 | les 884 | lesson 885 | leters 886 | letterman 887 | lexus 888 | library 889 | licked 890 | lien 891 | life 892 | lifestyle 893 | light 894 | lightning 895 | limousine 896 | line 897 | lingerie 898 | lisa 899 | list 900 | literature 901 | little 902 | live 903 | livestock 904 | ljr 905 | lna 906 | locating 907 | locke 908 | logitech 909 | london 910 | long 911 | lord 912 | loren 913 | lorena 914 | lorissa 915 | los 916 | lotto 917 | lotus 918 | louis 919 | love 920 | low 921 | lucy 922 | luftmeister 923 | lukas 924 | mac 925 | machine 926 | 
macintosh 927 | mackenzie 928 | macleod 929 | macpherson 930 | macross 931 | macworld 932 | madden 933 | magazines 934 | magellen 935 | mail 936 | main 937 | maker 938 | male 939 | males 940 | mallorca 941 | malone 942 | man 943 | management 944 | manhattan 945 | map 946 | maple 947 | maps 948 | marcus 949 | margaret 950 | mariah 951 | marilyn 952 | marine 953 | mario 954 | market 955 | marketing 956 | marriage 957 | marry 958 | marshall 959 | martha 960 | marthas 961 | martina 962 | marys 963 | mason 964 | massachussets 965 | master 966 | math 967 | matrimony 968 | mattress 969 | maui 970 | mayflower 971 | mayo 972 | mccurleygenealogy 973 | mci 974 | mcneal 975 | mcpherson 976 | meadow 977 | meatloaf 978 | mechanical 979 | media 980 | medical 981 | medicare 982 | medicinal 983 | medicine 984 | meditrranean 985 | meeting 986 | melissa 987 | melrose 988 | memorials 989 | memory 990 | men 991 | meow 992 | mercury 993 | merrimack 994 | mesopotamia 995 | mesopotamian 996 | messenger 997 | meta 998 | metal 999 | metalicca 1000 | metallica 1001 | mexican 1002 | mexico 1003 | miami 1004 | michael 1005 | michigan 1006 | microbiology 1007 | microsoft 1008 | midcontinent 1009 | middle 1010 | midi 1011 | midisoft 1012 | migraine 1013 | migraines 1014 | mike 1015 | military 1016 | millicom 1017 | mineralcrank 1018 | mining 1019 | ministy 1020 | mira 1021 | mirc 1022 | miserables 1023 | mission 1024 | mitosis 1025 | model 1026 | modeling 1027 | moderate 1028 | modicon 1029 | mods 1030 | molecular 1031 | monash 1032 | monday 1033 | monet 1034 | monterey 1035 | montezuma 1036 | montgomery 1037 | moon 1038 | mortgage 1039 | mossberg 1040 | mossbergcom 1041 | most 1042 | motel 1043 | motherboard 1044 | motley 1045 | motocross 1046 | motor 1047 | motorcycle 1048 | mount 1049 | mountain 1050 | mouvement 1051 | movie 1052 | mozart 1053 | mpeg 1054 | msft 1055 | msn 1056 | mtv 1057 | murderers 1058 | murders 1059 | museum 1060 | music 1061 | mutation 1062 | mutual 1063 | my 1064 | myob 
1065 | myra 1066 | nacional 1067 | names 1068 | namibia 1069 | naples 1070 | nasal 1071 | nast 1072 | natasha 1073 | national 1074 | native 1075 | nato 1076 | naturally 1077 | naughty 1078 | naval 1079 | nba 1080 | near 1081 | nema 1082 | netscape 1083 | network 1084 | networking 1085 | new 1086 | newton 1087 | nichole 1088 | nickel 1089 | night 1090 | nike 1091 | niki 1092 | nikki 1093 | nirvana 1094 | nittany 1095 | nixonsan 1096 | nmma 1097 | noaa 1098 | non 1099 | noras 1100 | north 1101 | northern 1102 | northrup 1103 | northwest 1104 | northwood 1105 | norton 1106 | nortons 1107 | norwalk 1108 | norwegian 1109 | november 1110 | ntfs 1111 | nubile 1112 | nudes 1113 | nudist 1114 | numbers 1115 | nutrition 1116 | oak 1117 | oasis 1118 | occupied 1119 | odds 1120 | odyssey 1121 | of 1122 | off 1123 | office 1124 | ohio 1125 | old 1126 | older 1127 | olive 1128 | omega 1129 | on 1130 | online 1131 | operating 1132 | opium 1133 | opportunities 1134 | oprah 1135 | organizer 1136 | origo 1137 | orlandoflorida 1138 | orleans 1139 | oshii 1140 | oslo 1141 | othello 1142 | our 1143 | outdoor 1144 | outer 1145 | outhouse 1146 | ovid 1147 | ozarks 1148 | pace 1149 | pacific 1150 | pages 1151 | paige 1152 | painting 1153 | pam 1154 | pamala 1155 | pamela 1156 | panties 1157 | paper 1158 | papua 1159 | park 1160 | parque 1161 | party 1162 | pasadena 1163 | patch 1164 | patents 1165 | paul 1166 | paulina 1167 | paxton 1168 | pbs 1169 | peer 1170 | pegasus 1171 | penn 1172 | pennsylvania 1173 | penthouse 1174 | peonies 1175 | people 1176 | pepsicola 1177 | peptic 1178 | performing 1179 | permenate 1180 | perodic 1181 | persian 1182 | person 1183 | personal 1184 | persons 1185 | pesticide 1186 | pet 1187 | petersburg 1188 | pharmaceitical 1189 | pharmaceutical 1190 | pharmacuetical 1191 | phenethyl 1192 | philippines 1193 | phone 1194 | photo 1195 | photos 1196 | photoshop 1197 | piano 1198 | pid 1199 | pike 1200 | pilgrims 1201 | pion 1202 | piracy 1203 | pirate 1204 | 
pirated 1205 | pizarro 1206 | planet 1207 | plant 1208 | plate 1209 | playgirl 1210 | plexiglass 1211 | police 1212 | polish 1213 | political 1214 | politics 1215 | pontiac 1216 | pop 1217 | pope 1218 | populaire 1219 | populaires 1220 | population 1221 | portage 1222 | positivity 1223 | post 1224 | postage 1225 | potato 1226 | power 1227 | powerball 1228 | powerful 1229 | ppd 1230 | practical 1231 | preferences 1232 | pregnancy 1233 | prehistoric 1234 | premier 1235 | prenatal 1236 | presidents 1237 | press 1238 | preteen 1239 | pricing 1240 | pricingstained 1241 | primality 1242 | prime 1243 | princes 1244 | princess 1245 | princeton 1246 | principle 1247 | private 1248 | probalidad 1249 | prodigy 1250 | programs 1251 | project 1252 | propranolol 1253 | prostate 1254 | protein 1255 | protestantism 1256 | provocative 1257 | public 1258 | publications 1259 | publisher 1260 | punish 1261 | punished 1262 | quad 1263 | qualifying 1264 | queen 1265 | quest 1266 | quick 1267 | quickening 1268 | race 1269 | rachael 1270 | racing 1271 | racingformula 1272 | racquel 1273 | radar 1274 | radation 1275 | radford 1276 | radio 1277 | radioactive 1278 | ragen 1279 | rain 1280 | raleigh 1281 | rand 1282 | reading 1283 | real 1284 | realestate 1285 | realm 1286 | realtid 1287 | recipes 1288 | recording 1289 | red 1290 | redbook 1291 | ref 1292 | referee 1293 | refugee 1294 | regeringschef 1295 | registry 1296 | related 1297 | relations 1298 | remote 1299 | renee 1300 | rental 1301 | report 1302 | reporting 1303 | reports 1304 | reptile 1305 | research 1306 | reservations 1307 | resteraint 1308 | restless 1309 | retail 1310 | return 1311 | reunion 1312 | reverend 1313 | review 1314 | rhodesian 1315 | rich 1316 | richard 1317 | richards 1318 | rides 1319 | rights 1320 | robert 1321 | roces 1322 | rochester 1323 | rock 1324 | rolling 1325 | romance 1326 | roms 1327 | rooms 1328 | root 1329 | rosarito 1330 | rosen 1331 | ross 1332 | royal 1333 | rubber 1334 | rudolf 1335 | runny 1336 
| russ 1337 | russia 1338 | sacramento 1339 | sahara 1340 | sailor 1341 | saint 1342 | sakura 1343 | salary 1344 | sales 1345 | sample 1346 | sandra 1347 | santa 1348 | sanyo 1349 | sara 1350 | sarah 1351 | satan 1352 | satanic 1353 | satanism 1354 | satanist 1355 | satanists 1356 | satans 1357 | saturn 1358 | saudi 1359 | savannah 1360 | say 1361 | scaphoid 1362 | scarlet 1363 | schedule 1364 | schiffer 1365 | school 1366 | schools 1367 | schumann 1368 | science 1369 | sciences 1370 | scouting 1371 | scouts 1372 | screen 1373 | seal 1374 | sealant 1375 | security 1376 | select 1377 | selection 1378 | seltzer 1379 | senate 1380 | senuous 1381 | sequence 1382 | service 1383 | services 1384 | shaker 1385 | shanghai 1386 | shareware 1387 | shawn 1388 | she 1389 | sheet 1390 | sheetmusic 1391 | shelby 1392 | sherman 1393 | shermanl 1394 | sheryl 1395 | shield 1396 | ship 1397 | shop 1398 | shoppingstar 1399 | shoulder 1400 | show 1401 | sick 1402 | sigma 1403 | silicone 1404 | simmons 1405 | simon 1406 | simply 1407 | simulation 1408 | singing 1409 | singles 1410 | sirens 1411 | sister 1412 | sj 1413 | skateboarding 1414 | skiing 1415 | skydome 1416 | sleazy 1417 | small 1418 | smartsuite 1419 | smog 1420 | sndshfzip 1421 | sneezing 1422 | snoop 1423 | snow 1424 | snowboard 1425 | snowboarder 1426 | snowboarders 1427 | snowboarding 1428 | snowboards 1429 | snowhalfpipe 1430 | snowskateboarding 1431 | soccer 1432 | society 1433 | software 1434 | solar 1435 | solomon 1436 | sony 1437 | sophie 1438 | sound 1439 | south 1440 | southland 1441 | space 1442 | spaceballs 1443 | spanish 1444 | spanking 1445 | speed 1446 | speices 1447 | spice 1448 | spirit 1449 | spirits 1450 | spoken 1451 | sports 1452 | sportscenter 1453 | sportszone 1454 | spread 1455 | springer 1456 | stained 1457 | stamp 1458 | stamps 1459 | standard 1460 | stanley 1461 | stanly 1462 | star 1463 | startup 1464 | state 1465 | station 1466 | step 1467 | stepping 1468 | stern 1469 | stewart 1470 | stock 1471 
| stolen 1472 | stones 1473 | strangeuniverse 1474 | strategic 1475 | street 1476 | streettalk 1477 | studio 1478 | style 1479 | subdural 1480 | submarine 1481 | submissive 1482 | submit 1483 | subspace 1484 | substance 1485 | substances 1486 | success 1487 | sumerian 1488 | sun 1489 | super 1490 | supercharger 1491 | supermodel 1492 | supermodels 1493 | supply 1494 | support 1495 | surf 1496 | surnames 1497 | susan 1498 | svga 1499 | swan 1500 | sweden 1501 | swinger 1502 | swingers 1503 | swissprot 1504 | symbols 1505 | syndrome 1506 | syntheic 1507 | system 1508 | systems 1509 | tab 1510 | tablature 1511 | tahoe 1512 | tail 1513 | taiwan 1514 | tales 1515 | talk 1516 | talking 1517 | tallahassee 1518 | tanned 1519 | tanum 1520 | tax 1521 | teasing 1522 | technical 1523 | teen 1524 | telemedicine 1525 | telephone 1526 | televions 1527 | television 1528 | tempe 1529 | test 1530 | texas 1531 | theatre 1532 | their 1533 | therapy 1534 | thinkpad 1535 | thirtyfirst 1536 | this 1537 | thong 1538 | thrace 1539 | thurobred 1540 | ticketmaster 1541 | tiernay 1542 | tietmeyer 1543 | tiffany 1544 | tijuana 1545 | time 1546 | tool 1547 | top 1548 | topic 1549 | toronto 1550 | torque 1551 | tory 1552 | traci 1553 | tracks 1554 | trader 1555 | trading 1556 | traffic 1557 | trails 1558 | trainer 1559 | training 1560 | travel 1561 | tree 1562 | tribune 1563 | trinune 1564 | tropical 1565 | trouble 1566 | truck 1567 | trucking 1568 | true 1569 | trumpet 1570 | truth 1571 | ttol 1572 | tucson 1573 | tumi 1574 | tupac 1575 | tuskegee 1576 | tutionbest 1577 | tutorial 1578 | type 1579 | uk 1580 | ukraine 1581 | ultra 1582 | umex 1583 | uncirns 1584 | uncle 1585 | under 1586 | understanding 1587 | uni 1588 | unified 1589 | uniform 1590 | united 1591 | universal 1592 | universi 1593 | universidade 1594 | universities 1595 | university 1596 | unix 1597 | upper 1598 | us 1599 | user 1600 | utilities 1601 | utility 1602 | vacation 1603 | valencia 1604 | valhalla 1605 | validate 1606 | 
valley 1607 | vanessa 1608 | veitnam 1609 | velocity 1610 | venezuela 1611 | venus 1612 | verapamil 1613 | vermont 1614 | vespa 1615 | victims 1616 | victorias 1617 | video 1618 | vietnam 1619 | villa 1620 | villanova 1621 | vineyards 1622 | violence 1623 | virginia 1624 | virtual 1625 | visa 1626 | vision 1627 | visual 1628 | vitamin 1629 | volcano 1630 | wall 1631 | wally 1632 | war 1633 | warcraft 1634 | warez 1635 | warfield 1636 | warner 1637 | washington 1638 | water 1639 | wave 1640 | way 1641 | weapons 1642 | weather 1643 | web 1644 | webchat 1645 | website 1646 | weehawkin 1647 | went 1648 | werz 1649 | west 1650 | westminster 1651 | whale 1652 | wheeler 1653 | wheeling 1654 | white 1655 | wholesale 1656 | wholistic 1657 | whore 1658 | why 1659 | wicked 1660 | wide 1661 | widow 1662 | widows 1663 | wife 1664 | wild 1665 | william 1666 | williamsburg 1667 | wilson 1668 | wind 1669 | windows 1670 | wine 1671 | winfrey 1672 | winter 1673 | witch 1674 | with 1675 | wives 1676 | wizard 1677 | wolf 1678 | wolfgang 1679 | woman 1680 | women 1681 | work 1682 | working 1683 | world 1684 | yahoo 1685 | yale 1686 | yasmine 1687 | yeast 1688 | yoda 1689 | york 1690 | young 1691 | your 1692 | zenith 1693 | zip --------------------------------------------------------------------------------