├── Python
└── pd_remove.py
├── .idea
├── .name
├── Comparing_Filters.iml
├── vcs.xml
├── .gitignore
├── misc.xml
└── modules.xml
├── cuckoofilter
├── .gitignore
├── benchmarks
│ ├── timing.h
│ ├── Makefile
│ ├── random.h
│ ├── conext-figure5.cc
│ └── conext-table3.cc
├── LICENSE
├── src
│ ├── printutil.cc
│ ├── printutil.h
│ ├── bitsutil.h
│ ├── debug.h
│ ├── permencoding.h
│ ├── hashutil.h
│ ├── simd-block.h
│ ├── singletable.h
│ └── cuckoofilter.h
├── Makefile
├── example
│ └── test.cc
├── .clang-format
└── README.md
├── PD_Filter
├── HashTables
│ ├── HashTables_Tests
│ │ ├── console.txt
│ │ └── Hashtable_main.cpp
│ ├── HistoryLog.cpp
│ ├── Level3.hpp
│ └── Spare_Validator.hpp
├── Fixed_PD
│ ├── v_tests.hpp
│ ├── v_tests.cpp
│ ├── wrap_fpd.hpp
│ ├── fixed_pd_45.cpp
│ ├── wrap_fpd.cpp
│ └── fpd.cpp
├── macros.h
├── basic_function_util.h
├── basic_function_util.cpp
├── Dict320
│ ├── pd256.cpp
│ └── pd320.cpp
└── L2_pd
│ ├── temp_main.cpp
│ └── tpd.cpp
├── Hash_functions
├── BobJenkins.h
├── MurmurHash3.h
├── wyhash.h
├── xxhash64.h
└── MurmurHash3.cpp
├── Tests
├── new_tests.cpp
└── minimal_tests.hpp
├── LICENSE
├── .gitignore~
├── .gitignore
├── morton
├── morton_filter.h
├── test_util.h
├── fixed_point.h
├── morton_util.h
├── hash_util.h
├── bf.h
├── vector_types.h
├── compressed_cuckoo_config.h
└── util.h
├── .clang-format
├── Bloom_Filter
├── bloom.hpp
└── simd-block.h
├── README.md
├── hashutil.h
└── Spec_main.cpp
/Python/pd_remove.py:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/.idea/.name:
--------------------------------------------------------------------------------
1 | Comparing_Filters_Simpler
--------------------------------------------------------------------------------
/.idea/Comparing_Filters.iml:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/cuckoofilter/.gitignore:
--------------------------------------------------------------------------------
1 | #*
2 | #*#
3 | *#
4 | *.*#
5 | *.class
6 | *.dSYM
7 | *.la
8 | *.lo
9 | *.o
10 | *.so
11 | test
12 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
1 | # Default ignored files
2 | /shelf/
3 | /workspace.xml
4 | # Datasource local storage ignored files
5 | /dataSources/
6 | /dataSources.local.xml
7 | # Editor-based HTTP Client requests
8 | /httpRequests/
9 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/cuckoofilter/benchmarks/timing.h:
--------------------------------------------------------------------------------
1 | // Timers for use in benchmarking.
2 |
3 | #pragma once
4 |
5 | #include
6 | #include
7 |
/// Returns the steady clock's time-since-epoch reading, in nanoseconds.
///
/// Marked `inline` because this function is defined in a header: without it,
/// including the header from more than one translation unit produces
/// multiple-definition link errors.
inline ::std::uint64_t NowNanos() {
  const auto since_epoch = ::std::chrono::steady_clock::now().time_since_epoch();
  return static_cast<::std::uint64_t>(
      ::std::chrono::duration_cast<::std::chrono::nanoseconds>(since_epoch).count());
}
13 |
--------------------------------------------------------------------------------
/PD_Filter/HashTables/HashTables_Tests/console.txt:
--------------------------------------------------------------------------------
1 | TODO!!!! deleting the first quot and rem, instead of the 4th.
2 | pd_index quot rem
3 | 37130 7 127
4 |
5 |
6 |
7 | c_remove case 3
8 | capacity: 5
9 | 0)[3, 1, 199, , 0]
10 | 1)[4, 1, 133, , 0]
11 | 2)[10, 14, 10, , 1]
12 | 3)[10, 7, 127, , 1]
13 | 4)[24, 8, 102, , 0]
14 | 14:
15 | pd_index quot rem
16 | 37130 7 127
17 |
18 | c_remove case 3
19 | capacity: 4
20 | 0)[3, 1, 133, , 0]
21 | 1)[4, 14, 10, , 1]
22 | 2)[10, 7, 127, , 1]
23 | 3)[24, 8, 102, , 0]
--------------------------------------------------------------------------------
/Hash_functions/BobJenkins.h:
--------------------------------------------------------------------------------
1 | #ifndef CF_BOBJENKINGS_H
2 | #define CF_BOBJENKINGS_H
3 | #include
4 | #include
5 | #include
6 |
7 | // using namespace std;
8 | // Bob Jenkins Hash
namespace BobJenkins {
    /// Hashes `length` bytes starting at `buf`; `seed` defaults to 0.
    /// Only the declaration lives here; the definition is in another file.
    auto BobHash(const void *buf, size_t length, uint32_t seed = 0) -> uint32_t;

    /// Overload that reports its result through the two out-pointers
    /// `idx1` and `idx2` instead of a return value.
    auto BobHash(const void *buf, size_t length, uint32_t *idx1, uint32_t *idx2) -> void;
    //    static uint64_t BobHash(const void *buf, size_t length, uint32_t seed = 0);
}// namespace BobJenkins
15 |
16 | #endif// CF_BOBJENKINGS_H
17 |
--------------------------------------------------------------------------------
/Tests/new_tests.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by root on 31/05/2020.
3 | //
4 |
5 | #include "new_tests.hpp"
6 |
7 | auto example1() {
8 | ulong shift = 16u;
9 | size_t filter_max_capacity = 1u << shift;
10 | size_t lookup_reps = 1u << (shift + 2u);
11 | size_t error_power_inv = BITS_PER_ELEMENT_MACRO;
12 | size_t bench_precision = 16;
13 | // b_all_wrapper(filter_max_capacity, lookup_reps, error_power_inv, bench_precision,
14 | // 1, 1, 1, 1, 1);
15 | }
16 |
17 |
18 |
19 |
20 |
21 |
22 |
--------------------------------------------------------------------------------
/cuckoofilter/benchmarks/Makefile:
--------------------------------------------------------------------------------
1 | # Uncomment one of the following to switch between debug and opt mode
2 | OPT = -O3 -DNDEBUG
3 | #OPT = -g -ggdb
4 |
5 | CXXFLAGS += -fno-strict-aliasing -Wall -std=c++11 -I. -I../src/ $(OPT) -march=core-avx2
6 |
7 | LDFLAGS+= -Wall -lpthread -lssl -lcrypto
8 |
9 | HEADERS = $(wildcard ../src/*.h) *.h
10 |
11 | SRC = ../src/hashutil.cc
12 |
13 | .PHONY: all
14 |
15 | BINS = conext-table3.exe conext-figure5.exe bulk-insert-and-query.exe
16 |
17 | all: $(BINS)
18 |
19 | clean:
20 | /bin/rm -f $(BINS)
21 |
22 | %.exe: %.cc ${HEADERS} ${SRC} Makefile
23 | $(CXX) $(CXXFLAGS) $< -o $@ $(SRC) $(LDFLAGS)
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2021, All rights reserved.
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 |
--------------------------------------------------------------------------------
/cuckoofilter/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (C) 2013, Carnegie Mellon University and Intel Corporation
2 |
3 | Licensed under the Apache License, Version 2.0 (the "License");
4 | you may not use this file except in compliance with the License.
5 | You may obtain a copy of the License at
6 |
7 | http://www.apache.org/licenses/LICENSE-2.0
8 |
9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 |
--------------------------------------------------------------------------------
/cuckoofilter/src/printutil.cc:
--------------------------------------------------------------------------------
1 | #include "printutil.h"
2 |
3 | #include
4 |
5 | #include
6 |
7 | namespace cuckoofilter {
8 |
9 | std::string PrintUtil::bytes_to_hex(const char *data, size_t len) {
10 | std::string hexstr = "";
11 | static const char hexes[] = "0123456789ABCDEF ";
12 |
13 | for (size_t i = 0; i < len; i++) {
14 | unsigned char c = data[i];
15 | hexstr.push_back(hexes[c >> 4]);
16 | hexstr.push_back(hexes[c & 0xf]);
17 | hexstr.push_back(hexes[16]);
18 | }
19 | return hexstr;
20 | };
21 |
22 | std::string PrintUtil::bytes_to_hex(const std::string &s) {
23 | return bytes_to_hex((const char *)s.data(), s.size());
24 | };
25 |
26 | } // namespace cuckoofilter
27 |
--------------------------------------------------------------------------------
/PD_Filter/Fixed_PD/v_tests.hpp:
--------------------------------------------------------------------------------
1 | #ifndef FILTERS_Wrap_Fixed_pd_TESTS_HPP
2 | #define FILTERS_Wrap_Fixed_pd_TESTS_HPP
3 |
4 | #include "Wrap_Fixed_pd.hpp"
5 |
6 | namespace fpd_tests {
7 |
8 | // using namespace Wrap_Fixed_pd;
9 |
10 | auto insert_find_single(int64_t quot, uint8_t rem, Wrap_Fixed_pd::packed_fpd *pd) -> bool;
11 |
12 | auto insert_find_single_with_capacity(int64_t quot, uint8_t rem, Wrap_Fixed_pd::packed_fpd *pd) -> bool;
13 |
14 | auto insert_find_all(Wrap_Fixed_pd::packed_fpd *pd) -> bool;
15 |
16 | auto insert_find_all() -> bool;
17 |
18 | auto rand_test1() -> bool;
19 |
20 | auto rand_test2() -> bool;
21 |
22 | auto determ_no_false_positive()->bool;
23 |
24 |
25 | }// namespace fpd_tests
26 |
27 | #endif//FILTERS_Wrap_Fixed_pd_TESTS_HPP
--------------------------------------------------------------------------------
/Hash_functions/MurmurHash3.h:
--------------------------------------------------------------------------------
1 | #ifndef CF_MURMURHASH3_H
2 | #define CF_MURMURHASH3_H
3 | #include
4 |
5 | //-----------------------------------------------------------------------------
6 | // MurmurHash3 was written by Austin Appleby, and is placed in the public
7 | // domain. The author hereby disclaims copyright to this source code.
8 | //-----------------------------------------------------------------------------
9 | // namespace MurmurHash {
// All three entry points share one signature: `len` bytes at `key`, a 32-bit
// `seed`, and an untyped output pointer `out`.
// NOTE(review): the required size of the `out` buffer is not visible from this
// header - confirm against MurmurHash3.cpp before calling.
void MurmurHash3_x86_32(const void *key,
                        int len,
                        uint32_t seed,
                        void *out);

void MurmurHash3_x86_128(const void *key,
                         int len,
                         uint32_t seed,
                         void *out);

void MurmurHash3_x64_128(const void *key,
                         int len,
                         uint32_t seed,
                         void *out);
15 | // }
16 | //-----------------------------------------------------------------------------
17 |
18 | #endif // CF_MURMURHASH3_H
--------------------------------------------------------------------------------
/cuckoofilter/src/printutil.h:
--------------------------------------------------------------------------------
1 | #ifndef CUCKOO_FILTER_PRINTUTIL_H_
2 | #define CUCKOO_FILTER_PRINTUTIL_H_
3 |
4 | #include
5 |
namespace cuckoofilter {
// Static-only helper for turning raw bytes into a printable hex dump.
class PrintUtil {
 public:
  // Renders `len` bytes starting at `data` as text: each byte becomes two
  // upper-case hex digits followed by a single space, so the result has
  // exactly 3 * len characters (and ends with a space when len > 0).
  static std::string bytes_to_hex(const char *data, size_t len) {
    // Index 16 of the table is the separator character.
    static const char hexes[] = "0123456789ABCDEF ";

    std::string hexstr;
    hexstr.reserve(len * 3);  // final size is known; avoid repeated regrowth

    for (size_t i = 0; i < len; i++) {
      // Go through unsigned char so bytes >= 0x80 index the table correctly.
      const unsigned char c = data[i];
      hexstr.push_back(hexes[c >> 4]);
      hexstr.push_back(hexes[c & 0xf]);
      hexstr.push_back(hexes[16]);
    }
    return hexstr;
  }

  // Convenience overload: hex-dumps the contents of `s`.
  static std::string bytes_to_hex(const std::string &s) {
    return bytes_to_hex(s.data(), s.size());
  }

 private:
  // Not instantiable: the class only groups static helpers.
  PrintUtil() = delete;
};  // class PrintUtil

}  // namespace cuckoofilter
31 |
32 | #endif // CUCKOO_FILTER_PRINTUTIL_H_
33 |
--------------------------------------------------------------------------------
/.gitignore~:
--------------------------------------------------------------------------------
1 | /cmake-build-debug/
2 | .vscode/
3 | .history
4 | build/
5 | PD_Filter/TPD_Filter/att_hTable.hpp
6 | PD_Filter/Dict512/terminal
7 | heaptracks/xxhash.h
8 | xxhash.h
9 | heaptracks/
10 | Terminal_history
11 | PD_Filter/Dict320/Dict512_Ver2.hpp
12 | PD_Filter/Dict320/Dict512_With_CF.hpp
13 | PD_Filter/Dict320/pd320_old.cpp
14 | PD_Filter/Dict320/pd320_old.hpp
15 | # PD_Filter/Dict320/pd320_v2.cpp
16 | # PD_Filter/Dict320/pd320_v2.hppBuilt_and_Run_valid_test.sh
17 | Built_and_Run_bench.sh
18 | PD_Filter/HashTables/hashTable64.hpp
19 | PD_Filter/Dict512/pd512_plus_old.cpp
20 | PD_Filter/Dict512/pd512_plus_old.hpp
21 |
22 | Tests/profiling_outputs.
23 | perf-diff.txt
24 | perf.data
25 | perf.data.old
26 | temp.txt
27 |
28 | l1_minimal_search_v25.txt
29 | cmake-build-debug
30 | PD_Filter/Dict512/Comparing-searches.txtcmake-build-debug-coverage/
31 | cmake-build-debug-coverage
32 | PD_Filter/Dict512/Comparing-searches.txt
33 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /cmake-build-debug/
2 | .vscode/
3 | .history
4 | build/
5 | PD_Filter/TPD_Filter/att_hTable.hpp
6 | PD_Filter/Dict512/terminal
7 | heaptracks/xxhash.h
8 | xxhash.h
9 | heaptracks/
10 | Terminal_history
11 | PD_Filter/Dict320/Dict512_Ver2.hpp
12 | PD_Filter/Dict320/Dict512_With_CF.hpp
13 | PD_Filter/Dict320/pd320_old.cpp
14 | PD_Filter/Dict320/pd320_old.hpp
15 | # PD_Filter/Dict320/pd320_v2.cpp
16 | # PD_Filter/Dict320/pd320_v2.hppBuilt_and_Run_valid_test.sh
17 | Built_and_Run_bench.sh
18 | PD_Filter/HashTables/hashTable64.hpp
19 | PD_Filter/Dict512/pd512_plus_old.cpp
20 | PD_Filter/Dict512/pd512_plus_old.hpp
21 |
22 | Tests/profiling_outputs.
23 | perf-diff.txt
24 | perf.data
25 | perf.data.old
26 | temp.txt
27 |
28 | l1_minimal_search_v25.txt
29 | cmake-build-debug
30 | PD_Filter/Dict512/Comparing-searches.txtcmake-build-debug-coverage/
31 | cmake-build-debug-coverage
32 | PD_Filter/Dict512/Comparing-searches.txt
33 |
34 | packed_spare (backup before changing hash function).hpp
35 | Helpfull-Commands.txt
36 | temp_res.txt
37 | cuckoofilter/src/Important benching resuts.txt
--------------------------------------------------------------------------------
/cuckoofilter/Makefile:
--------------------------------------------------------------------------------
1 | CC = g++
2 | AR = ar
3 | PREFIX=/usr/local
4 |
5 | # Uncomment one of the following to switch between debug and opt mode
6 | #OPT = -O3 -DNDEBUG
7 | OPT = -g -ggdb
8 |
9 | CFLAGS += --std=c++11 -fno-strict-aliasing -Wall -c -I. -I./include -I/usr/include/ -I./src/ $(OPT)
10 |
11 | LDFLAGS+= -Wall -lpthread -lssl -lcrypto
12 |
13 | LIBOBJECTS = \
14 | ./src/hashutil.o \
15 |
16 | HEADERS = $(wildcard src/*.h)
17 | ALIB = libcuckoofilter.a
18 |
19 | TEST = test
20 |
21 | all: $(TEST)
22 |
23 | clean:
24 | rm -f $(TEST) */*.o
25 |
26 | test: example/test.o $(LIBOBJECTS)
27 | $(CC) example/test.o $(LIBOBJECTS) $(LDFLAGS) -o $@
28 |
29 | %.o: %.cc ${HEADERS} Makefile
30 | $(CC) $(CFLAGS) $< -o $@
31 |
32 | $(ALIB): $(LIBOBJECTS)
33 | $(AR) rcs $@ $(LIBOBJECTS)
34 |
35 | .PHONY: install
36 | install: $(ALIB)
37 | install -D -m 0755 $(HEADERS) -t $(DESTDIR)$(PREFIX)/include/cuckoofilter
38 | install -D -m 0755 $< -t $(DESTDIR)$(PREFIX)/lib
39 |
40 | .PHONY: uninstall
41 | uninstall:
42 | rm -f $(DESTDIR)$(PREFIX)/lib/$(ALIB)
43 | rm -rf $(DESTDIR)$(PREFIX)/include/cuckoofilter
44 |
--------------------------------------------------------------------------------
/cuckoofilter/src/bitsutil.h:
--------------------------------------------------------------------------------
1 | #ifndef CUCKOO_FILTER_BITS_H_
2 | #define CUCKOO_FILTER_BITS_H_
3 |
namespace cuckoofilter {

// Word-parallel "does any lane equal zero / equal n" tests, adapted from
// http://www-graphics.stanford.edu/~seander/bithacks.html#ZeroInWord
// The numeric suffix is the lane width in bits; the constants cover four lanes.
// Each macro yields a non-zero value when a matching lane is found.
#define haszero4(x) (((x)-0x1111ULL) & (~(x)) & 0x8888ULL)
#define hasvalue4(x, n) (haszero4((x) ^ (0x1111ULL * (n))))

#define haszero8(x) (((x)-0x01010101ULL) & (~(x)) & 0x80808080ULL)
#define hasvalue8(x, n) (haszero8((x) ^ (0x01010101ULL * (n))))

#define haszero12(x) (((x)-0x001001001001ULL) & (~(x)) & 0x800800800800ULL)
#define hasvalue12(x, n) (haszero12((x) ^ (0x001001001001ULL * (n))))

#define haszero16(x) \
  (((x)-0x0001000100010001ULL) & (~(x)) & 0x8000800080008000ULL)
#define hasvalue16(x, n) (haszero16((x) ^ (0x0001000100010001ULL * (n))))

// Rounds x up to a power of two (a value that already is one is returned
// unchanged). An input of 0 wraps around and yields 0.
inline uint64_t upperpower2(uint64_t x) {
  --x;
  // Smear the highest set bit into every lower position: shifts 1, 2, ..., 32.
  for (unsigned shift = 1; shift < 64; shift <<= 1) {
    x |= x >> shift;
  }
  return x + 1;
}

}  // namespace cuckoofilter
34 |
#endif  // CUCKOO_FILTER_BITS_H_
36 |
--------------------------------------------------------------------------------
/PD_Filter/macros.h:
--------------------------------------------------------------------------------
1 | //
2 | // Created by tomer on 11/9/19.
3 | //
4 |
5 | #ifndef CLION_CODE_MACROS_H
6 | #define CLION_CODE_MACROS_H
7 |
8 | #define HEADER_BLOCK_SIZE (32)
9 | #define HEADER_BLOCK_TYPE uint32_t
10 |
11 | #define BODY_BLOCK_SIZE (32)
12 | #define BODY_BLOCK_TYPE uint32_t
13 | #define FP_TYPE_SIZE (32)
14 | #define FP_TYPE uint32_t
15 |
16 | #define SL(p) (1ULL <<(p))
17 | #define MASK(perm) ( (1ULL <<(perm)) - 1ULL)
18 | #define MASK32 (4294967295ul)
19 | #define ON(a, b) (MASK(a) ^ MASK(b))
20 | #define OFF(a, b) (~(MASK(a) ^ MASK(b)))
21 | #define MOD_INVERSE(n) (HEADER_BLOCK_SIZE - (n) - 1)
22 | #define INTEGER_ROUND(a, b) (((a)/(b)) + ((a)%(b) != 0))
23 | #define MB_BIT(n) (1ULL << (MB - (n)))
24 |
25 | ////hash_util macros
26 | #define DEFAULT_SEED 2
27 | #define SECOND_SEED 42
28 | #define HASH_BLOCK_TYPE uint32_t
29 | #define HASH_BLOCK_SIZE (32)
30 | #define MAX_MULTI (1ULL<<10)
31 | #define D_TYPE uint32_t
32 | #define D_TYPE_SIZE (32)
33 | #define HASH_SEED (127)
34 |
35 | ////Debug macros
36 | #define DB (0)
37 | #define DB_PRINT (0)
38 |
39 |
40 | #define S_TYPE uint32_t
41 | #define MAX_CUCKOO_LOOP (256)
42 | #define EMPTY (0x80000000)
43 | #define DEFAULT_BUCKET_SIZE (4)
44 |
45 | constexpr static uint32_t HTA_seed1{123123}, HTA_seed2{456456};
46 |
47 | //#define MAX_CUCKOO_LOOP_MULT (128)
48 |
49 |
50 | #endif //CLION_CODE_MACROS_H
51 |
--------------------------------------------------------------------------------
/morton/morton_filter.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2019 Advanced Micro Devices, Inc.
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 |
22 | Author: Alex D. Breslow
23 | Advanced Micro Devices, Inc.
24 | AMD Research
25 | */
26 | #ifndef _MORTON_FILTER_H
27 | #define _MORTON_FILTER_H
28 |
29 | // The main implementation is in compressed_cuckoo_filter.h.
30 |
31 | #include "morton_sample_configs.h"
32 |
33 | #endif
34 |
--------------------------------------------------------------------------------
/morton/test_util.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2019 Advanced Micro Devices, Inc.
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 |
22 | Author: Alex D. Breslow
23 | Advanced Micro Devices, Inc.
24 | AMD Research
25 | */
26 | #ifndef _TEST_UTIL_H
27 | #define _TEST_UTIL_H
28 |
namespace Test{
  /// Maps a boolean outcome to the label "SUCCESS" (true) or "FAILURE" (false).
  ///
  /// `inline` because the function is defined in a header; without it every
  /// translation unit that includes this file emits its own definition and
  /// the link fails.
  inline std::string pass(bool success_status){
    return std::string(success_status ? "SUCCESS" : "FAILURE");
  }
}  // namespace Test
34 |
35 | #endif
36 |
--------------------------------------------------------------------------------
/cuckoofilter/src/debug.h:
--------------------------------------------------------------------------------
1 | #ifndef CUCKOO_FILTER_DEBUG_H_
2 | #define CUCKOO_FILTER_DEBUG_H_
3 |
4 | #include // for perror
5 |
namespace cuckoofilter {

#ifndef DEBUG
//#define DEBUG
#endif

// Bitmask of the DEBUG_* categories that DPRINTF / DEBUG_PERROR act on.
// Macros expand lazily, so referring to DEBUG_* values defined further down
// in this file is fine.
#define debug_level (DEBUG_ERRS | DEBUG_CUCKOO)

#ifdef DEBUG
// extern unsigned int debug;

/*
 * a combination of DEBUG_ERRS, DEBUG_CUCKOO, DEBUG_TABLE, DEBUG_ENCODE
 */

// Prints a printf-style message to stdout when `level` is enabled in
// debug_level. A format string is always required, so plain __VA_ARGS__ is
// sufficient (no GNU `##` comma-swallowing needed).
#define DPRINTF(level, ...)                                  \
  do {                                                       \
    if (debug_level & (level)) fprintf(stdout, __VA_ARGS__); \
  } while (0)
// Calls perror(errmsg) when DEBUG_ERRS is enabled in debug_level.
#define DEBUG_PERROR(errmsg)                      \
  do {                                            \
    if (debug_level & DEBUG_ERRS) perror(errmsg); \
  } while (0)

#else

// No-op variants. Their parameter lists mirror the DEBUG versions so that a
// call site compiles identically in both configurations.
#define DPRINTF(level, ...)
#define DEBUG_PERROR(errmsg)

#endif

/*
 * The format of this should be obvious.  Please add some explanatory
 * text if you add a debugging value.  This text will show up in
 * -d list
 */
#define DEBUG_NONE 0x00    // DBTEXT:  No debugging
#define DEBUG_ERRS 0x01    // DBTEXT:  Verbose error reporting
#define DEBUG_CUCKOO 0x02  // DBTEXT:  Messages for cuckoo hashing
#define DEBUG_TABLE 0x04   // DBTEXT:  Messages for table operations
#define DEBUG_ENCODE 0x08  // DBTEXT:  Messages for encoding

#define DEBUG_ALL 0xffffffff

// int set_debug(char *arg);  /* Returns 0 on success, -1 on failure */

}  // namespace cuckoofilter
53 |
54 | #endif // CUCKOO_FILTER_DEBUG_H_
55 |
--------------------------------------------------------------------------------
/cuckoofilter/example/test.cc:
--------------------------------------------------------------------------------
1 | #include "cuckoofilter.h"
2 |
3 | #include
4 | #include
5 |
6 | #include
7 | #include
8 |
9 | using cuckoofilter::CuckooFilter;
10 |
11 | int main(int argc, char **argv) {
12 | size_t total_items = 1000000;
13 |
14 | // Create a cuckoo filter where each item is of type size_t and
15 | // use 12 bits for each item:
16 | // CuckooFilter filter(total_items);
17 | // To enable semi-sorting, define the storage of cuckoo filter to be
18 | // PackedTable, accepting keys of size_t type and making 13 bits
19 | // for each key:
20 | // CuckooFilter filter(total_items);
21 | CuckooFilter filter(total_items);
22 |
23 | // Insert items to this cuckoo filter
24 | size_t num_inserted = 0;
25 | for (size_t i = 0; i < total_items; i++, num_inserted++) {
26 | if (filter.Add(i) != cuckoofilter::Ok) {
27 | break;
28 | }
29 | }
30 |
31 | // Check if previously inserted items are in the filter, expected
32 | // true for all items
33 | for (size_t i = 0; i < num_inserted; i++) {
34 | assert(filter.Contain(i) == cuckoofilter::Ok);
35 | }
36 |
37 | // Check non-existing items, a few false positives expected
38 | size_t total_queries = 0;
39 | size_t false_queries = 0;
40 | for (size_t i = total_items; i < 2 * total_items; i++) {
41 | if (filter.Contain(i) == cuckoofilter::Ok) {
42 | false_queries++;
43 | }
44 | total_queries++;
45 | }
46 |
47 | // Output the measured false positive rate
48 | std::cout << "false positive rate is "
49 | << 100.0 * false_queries / total_queries << "%\n";
50 |
51 | return 0;
52 | }
53 |
--------------------------------------------------------------------------------
/cuckoofilter/benchmarks/random.h:
--------------------------------------------------------------------------------
1 | // Generating random data
2 |
3 | #pragma once
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 |
12 |
/// Returns `count` 64-bit values drawn from ::std::random_device.
///
/// `inline` because the function is defined in a header; without it, including
/// the header from more than one translation unit fails at link time.
inline ::std::vector<::std::uint64_t> GenerateRandom64(::std::size_t count) {
  ::std::vector<::std::uint64_t> result(count);
  ::std::random_device random;
  // To generate random keys to lookup, this uses ::std::random_device which is slower but
  // stronger than some other pseudo-random alternatives. The reason is that some of these
  // alternatives (like libstdc++'s ::std::default_random, which is a linear congruential
  // generator) behave non-randomly under some hash families like Dietzfelbinger's
  // multiply-shift.
  // Each 64-bit value is assembled from two draws of the device: one supplies
  // the low half, the other is shifted into the high half.
  auto genrand = [&random]() {
    return random() + (static_cast<::std::uint64_t>(random()) << 32);
  };
  ::std::generate(result.begin(), result.end(), ::std::ref(genrand));
  return result;
}
27 |
// Using two pointer ranges for sequences x and y, create a vector clone of x but for
// y_probability y's mixed in.
//
// The result has the same length as x. The first ceil(y_probability * |x|)
// slots (capped at |x|) are overwritten with elements picked at random from y,
// then the whole vector is shuffled.
//
// Throws ::std::length_error if y holds more than 2^32 elements, or if y is
// empty while at least one replacement is requested.
template <typename T>
::std::vector<T> MixIn(const T* x_begin, const T* x_end, const T* y_begin, const T* y_end,
                       double y_probability) {
  const size_t x_size = x_end - x_begin, y_size = y_end - y_begin;
  if (y_size > (1ull << 32)) throw ::std::length_error("y is too long");
  ::std::vector<T> result(x_begin, x_end);
  ::std::random_device random;
  // Maps a 32-bit draw onto [0, y_size) by multiply-and-shift. The product is
  // formed in 64 bits so it cannot overflow for y_size <= 2^32.
  auto genrand = [&random, y_size]() {
    return static_cast<size_t>((static_cast<::std::uint64_t>(random()) * y_size) >> 32);
  };
  const double requested = y_probability * x_size;
  // Picking from an empty y would dereference an invalid pointer.
  if (requested > 0 && y_size == 0) throw ::std::length_error("y is empty");
  // The `i < x_size` bound keeps y_probability > 1 from writing past the end.
  for (size_t i = 0; i < x_size && i < requested; ++i) {
    result[i] = *(y_begin + genrand());
  }
  ::std::shuffle(result.begin(), result.end(), random);
  return result;
}
46 |
--------------------------------------------------------------------------------
/PD_Filter/HashTables/HistoryLog.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include "HistoryLog.hpp"
3 |
4 | bool item_key_t::operator==(const item_key_t &rhs) const {
5 | return pd_index == rhs.pd_index &&
6 | quot == rhs.quot &&
7 | rem == rhs.rem;
8 | }
9 |
10 | bool item_key_t::operator!=(const item_key_t &rhs) const {
11 | return !(rhs == *this);
12 | }
13 |
14 | bool item_key_t::operator<(const item_key_t &rhs) const {
15 | if (this->pd_index != rhs.pd_index){
16 | // std::cout << "comparing item_key_t with different pd_index field." << std::endl;
17 | return this->pd_index < rhs.pd_index;
18 | }
19 |
20 | // assert(this->pd_index == rhs.pd_index);
21 | return (this->quot != rhs.quot) ? this->quot < rhs.quot : this->rem < rhs.rem;
22 | }
23 |
24 |
25 | bool item_key_t::operator>(const item_key_t &rhs) const {
26 | return rhs < *this;
27 | }
28 |
29 | bool item_key_t::operator<=(const item_key_t &rhs) const {
30 | return !(rhs < *this);
31 | }
32 |
33 | bool item_key_t::operator>=(const item_key_t &rhs) const {
34 | return !(*this < rhs);
35 | }
36 |
37 | std::ostream &operator<<(std::ostream &os, const item_key_t &key) {
38 | os << "pd_index\t\t quot\t\t rem" << std::endl;
39 | os << "\t" << key.pd_index << "\t\t" << key.quot << "\t\t" << ((uint64_t) key.rem) << std::endl;
40 | return os;
41 | }
42 |
43 |
44 | void flip_quot(item_key_t *item) {
45 | item->quot = 24 - item->quot;
46 | }
47 |
48 | const item_key_t rand_item_key_simple(item_key_t lim_key) {
49 | return rand_item_key_simple(lim_key.pd_index, lim_key.quot, lim_key.rem);
50 | }
51 |
52 | const item_key_t rand_item_key_simple(uint64_t max_pd_index, uint64_t max_quot, uint64_t max_rem) {
53 | uint64_t pd_index = rand() % max_pd_index;
54 | uint64_t quot = rand() % max_quot;
55 | uint64_t rem = rand() % max_rem;
56 | return {pd_index, quot, rem};
57 | }
58 |
--------------------------------------------------------------------------------
/PD_Filter/HashTables/HashTables_Tests/Hashtable_main.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by tomer on 22/10/2020.
3 | //
4 |
5 | #include "validate_hash_table.hpp"
6 |
7 |
8 | /*
9 | void t_HT() {
10 | const size_t max_capacity = 1u << 10u, element_length = 28, bucket_size = 4;
11 | size_t reps = 1u << 14u;
12 | auto load_factor = .75;
13 | double working_LF = .72;
14 | double variance = .1;
15 |
16 | v_hash_table_rand_gen_load(reps, max_capacity, element_length, 2, load_factor, working_LF, variance);
17 | cout << "\n\n\n" << endl;
18 | v_hash_table_rand_gen_load(reps, max_capacity, element_length, 4, load_factor, working_LF, variance);
19 | cout << "\n\n\n" << endl;
20 | v_hash_table_rand_gen_load(reps, max_capacity, element_length, 8, load_factor, working_LF, variance);
21 | cout << "\n\n\n" << endl;
22 | v_hash_table_rand_gen_load(reps, max_capacity, element_length, 12, load_factor, working_LF, variance);
23 | }
24 | */
25 |
26 | int main() {
27 | // simpler_HTA ht(1<<20,32,.9);
28 | // return 0;
29 |
30 | constexpr uint64_t temp = 0x11DCE5;
31 | using temp_HT = hashTable_Aligned;
32 | auto res = v_hash_table_rand_gen_load(1 << 13, 1 << 13, 32, 4, .9, .5);
33 | assert(res);
34 | std::cout << "end" << std::endl;
35 | return 0;
36 | // bool res = packed_spare_wrapper_multi(1 << 10, 1 << 24, .95, .95);
37 | // bool res = packed_spare_wrapper(1 << 10, 1 << 24, .95, .95);
38 | // assert(res);
39 | // res = packed_spare_wrapper(1 << 18, 1 << 20, .9, .75);
40 | // res = packed_spare_wrapper(1 << 18, 1 << 20, .9, .75);
41 | // assert(res);
42 | // res = packed_spare_wrapper(1 << 18, 1 << 20, .9, .75);
43 | // assert(res);
44 | // bool res = packed_spare_wrapper(1 << 10, 1 << 18, 42, .9, .9);
45 | // v_hash_table_rand(1u << 14u, max_capacity, element_length, 4, load_factor);
46 | // return 0;
47 | }
48 |
--------------------------------------------------------------------------------
/.clang-format:
--------------------------------------------------------------------------------
1 | # Generated from CLion C/C++ Code Style settings
2 | BasedOnStyle: LLVM
3 | AccessModifierOffset: -4
4 | AlignAfterOpenBracket: Align
5 | AlignConsecutiveAssignments: false
6 | AlignOperands: true
7 | AllowAllArgumentsOnNextLine: false
8 | AllowAllConstructorInitializersOnNextLine: false
9 | AllowAllParametersOfDeclarationOnNextLine: false
10 | AllowShortBlocksOnASingleLine: Always
11 | AllowShortCaseLabelsOnASingleLine: false
12 | AllowShortFunctionsOnASingleLine: All
13 | AllowShortIfStatementsOnASingleLine: Always
14 | AllowShortLambdasOnASingleLine: All
15 | AllowShortLoopsOnASingleLine: true
16 | AlwaysBreakAfterReturnType: None
17 | AlwaysBreakTemplateDeclarations: Yes
18 | BreakBeforeBraces: Custom
19 | BraceWrapping:
20 | AfterCaseLabel: false
21 | AfterClass: false
22 | AfterControlStatement: Never
23 | AfterEnum: false
24 | AfterFunction: false
25 | AfterNamespace: false
26 | AfterUnion: false
27 | BeforeCatch: false
28 | BeforeElse: false
29 | IndentBraces: false
30 | SplitEmptyFunction: false
31 | SplitEmptyRecord: true
32 | BreakBeforeBinaryOperators: None
33 | BreakBeforeTernaryOperators: true
34 | BreakConstructorInitializers: BeforeColon
35 | BreakInheritanceList: BeforeColon
36 | ColumnLimit: 0
37 | CompactNamespaces: false
38 | ContinuationIndentWidth: 8
39 | IndentCaseLabels: true
40 | IndentPPDirectives: None
41 | IndentWidth: 4
42 | KeepEmptyLinesAtTheStartOfBlocks: true
43 | MaxEmptyLinesToKeep: 2
44 | NamespaceIndentation: All
45 | ObjCSpaceAfterProperty: false
46 | ObjCSpaceBeforeProtocolList: true
47 | PointerAlignment: Right
48 | ReflowComments: false
49 | SpaceAfterCStyleCast: true
50 | SpaceAfterLogicalNot: false
51 | SpaceAfterTemplateKeyword: false
52 | SpaceBeforeAssignmentOperators: true
53 | SpaceBeforeCpp11BracedList: false
54 | SpaceBeforeCtorInitializerColon: true
55 | SpaceBeforeInheritanceColon: true
56 | SpaceBeforeParens: ControlStatements
57 | SpaceBeforeRangeBasedForLoopColon: true
58 | SpaceInEmptyParentheses: false
59 | SpacesBeforeTrailingComments: 0
60 | SpacesInAngles: false
61 | SpacesInCStyleCastParentheses: false
62 | SpacesInContainerLiterals: false
63 | SpacesInParentheses: false
64 | SpacesInSquareBrackets: false
65 | TabWidth: 4
66 | UseTab: Never
67 |
--------------------------------------------------------------------------------
/PD_Filter/basic_function_util.h:
--------------------------------------------------------------------------------
1 | //
2 | // Created by tomereven on 18/07/2020.
3 | //
4 |
5 | #ifndef FILTERS_BASIC_FUNCTION_UTIL_H
6 | #define FILTERS_BASIC_FUNCTION_UTIL_H
7 |
8 | #include "printutil.hpp"
9 | #include
10 | #include
11 | #include
12 | #include
13 | #include
14 |
15 |
/**
 * @brief Smallest exponent e such that 2^e >= x.
 *
 * @tparam T arithmetic type of the argument.
 * @param x value to bound; must be greater than 1 (asserted).
 * @return ceil(log2(x)) as a size_t.
 *
 * The first estimate comes from floating-point log2. For integers too large
 * to be represented exactly as a double the estimate can be one too small,
 * so it is bumped until the power of two really covers x.
 */
template<typename T>
static auto ceil_log2(T x) -> size_t {
    assert(x > 1);
    size_t res = static_cast<size_t>(std::ceil(std::log2(x)));
    // Shifting a 64-bit value by 64 or more is undefined; reject it explicitly.
    assert(res < 64);
    // Correct a floating-point under-estimate (e.g. x == 2^53 + 1).
    while (res < 63 && (1ULL << res) < x) {
        ++res;
    }
    assert((1ULL << res) >= x);
    return res;
}
23 |
24 | auto compute_number_of_PD(size_t max_number_of_elements, size_t max_capacity, double l1_load, bool round_to_upperpower2 = false) -> size_t;
25 |
26 | auto compute_spare_element_size(size_t max_number_of_elements, float level1_load_factor,
27 | size_t pd_max_capacity = 51, size_t quot_range = 50, size_t rem_length = 8) -> size_t;
28 |
/**
 * Rounds x up to the nearest power of two (a power of two maps to itself).
 * x == 0 wraps around and yields 0.
 */
inline uint64_t upperpower2(uint64_t x) {
    --x;
    // Smear the highest set bit into every lower position: shifts 1, 2, 4, 8, 16, 32.
    for (unsigned shift = 1; shift <= 32; shift <<= 1) {
        x |= x >> shift;
    }
    return x + 1;
}
40 |
41 |
42 | auto pd_filter_total_byte_size(size_t max_number_of_elements, size_t max_capacity, double l1_load, double l2_load) -> size_t;
43 |
44 | /* Taken from the Xor filter repository https://github.com/FastFilter/fastfilter_cpp.*/
45 |
/**
 * Maps a 16-bit hash into [0, n) with a multiply-and-shift instead of a modulo.
 * See http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
 */
__attribute__((always_inline)) inline uint16_t reduce16(uint16_t hash, uint16_t n) {
    const uint32_t product = static_cast<uint32_t>(hash) * n;
    return static_cast<uint16_t>(product >> 16);
}
50 |
/**
 * Maps a 32-bit hash into [0, n) with a multiply-and-shift instead of a modulo.
 * See http://lemire.me/blog/2016/06/27/a-fast-alternative-to-the-modulo-reduction/
 */
__attribute__((always_inline)) inline uint32_t reduce32(uint32_t hash, uint32_t n) {
    const uint64_t product = static_cast<uint64_t>(hash) * n;
    return static_cast<uint32_t>(product >> 32);
}
55 |
/**
 * Maps a 64-bit value into [0, mod): the high 64 bits of the 128-bit product x * mod.
 */
__attribute__((always_inline)) inline auto reduce64(uint64_t x, uint64_t mod) -> uint64_t {
    const __uint128_t wide_product = static_cast<__uint128_t>(x) * static_cast<__uint128_t>(mod);
    return static_cast<uint64_t>(wide_product >> 64);
}
59 |
60 | // auto factorial(size_t n) -> size_t;
61 | auto factorial(double n) -> double;
62 |
63 | auto poisson_dist(double gamma, size_t k) -> double;
64 |
65 | auto poisson_sum(double gamma, size_t start, size_t end);
66 |
67 | auto compute_the_prob_that_element_overflow(double gamma, size_t max_capcity) -> double;
68 |
69 |
70 | #endif//FILTERS_BASIC_FUNCTION_UTIL_H
71 |
--------------------------------------------------------------------------------
/morton/fixed_point.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2019 Advanced Micro Devices, Inc.
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 |
22 | Author: Alex D. Breslow
23 | Advanced Micro Devices, Inc.
24 | AMD Research
25 | */
26 | #ifndef _FIXED_POINT_H
27 | #define _FIXED_POINT_H
28 |
namespace CompressedCuckoo{
  // This class is used for getting around the restriction that template
  // parameters need to be integer types.  It carries a compile-time rational
  // (numerator / denominator) that can be packed into one 128-bit integer.
  typedef __uint128_t SerializedFixedPoint;
  struct FixedPoint{
    uint64_t _numerator;
    uint64_t _denominator;

    // Builds the rational numerator / denominator as given (no reduction).
    constexpr FixedPoint(uint64_t numerator, uint64_t denominator) :
      _numerator(numerator), _denominator(denominator){}

    // Specialized constructor for values between 0 and 1: the value is scaled
    // onto a fixed denominator of 2^63.
    explicit constexpr FixedPoint(double fp_representation) :
      _numerator(static_cast<uint64_t>(fp_representation * 0x8000000000000000llu)),
      _denominator(0x8000000000000000llu){}

    // Inverse of serialize(): low 64 bits are the numerator, high 64 bits the
    // denominator.
    explicit constexpr FixedPoint(SerializedFixedPoint serialized_fixed_point) :
      _numerator(serialized_fixed_point & (0xffffffffffffffffllu)),
      _denominator(serialized_fixed_point >> 64){}

    // Value of the fraction as a double.
    constexpr double to_double() const{
      return static_cast<double>(_numerator) / _denominator;
    }

    // Value of the fraction as a float.
    constexpr float to_float() const{
      return static_cast<float>(_numerator) / _denominator;
    }

    // Packs the pair into 128 bits: denominator in the high half, numerator
    // in the low half.
    constexpr SerializedFixedPoint serialize() const{
      return _numerator + (static_cast<SerializedFixedPoint>(_denominator)
        << 64);
    }
  };
}
58 |
59 | #endif
60 |
--------------------------------------------------------------------------------
/cuckoofilter/src/permencoding.h:
--------------------------------------------------------------------------------
1 | #ifndef CUCKOO_FILTER_PERM_ENCODING_H_
2 | #define CUCKOO_FILTER_PERM_ENCODING_H_
3 |
4 | #include
5 | #include
6 | #include
7 | #include
8 |
9 | #include
10 |
11 | #include "debug.h"
12 |
13 | namespace cuckoofilter {
14 |
15 | class PermEncoding {
16 | /* unpack one 2-byte number to four 4-bit numbers */
17 | // inline void unpack(const uint16_t in, const uint8_t out[4]) const {
18 | // (*(uint16_t *)out) = in & 0x0f0f;
19 | // (*(uint16_t *)(out +2)) = (in >> 4) & 0x0f0f;
20 | // }
21 |
22 | inline void unpack(uint16_t in, uint8_t out[4]) const {
23 | out[0] = (in & 0x000f);
24 | out[2] = ((in >> 4) & 0x000f);
25 | out[1] = ((in >> 8) & 0x000f);
26 | out[3] = ((in >> 12) & 0x000f);
27 | }
28 |
29 | /* pack four 4-bit numbers to one 2-byte number */
30 | inline uint16_t pack(const uint8_t in[4]) const {
31 | uint16_t in1 = *((uint16_t *)(in)) & 0x0f0f;
32 | uint16_t in2 = *((uint16_t *)(in + 2)) << 4;
33 | return in1 | in2;
34 | }
35 |
36 | public:
37 | PermEncoding() {
38 | uint8_t dst[4];
39 | uint16_t idx = 0;
40 | memset(dec_table, 0, sizeof(dec_table));
41 | memset(enc_table, 0, sizeof(enc_table));
42 | gen_tables(0, 0, dst, idx);
43 | }
44 |
45 | ~PermEncoding() {}
46 |
47 | static const size_t N_ENTS = 3876;
48 |
49 | uint16_t dec_table[N_ENTS];
50 | uint16_t enc_table[1 << 16];
51 |
52 | inline void decode(const uint16_t codeword, uint8_t lowbits[4]) const {
53 | unpack(dec_table[codeword], lowbits);
54 | }
55 |
56 | inline uint16_t encode(const uint8_t lowbits[4]) const {
57 | if (DEBUG_ENCODE & debug_level) {
58 | printf("Perm.encode\n");
59 | for (int i = 0; i < 4; i++) {
60 | printf("encode lowbits[%d]=%x\n", i, lowbits[i]);
61 | }
62 | printf("pack(lowbits) = %x\n", pack(lowbits));
63 | printf("enc_table[%x]=%x\n", pack(lowbits), enc_table[pack(lowbits)]);
64 | }
65 |
66 | return enc_table[pack(lowbits)];
67 | }
68 |
69 | void gen_tables(int base, int k, uint8_t dst[4], uint16_t &idx) {
70 | for (int i = base; i < 16; i++) {
71 | /* for fast comparison in binary_search in little-endian machine */
72 | dst[k] = i;
73 | if (k + 1 < 4) {
74 | gen_tables(i, k + 1, dst, idx);
75 | } else {
76 | dec_table[idx] = pack(dst);
77 | enc_table[pack(dst)] = idx;
78 | if (DEBUG_ENCODE & debug_level) {
79 | printf("enc_table[%04x]=%04x\t%x %x %x %x\n", pack(dst), idx, dst[0],
80 | dst[1], dst[2], dst[3]);
81 | }
82 | idx++;
83 | }
84 | }
85 | }
86 | };
87 | } // namespace cuckoofilter
88 | #endif // CUCKOO_FILTER_PERM_ENCODING_H_
89 |
--------------------------------------------------------------------------------
/morton/morton_util.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2019 Advanced Micro Devices, Inc.
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 |
22 | Author: Alex D. Breslow
23 | Advanced Micro Devices, Inc.
24 | AMD Research
25 | */
26 | #ifndef _MORTON_UTIL_H
27 | #define _MORTON_UTIL_H
28 |
29 | namespace CompressedCuckoo{
30 | // Computes the ceiling on input/divisor and then adds 1 if necessary to make
31 | // the result even
/// Returns ceil(input / divisor), bumped by one when that quotient is odd so
/// that the result is always even.
/// @tparam T integer type of both operands and of the result.
template<class T>
T divide_round_up_even(T input, T divisor){
  T ret = (input + divisor - 1) / divisor;
  return ret & 1 ? ret + 1 : ret;
}
37 |
38 | // total_slots should be the initial estimate for the number of total logical
39 | // slots in the filter, not physical slots
/// Number of buckets needed for `total_slots` logical slots, rounded up to
/// whole blocks and then made even.
/// @tparam T integer type of all operands and of the result.
/// @param slots_per_bucket   logical slots held by one bucket.
/// @param total_slots        initial estimate of logical (not physical) slots.
/// @param buckets_per_block  buckets grouped into one block.
template<class T>
T determine_total_buckets(T slots_per_bucket, T total_slots,
  T buckets_per_block){
  // Alternate implementation that's slightly different in that it doesn't
  // round up to the next whole block which would produce an even number of
  // buckets.  Instead, it just rounds up by 1 if the number of buckets would
  // be odd.
  // Provisional total slots
  //total_slots = ((total_slots + slots_per_bucket - 1) / slots_per_bucket) *
  //  slots_per_bucket;
  //return divide_round_up_even(total_slots, slots_per_bucket);

  T logical_slots_per_block = slots_per_bucket * buckets_per_block;
  // Ceiling division: a partially used block still costs a full block.
  T total_blocks = (total_slots + logical_slots_per_block - 1) /
    (logical_slots_per_block);
  T total_buckets = total_blocks * buckets_per_block;
  // An odd bucket count implies an odd buckets_per_block, so adding one more
  // block makes the total even.
  total_buckets = (total_buckets & 1) ? total_buckets + buckets_per_block :
    total_buckets;
  return total_buckets;
}
60 | }
61 |
62 | #endif // End of file guards
63 |
--------------------------------------------------------------------------------
/morton/hash_util.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2019 Advanced Micro Devices, Inc.
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 |
22 | Author: Alex D. Breslow
23 | Advanced Micro Devices, Inc.
24 | AMD Research
25 | */
26 |
27 | // Some functions for hashing
28 |
29 | #ifndef _HASH_UTIL_H
30 | #define _HASH_UTIL_H
31 |
32 | #include
33 |
34 | #include "vector_types.h"
35 | //#include "test_util.h"
36 |
37 | struct BitMixMurmur {
38 | inline hash_t operator()(keys_t key) const{
39 | #if CCF_KEY_SIZE == 4
40 | return hash32N(key);
41 | #else
42 | return hash64N(key);
43 | #endif
44 | }
45 |
46 | // TODO: Implement with template specialization
47 | template
48 | inline T hashN(T ks) const{
49 | #if CCF_KEY_SIZE == 4
50 | return hash32N(ks);
51 | #else
52 | return hash64N(ks);
53 | #endif
54 | }
55 |
56 | // Based on code in the public domain (MurmurHash3a)
57 | // See https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp
58 | // 92cf370
59 | // The disavowing of the copyright is reproduced below and applies only to MurmurHash3:
60 | //-----------------------------------------------------------------------------
61 | // MurmurHash3 was written by Austin Appleby, and is placed in the public
62 | // domain. The author hereby disclaims copyright to this source code.
63 | // Template it to make it usable with vector types such as vN_u64
64 | template
65 | inline T hash64N(T ks) const{ // Bit mix from MurmurHash64/CLHash
66 | ks ^= ks >> 33;
67 | ks *= 0xff51afd7ed558ccdULL;
68 | ks ^= ks >> 33;
69 | ks *= 0xc4ceb9fe1a85ec53ULL;
70 | ks ^= ks >> 33;
71 | return ks;
72 | }
73 | // Based on code in the public domain (MurmurHash3a)
74 | template
75 | inline T hash32N(T ks) const{ // Bit mix from MurmurHash32
76 | ks ^= ks >> 16;
77 | ks *= 0x85ebca6b;
78 | ks ^= ks >> 13;
79 | ks *= 0xc2b2ae35;
80 | ks ^= ks >> 16;
81 | return ks;
82 | }
83 | };
84 |
85 | #endif
86 |
--------------------------------------------------------------------------------
/cuckoofilter/src/hashutil.h:
--------------------------------------------------------------------------------
1 | #ifndef CUCKOO_FILTER_HASHUTIL_H_
2 | #define CUCKOO_FILTER_HASHUTIL_H_
3 |
4 | #include
5 | #include
6 | #include
7 |
8 | #include
9 |
10 | #include
11 | #include
12 |
13 | namespace cuckoofilter {
14 |
// Collection of static hash helpers.  The constructor is private, so the
// class cannot be instantiated from outside.
class HashUtil {
 public:
  // ---- Bob Jenkins hash -------------------------------------------------
  // Single 32-bit digest of a raw buffer / of a string.
  static uint32_t BobHash(const void *buf, size_t length, uint32_t seed = 0);
  static uint32_t BobHash(const std::string &s, uint32_t seed = 0);

  // Variant that produces two indices in one call (handy for cuckoo hashing,
  // power-of-two-choices, ...).  Prefer idx1 over idx2 when possible; both
  // should be initialized to seeds before the call.
  static void BobHash(const void *buf, size_t length, uint32_t *idx1,
                      uint32_t *idx2);
  static void BobHash(const std::string &s, uint32_t *idx1, uint32_t *idx2);

  // ---- MurmurHash2 --------------------------------------------------------
  static uint32_t MurmurHash(const void *buf, size_t length, uint32_t seed = 0);
  static uint32_t MurmurHash(const std::string &s, uint32_t seed = 0);

  // ---- SuperFastHash ------------------------------------------------------
  static uint32_t SuperFastHash(const void *buf, size_t len);
  static uint32_t SuperFastHash(const std::string &s);

  // ---- Null hash (shift and mask) ----------------------------------------
  static uint32_t NullHash(const void *buf, size_t length, uint32_t shiftbytes);

  // ---- MD5 / SHA1 wrappers implemented on top of EVP ---------------------
  static std::string MD5Hash(const char *inbuf, size_t in_length);
  static std::string SHA1Hash(const char *inbuf, size_t in_length);

 private:
  HashUtil();
};
46 |
47 | // See Martin Dietzfelbinger, "Universal hashing and k-wise independent random
48 | // variables via integer arithmetic without primes".
// Multiply-shift hash with a random 128-bit multiplier and addend; the hash
// of a key is the upper 64 bits of add_ + multiply_ * key.
class TwoIndependentMultiplyShift {
  unsigned __int128 multiply_, add_;

 public:
  // Seeds both constants from std::random_device.
  TwoIndependentMultiplyShift() {
    ::std::random_device random;
    for (auto v : {&multiply_, &add_}) {
      *v = random();
      // Each pass shifts the value left by 32 bits and ORs in a fresh draw.
      // NOTE(review): after four passes the initial draw has been shifted out
      // of the 128-bit value, assuming random() yields at most 32 bits.
      for (int i = 1; i <= 4; ++i) {
        *v = *v << 32;
        *v |= random();
      }
    }
  }

  // Hashes a 64-bit key; the key is widened to 128 bits before multiplying so
  // the product keeps its high half.
  uint64_t operator()(uint64_t key) const {
    return (add_ + multiply_ * static_cast<decltype(multiply_)>(key)) >> 64;
  }
};
68 |
69 | // See Patrascu and Thorup's "The Power of Simple Tabulation Hashing"
// Tabulation hash over the bytes of a 64-bit key: one random 64-bit table
// entry is selected per key byte and the selected entries are XORed together.
class SimpleTabulation {
  uint64_t tables_[sizeof(uint64_t)][1 << CHAR_BIT];

 public:
  // Fills every table entry with 64 random bits built from two draws of
  // std::random_device.
  SimpleTabulation() {
    ::std::random_device random;
    for (unsigned i = 0; i < sizeof(uint64_t); ++i) {
      for (int j = 0; j < (1 << CHAR_BIT); ++j) {
        tables_[i][j] = random() | ((static_cast<uint64_t>(random())) << 32);
      }
    }
  }

  // Hashes `key` by XORing the table entry selected by each of its bytes
  // (bytes are taken in memory order).
  uint64_t operator()(uint64_t key) const {
    uint64_t result = 0;
    // unsigned char may alias any object, so this byte view is well-defined.
    const unsigned char *bytes = reinterpret_cast<const unsigned char *>(&key);
    for (unsigned i = 0; i < sizeof(key); ++i) {
      result ^= tables_[i][bytes[i]];
    }
    return result;
  }
};
91 | }
92 |
93 | #endif // CUCKOO_FILTER_HASHUTIL_H_
94 |
--------------------------------------------------------------------------------
/PD_Filter/basic_function_util.cpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by tomereven on 18/07/2020.
3 | //
4 |
5 | #include "basic_function_util.h"
6 |
7 | auto compute_number_of_PD(size_t max_number_of_elements, size_t max_capacity, double l1_load, bool round_to_upperpower2) -> size_t {
8 | // cout << "here" << endl;
9 | double b = max_capacity * l1_load;
10 | // std::cout << "b is: "<< b << std::endl;
11 | // auto res = (std::size_t)ceil(max_number_of_elements / b);
12 | // std::cout << "res: "<< res << std::endl;
13 | // std::cout << "res is: " << res << std::endl;
14 | size_t res = (std::size_t) ceil(max_number_of_elements / ((double) b));
15 | return (round_to_upperpower2) ? upperpower2(res) : res;
16 | }
17 |
18 | auto compute_spare_element_size(size_t max_number_of_elements, float level1_load_factor,
19 | size_t pd_max_capacity, size_t quot_range, size_t rem_length) -> size_t {
20 | size_t number_of_pd = compute_number_of_PD(max_number_of_elements, pd_max_capacity, level1_load_factor);
21 | size_t pd_index_length = ceil_log2(number_of_pd);
22 | size_t quot_range_length = ceil_log2(quot_range);
23 | return rem_length + pd_index_length + quot_range_length;
24 | }
25 |
26 | auto pd_filter_total_byte_size(size_t max_number_of_elements, size_t max_capacity, double l1_load, double l2_load) -> size_t {
27 | size_t number_of_pd = compute_number_of_PD(max_number_of_elements, max_capacity, l1_load);
28 | size_t l1_size = number_of_pd * (51 + 2);
29 | // size_t pd_index_length = ceil_log2(number_of_pd);
30 | // size_t spare_element_length = (6 + 6) + pd_index_length;
31 | size_t log2_size = ceil_log2(max_number_of_elements);
32 | // size_t temp = ceil(max_number_of_elements *1);
33 | auto number_of_buckets = ceil(max_number_of_elements / log2_size);
34 | size_t bucket_size = 4u;
35 | size_t spare_element_byte_size = 8u;
36 | size_t spare_size = number_of_buckets * bucket_size * spare_element_byte_size;
37 | return l1_size + spare_size;
38 | }
39 |
/**
 * @brief n! for a value carried in a double.
 *
 * Non-negative integer inputs are computed as the running product
 * 1 * 2 * ... * n, which overflows to +infinity past 170!.  Any other input
 * (negative, fractional, NaN) is forwarded to the gamma function,
 * tgamma(n + 1), instead of recursing without a reachable base case.
 *
 * @param n argument of the factorial.
 * @return n!, +infinity on overflow, or whatever tgamma(n + 1) yields for
 *         inputs that are not non-negative integers.
 */
auto factorial(double n) -> double {
    const bool is_non_negative_integer = (n >= 0) && (n == std::floor(n));
    if (!is_non_negative_integer) {
        return std::tgamma(n + 1.0);
    }

    double result = 1.0;
    // Stop as soon as the product saturates; further multiplications cannot change it.
    for (double k = 2.0; k <= n && !std::isinf(result); k += 1.0) {
        result *= k;
    }
    return result;
}
43 |
/**
 * @brief Compute the probability that some R.V denoted by X~Poi(gamma) is equal to k.
 *
 * Evaluated in log space as exp(k*ln(gamma) - gamma - ln(k!)), with ln(k!)
 * taken from lgamma(k + 1) so that large k does not overflow an intermediate
 * factorial.
 *
 * @param gamma rate of the distribution; expected to be >= 0.
 * @param k     number of events.
 * @return double probability in [0, 1] (asserted).
 */
auto poisson_dist(double gamma, size_t k) -> double {
    // Degenerate distribution: all mass sits on k == 0.  Handled separately
    // because 0 * log(0) would produce NaN below.
    if (gamma == 0) {
        return (k == 0) ? 1.0 : 0.0;
    }

    const double events = static_cast<double>(k);
    const double log_prob = events * std::log(gamma) - gamma - std::lgamma(events + 1.0);
    const double res = std::exp(log_prob);
    assert(res >= 0);
    assert(res <= 1);
    return res;
}
59 |
60 | auto poisson_sum(double gamma, size_t start, size_t end) {
61 | double res = 0;
62 |
63 | for (size_t i = start; i <= end; i++) {
64 | // std::cout << i << ":\t";
65 | res += poisson_dist(gamma, i);
66 | }
67 | assert(res <= 1);
68 | return res;
69 | }
70 |
71 | auto compute_the_prob_that_element_overflow(double gamma, size_t max_capcity) -> double {
72 | return 1 - poisson_sum(gamma, 0, max_capcity);
73 | }
74 |
--------------------------------------------------------------------------------
/cuckoofilter/.clang-format:
--------------------------------------------------------------------------------
1 | ---
2 | Language: Cpp
3 | # BasedOnStyle: Google
4 | AccessModifierOffset: -1
5 | AlignAfterOpenBracket: Align
6 | AlignConsecutiveAssignments: false
7 | AlignConsecutiveDeclarations: false
8 | AlignEscapedNewlinesLeft: true
9 | AlignOperands: true
10 | AlignTrailingComments: true
11 | AllowAllParametersOfDeclarationOnNextLine: true
12 | AllowShortBlocksOnASingleLine: false
13 | AllowShortCaseLabelsOnASingleLine: false
14 | AllowShortFunctionsOnASingleLine: All
15 | AllowShortIfStatementsOnASingleLine: true
16 | AllowShortLoopsOnASingleLine: true
17 | AlwaysBreakAfterDefinitionReturnType: None
18 | AlwaysBreakAfterReturnType: None
19 | AlwaysBreakBeforeMultilineStrings: true
20 | AlwaysBreakTemplateDeclarations: true
21 | BinPackArguments: true
22 | BinPackParameters: true
23 | BraceWrapping:
24 | AfterClass: false
25 | AfterControlStatement: false
26 | AfterEnum: false
27 | AfterFunction: false
28 | AfterNamespace: false
29 | AfterObjCDeclaration: false
30 | AfterStruct: false
31 | AfterUnion: false
32 | BeforeCatch: false
33 | BeforeElse: false
34 | IndentBraces: false
35 | BreakBeforeBinaryOperators: None
36 | BreakBeforeBraces: Attach
37 | BreakBeforeTernaryOperators: true
38 | BreakConstructorInitializersBeforeComma: false
39 | BreakAfterJavaFieldAnnotations: false
40 | BreakStringLiterals: true
41 | ColumnLimit: 80
42 | CommentPragmas: '^ IWYU pragma:'
43 | ConstructorInitializerAllOnOneLineOrOnePerLine: true
44 | ConstructorInitializerIndentWidth: 4
45 | ContinuationIndentWidth: 4
46 | Cpp11BracedListStyle: true
47 | DerivePointerAlignment: false
48 | DisableFormat: false
49 | ExperimentalAutoDetectBinPacking: false
50 | ForEachMacros: [ foreach, Q_FOREACH, BOOST_FOREACH ]
51 | IncludeCategories:
52 | - Regex: '^<.*\.h>'
53 | Priority: 1
54 | - Regex: '^<.*'
55 | Priority: 2
56 | - Regex: '.*'
57 | Priority: 3
58 | IncludeIsMainRegex: '([-_](test|unittest))?$'
59 | IndentCaseLabels: true
60 | IndentWidth: 2
61 | IndentWrappedFunctionNames: false
62 | JavaScriptQuotes: Leave
63 | JavaScriptWrapImports: true
64 | KeepEmptyLinesAtTheStartOfBlocks: false
65 | MacroBlockBegin: ''
66 | MacroBlockEnd: ''
67 | MaxEmptyLinesToKeep: 1
68 | NamespaceIndentation: None
69 | ObjCBlockIndentWidth: 2
70 | ObjCSpaceAfterProperty: false
71 | ObjCSpaceBeforeProtocolList: false
72 | PenaltyBreakBeforeFirstCallParameter: 1
73 | PenaltyBreakComment: 300
74 | PenaltyBreakFirstLessLess: 120
75 | PenaltyBreakString: 1000
76 | PenaltyExcessCharacter: 1000000
77 | PenaltyReturnTypeOnItsOwnLine: 200
78 | PointerAlignment: Right
79 | ReflowComments: true
80 | SortIncludes: true
81 | SpaceAfterCStyleCast: false
82 | SpaceBeforeAssignmentOperators: true
83 | SpaceBeforeParens: ControlStatements
84 | SpaceInEmptyParentheses: false
85 | SpacesBeforeTrailingComments: 2
86 | SpacesInAngles: false
87 | SpacesInContainerLiterals: true
88 | SpacesInCStyleCastParentheses: false
89 | SpacesInParentheses: false
90 | SpacesInSquareBrackets: false
91 | Standard: Auto
92 | TabWidth: 8
93 | UseTab: Never
94 | ...
95 |
96 |
--------------------------------------------------------------------------------
/PD_Filter/Dict320/pd256.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Taken from this repository.
3 | * https://github.com/jbapple/crate-dictionary
4 | * */
5 |
6 | #include "pd256.hpp"
7 |
8 | namespace v_pd256 {
// Writes the 64 bits of `header` to std::cout, most significant bit first,
// with no separators and no trailing newline.
void bin_print_header(uint64_t header) {
    // assert(_mm_popcnt_u64(header) == 32);
    for (int bit = 63; bit >= 0; --bit) {
        const bool is_set = (header >> bit) & 1ULL;
        std::cout << (is_set ? "1" : "0");
    }
}
// Returns the 64 bits of `header`, most significant bit first, as text split
// into 4-bit groups joined by '.' (16 groups, 15 separators).
auto bin_print_header_spaced(uint64_t header) -> std::string {
    // assert(_mm_popcnt_u64(header) == 32);
    std::string res;
    for (int bit = 63; bit >= 0; --bit) {
        res += ((header >> bit) & 1ULL) ? "1" : "0";
        // A group ends at every bit index divisible by 4; no dot after the last one.
        const bool group_finished = (bit % 4 == 0);
        if (group_finished && bit != 0) {
            res += ".";
        }
    }
    return res;
}
32 |
33 | }// namespace v_pd256
34 |
35 | namespace pd256 {
36 |
37 | auto validate_number_of_quotient(const __m256i *pd) -> bool {
38 | return validate_number_of_quotient(get_clean_header(pd));
39 | }
40 |
41 | auto validate_number_of_quotient(uint64_t clean_header) -> bool {
42 | auto pop_count = _mm_popcnt_u64(clean_header);
43 | if (pop_count != QUOT_SIZE22) {
44 | std::cout << "pop_count: " << pop_count << std::endl;
45 | }
46 | return pop_count == QUOT_SIZE22;
47 | }
48 |
49 |
50 | auto get_capacity_att(const __m256i *x) -> size_t {
51 | validate_number_of_quotient(x);
52 | // return get_capacity_naive();
53 |
54 | uint64_t header = get_clean_header(x);
55 | assert(_mm_popcnt_u64(header) == QUOT_SIZE22);
56 | auto temp = _lzcnt_u64(header);
57 | assert(14 <= temp);
58 | assert(temp <= (64 - 22));
59 | auto res = 42 - temp;
60 |
61 | if (res != get_capacity_naive(x)) {
62 | auto valid_res = get_capacity_naive(x);
63 | std::cout << "In get_capacity_att:" << std::endl;
64 | std::cout << "res: " << res << std::endl;
65 | std::cout << "valid_res: " << valid_res << std::endl;
66 | assert(false);
67 | }
68 | return res;
69 | }
70 |
71 | auto get_capacity_naive(const __m256i *x) -> size_t {
72 | uint64_t header = get_clean_header(x);
73 |
74 | size_t zero_count = 0, one_count = 0;
75 | uint64_t temp = header;
76 | uint64_t b = 1ULL;
77 | for (size_t i = 0; i < 64; i++) {
78 | if (b & temp) {
79 | one_count++;
80 | if (one_count == QUOT_SIZE22)
81 | return zero_count;
82 | } else {
83 | zero_count++;
84 | }
85 | b <<= 1ul;
86 | }
87 | std::cout << zero_count << std::endl;
88 | std::cout << one_count << std::endl;
89 | return -1;
90 | assert(false);
91 | }
92 |
93 | auto is_pd_full_naive(const __m256i *pd) -> bool {
94 | return get_capacity_naive(pd) == CAPACITY26;
95 | }
96 |
// Display name of this PD variant (note the trailing space).
auto get_name() -> std::string {
    std::string name("pd256 ");
    return name;
}
100 |
101 | auto remove_naive(int64_t quot, char rem, __m256i *pd) -> bool {
102 | assert(false);
103 | return false;
104 | }
105 | }// namespace pd256
106 |
--------------------------------------------------------------------------------
/morton/bf.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2019 Advanced Micro Devices, Inc.
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 |
22 | Author: Alex D. Breslow
23 | Advanced Micro Devices, Inc.
24 | AMD Research
25 |
26 | Code Source: https://github.com/AMDComputeLibraries/morton_filter
27 |
28 | VLDB 2018 Paper: https://www.vldb.org/pvldb/vol11/p1041-breslow.pdf
29 |
30 | */
31 | #ifndef _BF_H
32 | #define _BF_H
33 |
34 | // Author: Alex Breslow
35 | // Description: A simple specialized implementation of a cache-blocked
36 | // Bloom filter. See "Cache-, Hash- and Space-Efficient Bloom Filters" by
37 | // Putze et al. in JEA'09
38 | // URL: https://dl.acm.org/citation.cfm?id=1594230
39 |
40 | #include
41 |
// Compile-time floor(log2(x)) lookup for the slot widths used in this file:
// yields 3 for anything below 16, 4..8 for [16, 512), and -1 from 512 upwards.
// The argument is parenthesised at every use so that expressions such as
// LOG2(a + b) compare the whole value rather than just the last operand.
#define LOG2(x) ((x) < 16 ? 3 : (x) < 32 ? 4 : (x) < 64 ? 5 : (x) < 128 ? 6 : (x) < 256 ? 7 : (x) < 512 ? 8 : -1)
43 |
44 | namespace BlockedBF{
45 | using slot_type = uint32_t;
46 | struct Bucket{
47 | slot_type f1, f2, f3, f4;
48 | };
49 | template
50 | struct BloomFilter{
51 | static_assert(__builtin_popcountll(T_NUM_BUCKETS) == 1, "BloomFilter must "
52 | "be a power of 2 buckets in size");
53 | std::array buckets{};
54 |
55 | // Check if item is in the filter, then insert it. Return if you found it.
56 | inline bool contains_and_update(const hash_t item){
57 | constexpr slot_type one = 1;
58 | constexpr slot_type slot_width_bits = sizeof(slot_type) * 8;
59 | constexpr slot_type log2_slot_width = LOG2(slot_width_bits);
60 | constexpr slot_type shift0 = 0;
61 | constexpr slot_type shift1 = log2_slot_width;
62 | constexpr slot_type shift2 = log2_slot_width * 2;
63 | constexpr slot_type shift3 = log2_slot_width * 3;
64 | constexpr slot_type bucket_shift = log2_slot_width * 4;
65 | // Multiply by MurmurHash constant
66 | uint64_t hashes = 0xff51afd7ed558ccdULL * item;
67 | slot_type h1 = (hashes >> shift0) % slot_width_bits;
68 | slot_type h2 = (hashes >> shift1) % slot_width_bits;
69 | slot_type h3 = (hashes >> shift2) % slot_width_bits;
70 | slot_type h4 = (hashes >> shift3) % slot_width_bits;
71 | slot_type bucket_id = (hashes >> bucket_shift) % (T_NUM_BUCKETS);
72 | bool conflict_present = (buckets[bucket_id].f1 >> h1) &
73 | (buckets[bucket_id].f2 >> h2) &
74 | (buckets[bucket_id].f3 >> h3) &
75 | (buckets[bucket_id].f4 >> h4) & one;
76 | buckets[bucket_id].f1 |= one << h1;
77 | buckets[bucket_id].f2 |= one << h2;
78 | buckets[bucket_id].f3 |= one << h3;
79 | buckets[bucket_id].f4 |= one << h4;
80 | return conflict_present;
81 | }
82 | };
83 | }
84 |
85 | #endif
86 |
--------------------------------------------------------------------------------
/cuckoofilter/benchmarks/conext-figure5.cc:
--------------------------------------------------------------------------------
1 | // This benchmark reproduces the CoNEXT 2014 results found in "Figure 5: Lookup
2 | // performance when a filter achieves its capacity." It takes about two minutes to run on
3 | // an Intel(R) Core(TM) i7-4790 CPU @ 3.60GHz.
4 | //
5 | // Results:
6 | // fraction of queries on existing items/lookup throughput (million OPS)
7 | // CF ss-CF
8 | // 0.00% 24.79 9.37
9 | // 25.00% 24.65 9.57
10 | // 50.00% 24.84 9.57
11 | // 75.00% 24.86 9.62
12 | // 100.00% 24.89 9.96
13 |
14 | #include
15 | #include
16 | #include
17 |
18 | #include "cuckoofilter.h"
19 | #include "random.h"
20 | #include "timing.h"
21 |
22 | using namespace std;
23 |
24 | using namespace cuckoofilter;
25 |
26 | // The number of items sampled when determining the lookup performance
27 | const size_t SAMPLE_SIZE = 1000 * 1000;
28 |
// Measures lookup time for one filter type.
//
// Fills a filter constructed with `add_count` from `to_add` until an insert
// fails (or the data runs out), then times five lookup passes in which 0%,
// 25%, 50%, 75%, and 100% of the looked-up keys come from the inserted keys.
//
// Returns the five pass durations in seconds; slot i holds the pass for
// fraction i / 4.
//
// NOTE(review): the template parameter list and the array/vector element
// types are not visible in this view of the file.
template
array CuckooBenchmark(
    size_t add_count, const vector& to_add, const vector& to_lookup) {
  Table cuckoo(add_count);
  array result;

  // Add values until failure or until we run out of values to add:
  size_t added = 0;
  while (added < to_add.size() && 0 == cuckoo.Add(to_add[added])) ++added;

  // A value to track to prevent the compiler from optimizing out all lookups:
  size_t found_count = 0;
  for (const double found_percent : {0.0, 0.25, 0.50, 0.75, 1.00}) {
    // MixIn is declared elsewhere; presumably it blends never-inserted keys
    // with inserted keys in the requested proportion - confirm in random.h.
    // &to_lookup[SAMPLE_SIZE] is used as an end pointer, so to_lookup must hold
    // exactly SAMPLE_SIZE elements.
    const auto to_lookup_mixed = MixIn(&to_lookup[0], &to_lookup[SAMPLE_SIZE], &to_add[0],
                                       &to_add[added], found_percent);
    auto start_time = NowNanos();
    for (const auto v : to_lookup_mixed) found_count += (0 == cuckoo.Contain(v));
    auto lookup_time = NowNanos() - start_time;
    // found_percent * 4 is a double (0, 1, 2, 3, 4) converted to the index.
    result[found_percent * 4] = lookup_time / (1000.0 * 1000.0 * 1000.0);
  }
  // Consumes found_count so the lookups above have an observable effect.
  if (6 * SAMPLE_SIZE == found_count) exit(1);
  return result;
}
54 |
// Runs the lookup benchmark for the two filter configurations and prints one
// row per found-fraction with the throughput in million lookups per second.
int main() {
  // Number of distinct values, used only for the constructor of CuckooFilter, which does
  // not allow the caller to specify the space usage directly. The actual number of
  // distinct items inserted depends on how many fit until an insert failure occurs.
  size_t add_count = 127.78 * 1000 * 1000;

  // Overestimate add_count so we don't run out of random data:
  const size_t max_add_count = 2 * add_count;
  const vector to_add = GenerateRandom64(max_add_count);
  const vector to_lookup = GenerateRandom64(SAMPLE_SIZE);

  // Calculate metrics:
  // NOTE(review): the CuckooFilter template arguments are not visible in this
  // view; the two calls presumably differ in table type (CF vs. ss-CF).
  const auto cf = CuckooBenchmark<
      CuckooFilter>(
      add_count, to_add, to_lookup);
  const auto sscf = CuckooBenchmark<
      CuckooFilter>(
      add_count, to_add, to_lookup);

  cout << "fraction of queries on existing items/lookup throughput (million OPS) "
       << endl;
  cout << setw(10) << ""
       << " " << setw(10) << right << "CF" << setw(10) << right << "ss-CF" << endl;
  for (const double found_percent : {0.0, 0.25, 0.50, 0.75, 1.00}) {
    cout << fixed << setprecision(2) << setw(10) << right << 100 * found_percent << "%";
    // SAMPLE_SIZE lookups divided by the pass duration in seconds, scaled to millions.
    cout << setw(10) << right << (SAMPLE_SIZE / cf[found_percent * 4]) / (1000 * 1000);
    cout << setw(10) << right << (SAMPLE_SIZE / sscf[found_percent * 4]) / (1000 * 1000);
    cout << endl;
  }
}
85 |
--------------------------------------------------------------------------------
/Bloom_Filter/bloom.hpp:
--------------------------------------------------------------------------------
1 |
2 | #ifndef FILTERS_BLOOM_HPP
3 | #define FILTERS_BLOOM_HPP
4 |
5 | #include
6 | #include
7 | #include
8 | #include "../hashutil.h"
9 |
10 | namespace bloomfilter
11 | {
12 |
// Result codes returned by the filter operations.
enum Status
{
    Ok = 0,             // success; also Contain()'s "every probed bit is set" answer
    NotFound = 1,       // Contain() found a probed bit that is clear
    NotEnoughSpace = 2, // not produced by the bloom class in this header
    NotSupported = 3,   // not produced by the bloom class in this header
};
20 |
// Number of hash probes used per item. This implementation performs one probe
// per configured bit-per-item, so the argument is passed through unchanged.
static size_t getBestK(size_t bitsPerItem)
{
    const size_t probes_per_item = bitsPerItem;
    return probes_per_item;
}
25 |
// Length of the bit array, in bits, for a filter holding `number_of_elements`
// items: round(number_of_elements * bitsPerItem / ln 2), but never fewer than
// 64 bits.
//
// The result is computed in size_t. The previous version cast the rounded
// value to int, which overflows as soon as the filter needs more than INT_MAX
// bits (e.g. 2e8 items at 12 bits per item).
static size_t getBestM(size_t bitsPerItem, size_t number_of_elements)
{
    const size_t min_bits = 64;
    const double ideal_bits = round((double)number_of_elements * bitsPerItem / log(2));
    return std::max(min_bits, (size_t)ideal_bits);
}
30 |
31 | template
32 | class bloom
33 | {
34 |
35 | std::vector bit_array;
36 | uint32_t *seed_array;
37 | const size_t k{bits_per_item};
38 | size_t size;
39 | // HashFamily hasher;
40 | public:
41 | explicit bloom(size_t numberOfElements) : size(bloomfilter::getBestM(bits_per_item, numberOfElements)),
42 | bit_array(bloomfilter::getBestM(bits_per_item, numberOfElements), false)
43 | {
44 | // ,k(bits_per_item)
45 | // {
46 | seed_array = new uint32_t[k];
47 | srand(std::time(NULL));
48 | for (int i = 0; i < k; ++i)
49 | {
50 | seed_array[i] = random() + random();
51 | }
52 | }
53 |
// Releases the seed array allocated by the constructor.
// NOTE(review): the class declares no copy constructor or copy assignment, so
// a copied filter would share seed_array and delete[] it twice - confirm the
// filter is never copied, or delete/define the copy operations.
~bloom()
{
    delete[] seed_array;
}
58 |
59 | // Add an item to the filter.
60 | bloomfilter::Status Add(const ItemType &item)
61 | {
62 | for (int i = 0; i < k; ++i)
63 | {
64 | size_t index = wrap_hash(item, seed_array[i]);
65 | bit_array[index] = true;
66 | }
67 | return bloomfilter::Ok;
68 | }
69 |
// Add the items data[start], ..., data[end - 1] to the filter.
// Always returns Ok.
// NOTE(review): `data` is taken by value, so the whole vector is copied on
// every call; the loop index is an int compared against size_t bounds.
bloomfilter::Status AddAll(const std::vector data, const size_t start, const size_t end)
{
    for (int i = start; i < end; ++i)
    {
        Add(data[i]);
    }
    return Ok;
}
79 |
80 | bloomfilter::Status AddAll(const ItemType *data, const size_t start, const size_t end)
81 | {
82 | for (int i = start; i < end; ++i)
83 | {
84 | Add(data[i]);
85 | }
86 | return Ok;
87 | }
88 |
89 | // Report if the item is inserted, with false positive rate.
90 | bloomfilter::Status Contain(const ItemType &item) const
91 | {
92 | for (int i = 0; i < k; ++i)
93 | {
94 | size_t index = wrap_hash(item, seed_array[i]);
95 | if (!bit_array[index])
96 | return bloomfilter::NotFound;
97 | }
98 | return bloomfilter::Ok;
99 | }
100 |
101 | /* methods for providing stats */
102 | // summary infomation
103 | std::string Info() const;
104 |
// Length of the bit array, i.e. the value getBestM() produced in the constructor.
// NOTE(review): this is not the number of inserted items; nothing in this
// class counts insertions.
size_t Size() const { return size; }
107 |
108 | inline size_t wrap_hash(const ItemType &item, uint32_t seed) const
109 | {
110 | return hashing::hashint(item + seed) % size;
111 | }
112 | };
113 | } // namespace bloomfilter
114 |
115 | #endif //FILTERS_BLOOM_HPP
116 |
--------------------------------------------------------------------------------
/PD_Filter/Dict320/pd320.cpp:
--------------------------------------------------------------------------------
1 | /*
2 | * Taken from this repository.
3 | * https://github.com/jbapple/crate-dictionary
4 | * */
5 |
6 | #include "pd320.hpp"
7 |
8 | namespace v_pd320 {
// Writes the 64 bits of `header` to std::cout as '0'/'1' characters, most
// significant bit first, with no separator and no trailing newline.
void bin_print_header(uint64_t header) {
    for (int bit = 63; bit >= 0; --bit) {
        const bool is_set = ((header >> bit) & 1ULL) != 0;
        const std::string digit = is_set ? "1" : "0";
        std::cout << digit;
    }
}
// Renders the 64 bits of `header`, most significant bit first, as sixteen
// groups of four binary digits separated by '.' (79 characters in total).
auto bin_print_header_spaced(uint64_t header) -> std::string {
    std::string rendered;
    for (int bit = 63; bit >= 0; --bit) {
        rendered += ((header >> bit) & 1ULL) ? "1" : "0";
        // A separator follows every completed nibble except the last one.
        const bool nibble_done = (bit % 4 == 0);
        if (nibble_done && bit != 0)
            rendered += ".";
    }
    return rendered;
}
32 |
33 | }// namespace v_pd320
34 |
35 | namespace pd320 {
36 |
// Inspects the first 64-bit word of the PD (its header) and prints its
// population count when that count is not 32.
//
// NOTE(review): the function returns true when the count is NOT 32, i.e. when
// the header looks invalid. That reads as inverted for a "validate" function;
// the only caller visible in this file ignores the result - confirm intent.
// Requires little-endianness (per the original author's note).
auto validate_number_of_quotient(const __m512i *pd) -> bool {
    // Reads the header through a C-style cast that also drops const; the
    // sibling functions use memcpy for the same load.
    const uint64_t header = ((uint64_t *) pd)[0];
    auto pop_count = _mm_popcnt_u64(header);

    if (pop_count != 32) {
        std::cout << "pop_count: " << pop_count << std::endl;
    }
    return pop_count != 32;
}
55 |
// Computes the PD's capacity from its 64-bit header as 32 minus the number of
// leading zero bits, and cross-checks the value against get_capacity_naive().
//
// A mismatch is only reported on std::cout; the lzcnt-based value is returned
// either way. The header is asserted to contain exactly 32 set bits.
auto get_capacity_att(const __m512i *x) -> size_t {
    // Result ignored: the call only prints a diagnostic for a bad header.
    validate_number_of_quotient(x);
    uint64_t header;
    memcpy(&header, x, 8);
    assert(_mm_popcnt_u64(header) == 32);
    auto temp = _lzcnt_u64(header);
    // NOTE(review): if _lzcnt_u64 returns an unsigned type this first check is
    // always true - confirm against the intrinsic's declaration.
    assert(0 <= temp);
    assert(temp <= 32);
    auto res = 32 - temp;

    if (res != get_capacity_naive(x)) {
        auto valid_res = get_capacity_naive(x);
        std::cout << "In get_capacity_att:" << std::endl;
        std::cout << "res: " << res << std::endl;
        std::cout << "valid_res: " << valid_res << std::endl;
    }
    return res;
}
75 |
76 | auto get_capacity_naive(const __m512i *x) -> size_t {
77 | uint64_t header;
78 | memcpy(&header, x, 8);
79 | size_t zero_count = 0, one_count = 0;
80 | uint64_t temp = header;
81 | uint64_t b = 1ULL;
82 | for (size_t i = 0; i < 64; i++) {
83 | if (b & temp) {
84 | one_count++;
85 | if (one_count == 32)
86 | return zero_count;
87 | } else {
88 | zero_count++;
89 | }
90 | b <<= 1ul;
91 | }
92 | std::cout << zero_count << std::endl;
93 | std::cout << one_count << std::endl;
94 | return -1;
95 | assert(false);
96 | }
97 |
98 | auto is_pd_full_naive(const __m512i *pd) -> bool {
99 | return get_capacity_naive(pd) == 32;
100 | }
101 |
// Display name of this PD variant (note the trailing space).
auto get_name() -> std::string {
    std::string label = "pd320 ";
    return label;
}
105 |
// Placeholder: naive removal is not implemented for pd320. The function
// asserts unconditionally; when assertions are compiled out it returns false
// without touching the PD.
auto remove_naive(int64_t quot, char rem, __m512i *pd) -> bool {
    assert(false);
    return false;
}
110 | }// namespace pd320
111 |
--------------------------------------------------------------------------------
/cuckoofilter/benchmarks/conext-table3.cc:
--------------------------------------------------------------------------------
1 | // This benchmark reproduces the CoNEXT 2014 results found in "Table 3: Space efficiency
2 | // and construction speed." It takes about two minutes to run on an Intel(R) Core(TM)
3 | // i7-4790 CPU @ 3.60GHz.
4 | //
5 | // Results:
6 | //
7 | // metrics CF ss-CF
8 | // # of items (million) 127.82 127.90
9 | // bits per item 12.60 12.59
10 | // false positive rate 0.18% 0.09%
11 | // constr. speed (million keys/sec) 5.86 4.10
12 |
13 | #include
14 | #include
15 | #include
16 |
17 | #include "cuckoofilter.h"
18 | #include "random.h"
19 | #include "timing.h"
20 |
21 | using namespace std;
22 |
23 | using namespace cuckoofilter;
24 |
25 | // The number of items sampled when determining the false positive rate
26 | const size_t FPR_SAMPLE_SIZE = 1000 * 1000;
27 |
// One column of the printed results table: the figures measured for a single
// filter configuration.
struct Metrics {
  double add_count;  // # of items (million)
  double space;      // bits per item
  double fpr;        // false positive rate (%)
  double speed;      // constr. speed (million keys/sec)
};
34 |
// Measures construction speed, space usage and false positive rate for one
// filter type.
//
// Inserts keys from `input` until an insert fails (or the data runs out),
// timing that phase, then looks up at most FPR_SAMPLE_SIZE of the following,
// never-inserted keys and counts how many are reported as present.
//
// NOTE(review): the template parameter list, the vector element type and the
// static_cast target types are not visible in this view of the file.
template
Metrics CuckooBenchmark(size_t add_count, const vector& input) {
  Table cuckoo(add_count);
  auto start_time = NowNanos();

  // Insert until failure:
  size_t inserted = 0;
  while (inserted < input.size() && 0 == cuckoo.Add(input[inserted])) ++inserted;

  auto constr_time = NowNanos() - start_time;

  // Count false positives:
  // Keys at index >= inserted were never added, so every hit is a false positive.
  size_t false_positive_count = 0;
  size_t absent = 0;
  for (; inserted + absent < input.size() && absent < FPR_SAMPLE_SIZE; ++absent) {
    false_positive_count += (0 == cuckoo.Contain(input[inserted + absent]));
  }

  // Calculate metrics:
  // NOTE(review): `inserted` and `absent` are used as divisors below and can
  // be zero (first insert fails / no spare input) - confirm callers always
  // pass enough data.
  const auto time = constr_time / static_cast(1000 * 1000 * 1000);
  Metrics result;
  result.add_count = static_cast(inserted) / (1000 * 1000);
  result.space = static_cast(CHAR_BIT * cuckoo.SizeInBytes()) / inserted;
  result.fpr = (100.0 * false_positive_count) / absent;
  result.speed = (inserted / time) / (1000 * 1000);
  return result;
}
62 |
// Runs the construction benchmark for the two filter configurations and
// prints a four-row table (item count, bits per item, false positive rate,
// construction speed) with one column per configuration.
int main() {
  // Number of distinct values, used only for the constructor of CuckooFilter, which does
  // not allow the caller to specify the space usage directly. The actual number of
  // distinct items inserted depends on how many fit until an insert failure occurs.
  const size_t add_count = 127.78 * 1000 * 1000;

  // Overestimate add_count so we don't run out of random data:
  // (the extra FPR_SAMPLE_SIZE keys leave room for the false-positive sample)
  const size_t max_add_count = 2 * add_count;
  const vector input = GenerateRandom64(max_add_count + FPR_SAMPLE_SIZE);

  // Calculate metrics:
  // NOTE(review): the CuckooFilter template arguments are not visible in this
  // view; the two calls presumably differ in table type (CF vs. ss-CF).
  const auto cf = CuckooBenchmark<
      CuckooFilter>(
      add_count, input);
  const auto sscf = CuckooBenchmark<
      CuckooFilter>(
      add_count, input);

  cout << setw(35) << left << "metrics " << setw(10) << right << "CF" << setw(10)
       << "ss-CF" << endl
       << fixed << setprecision(2) << setw(35) << left << "# of items (million) "
       << setw(10) << right << cf.add_count << setw(10) << sscf.add_count << endl
       << setw(35) << left << "bits per item " << setw(10) << right << cf.space
       << setw(10) << sscf.space << endl
       << setw(35) << left << "false positive rate " << setw(9) << right << cf.fpr << "%"
       << setw(9) << sscf.fpr << "%" << endl
       << setw(35) << left << "constr. speed (million keys/sec) " << setw(10) << right
       << cf.speed << setw(10) << sscf.speed << endl;
}
92 |
--------------------------------------------------------------------------------
/PD_Filter/HashTables/Level3.hpp:
--------------------------------------------------------------------------------
1 | //
2 | // Created by tomer on 8/18/20.
3 | //
4 | /* This implementation relies on the following assumptions:
5 | * 1) Every element in the hash table consists of three parts (msb to lsb): pd_index, quot, rem.
6 | * 2) quot is an integer in the range [0, 51). More specifically, quot != 63.
7 | * 3) The number of bits for rem is 8 (not a must).
8 | * 4) The empty slot is equal to (63 << 8).
9 | */
10 | #ifndef FILTERS_LEVEL3_HPP
11 | #define FILTERS_LEVEL3_HPP
12 |
13 |
14 | //#include "../../hashutil.h"
15 | //#include "../basic_function_util.h"
16 | //#include "../macros.h"
17 | // #include "../L2_pd/twoDimPD.hpp"
18 | //#include "Spare_Validator.hpp"
19 | #include "HistoryLog.hpp"
20 | #include
21 | #include
22 | //#include
23 |
// Overflow store holding item keys in a single vector kept in sorted order.
//
// insert() places each key at its std::upper_bound position, so `elements`
// stays sorted according to item_key_t's ordering; find() and remove() use a
// linear std::find and do not rely on that order.
//
// NOTE(review): the element type of `elements` and the static_cast target
// types are not visible in this view of the file.
class Level3 {
public:
    // Number of stored keys; incremented by insert() and decremented by remove().
    size_t capacity{0};
    // const size_t number_of_buckets;
    // const size_t max_spare_capacity;

    // MainBucket *main_buckets;
    // Quotients *q_buckets;

    // unordered_set big_quots_set;
    vector elements;

    // Creates an empty store.
    explicit Level3() {
        // elements.resize(32);
    }

    // virtual ~Level3() {}


    // True when a key equal to `itemKey` is stored (linear scan).
    auto find(item_key_t itemKey) const -> bool {
        return std::find(elements.begin(), elements.end(), itemKey) != elements.end();
    }

    // Convenience overload: builds the key from its three parts and looks it up.
    auto find(uint64_t pd_index, uint8_t quot, uint8_t rem) const -> bool {
        item_key_t itemKey = {pd_index, static_cast(quot), static_cast(rem)};
        return find(itemKey);
    }


    // Inserts `itemKey` after any equal keys already present, keeping the
    // vector sorted. Duplicates are allowed.
    void insert(item_key_t itemKey) {
        capacity++;
        elements.insert(std::upper_bound(elements.begin(), elements.end(), itemKey), itemKey);
        assert(find(itemKey));
    }


    // Convenience overload: builds the key from its three parts and inserts it.
    void insert(uint64_t pd_index, uint8_t quot, uint8_t rem) {
        item_key_t itemKey = {pd_index, static_cast(quot), static_cast(rem)};
        insert(itemKey);
    }


    // Removes the first key equal to `itemKey`.
    // NOTE(review): presence is only checked by the assert; with assertions
    // compiled out, a missing key leads to erase(end, end + 1) and capacity is
    // decremented regardless - confirm callers only remove stored keys.
    void remove(item_key_t itemKey) {
        auto it_res = std::find(elements.begin(), elements.end(), itemKey);
        assert(it_res != elements.end());
        capacity--;
        elements.erase(it_res, it_res + 1);
    }

    // Convenience overload: builds the key from its three parts and removes it.
    void remove(uint64_t pd_index, uint8_t quot, uint8_t rem) {
        item_key_t itemKey = {pd_index, static_cast(quot), static_cast(rem)};
        remove(itemKey);
    }

    // Selects the element pop() would remove for `pd_index`, without removing it.
    //
    // Two upper_bound searches are run with different probe keys,
    // {pd_index, 15, 256} and {pd_index, 0, 0}, and asserted to land on the
    // same element. When the first search reaches the end, the returned key has
    // every field set to -1 (cast to the field type).
    //
    // NOTE(review): upper_bound yields the first element greater than the
    // probe; nothing here checks that the element's pd_index equals the
    // requested one - confirm that callers handle a key from another PD.
    item_key_t get_pop_element(uint64_t pd_index) const {
        item_key_t itemKey = {pd_index, 15, 256};
        item_key_t itemKey2 = {pd_index, 0, 0};
        auto it_res = std::upper_bound(elements.begin(), elements.end(), itemKey);
        auto it_res2 = std::upper_bound(elements.begin(), elements.end(), itemKey2);

        // "Nothing to pop" sentinels.
        item_key_t pop_item = {static_cast(-1), static_cast(-1), static_cast(-1)};
        item_key_t pop_item2 = {static_cast(-1), static_cast(-1), static_cast(-1)};
        if (it_res != elements.end()) {
            pop_item = it_res[0];
        }

        if (it_res2 != elements.end()) {
            pop_item2 = it_res2[0];
        }

        assert(pop_item == pop_item2);
        return pop_item;
    }

    // Removes and returns the element chosen by get_pop_element(); when the
    // sentinel comes back (pd_index field equal to -1) nothing is removed.
    item_key_t pop(uint64_t pd_index) {
        item_key_t pop_item = get_pop_element(pd_index);
        if (pop_item.pd_index != static_cast(-1))
            remove(pop_item);

        return pop_item;
    }

    // Bytes occupied by the stored keys (element count times sizeof(item_key_t));
    // unused vector capacity is not counted.
    size_t get_byte_size() const {
        return elements.size() * sizeof(item_key_t);
    }
};
110 | #endif//FILTERS_LEVEL3_HPP
111 |
--------------------------------------------------------------------------------
/morton/vector_types.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2019 Advanced Micro Devices, Inc.
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 |
22 | Author: Alex D. Breslow
23 | Advanced Micro Devices, Inc.
24 | AMD Research
25 | */
26 | #ifndef _VECTOR_TYPES_H
27 | #define _VECTOR_TYPES_H
28 |
29 | #include
30 |
// Scalar type aliases. CCF_KEY_SIZE (in bytes) selects the width of atom_t,
// hash_t and keys_t together with the lane count _N; both supported settings
// give sizeof(type) * _N == 32 bytes.
typedef __uint128_t uint128_t;
//typedef uint64_t atom_t; // Tested up to __uint128_t inclusive
//typedef uint64_t hash_t;
typedef uint8_t counter_t; // Used only in one implementation of scans
//typedef uint64_t keys_t; // IMPORTANT: the C++ implementation already has a
// key_t in one of its system headers, so this code uses the name keys_t.
// Make sure you use this one.
#define CCF_KEY_SIZE 8

#if CCF_KEY_SIZE == 4
typedef uint32_t atom_t;
typedef uint32_t hash_t;
typedef uint32_t keys_t;
constexpr uint64_t _N = 8;
#elif CCF_KEY_SIZE == 8
typedef uint64_t atom_t;
typedef uint64_t hash_t;
typedef uint64_t keys_t;
constexpr uint64_t _N = 4;
#else
#error "Only CCF_KEY_SIZE 4 and 8 are currently supported"
#endif
52 |
// Per-item parameters bundled into one record; StoreParamsSOA holds the same
// five fields as parallel arrays.
// Could add the bucket id and fingerprint since they are associated with this
struct StoreParams{ // TODO: Optimize this a bit
  hash_t block_id; // Could go
  uint16_t counter_index; // Could go
  counter_t bucket_start_index; // Stay
  counter_t elements_in_block; // Stay
  counter_t counter_value; // Could go
};
61 |
62 |
// Batch size and the array aliases used for batched operations, one alias per
// element kind.
// NOTE(review): the std::array template arguments are not visible in this view
// of the file; presumably each alias is an array of batch_size elements of the
// type named in its suffix - confirm against the original header.
constexpr uint_fast64_t batch_size = 128;
typedef std::array ar_atom;
typedef std::array ar_u8;
typedef std::array ar_u16;
typedef std::array ar_u32;
typedef std::array ar_counter;
typedef std::array ar_hash;
typedef std::array ar_key;
typedef std::array ar_store_params;
72 |
// Struct-of-arrays counterpart of StoreParams: one array per StoreParams
// field, using the batch array aliases.
struct StoreParamsSOA{
  ar_hash block_ids;
  ar_u16 counter_indexes;
  ar_counter bucket_start_indexes;
  ar_counter elements_in_blocks;
  ar_counter counter_values;
};
80 |
81 |
// Compiler vector-extension types with _N lanes each: the vector_size
// attribute takes the total size in bytes, hence sizeof(element) * _N.
// TODO: Check that this is GNU source
typedef uint64_t vN_u64 __attribute__ ((vector_size(sizeof(uint64_t) * _N)));
typedef uint8_t vN_u8 __attribute__ ((vector_size (sizeof(uint8_t) * _N)));
typedef uint16_t vN_u16 __attribute__ ((vector_size (sizeof(uint16_t) * _N)));
typedef uint32_t vN_u32 __attribute__ ((vector_size (sizeof(uint32_t) * _N)));
typedef atom_t vN_atom __attribute__ ((vector_size (sizeof(atom_t) * _N)));
typedef keys_t vN_key __attribute__((vector_size (sizeof(keys_t) * _N)));
typedef hash_t vN_hash __attribute__((vector_size (sizeof(hash_t) * _N)));

// For some reason using atom_t for counters in the vectorized case is faster.
// (So vN_counter has atom_t lanes rather than counter_t lanes.)
typedef atom_t vN_counter __attribute__ ((vector_size (sizeof(atom_t) * _N)));
93 |
94 | #endif
95 |
--------------------------------------------------------------------------------
/cuckoofilter/README.md:
--------------------------------------------------------------------------------
1 | Cuckoo Filter
2 | ============
3 |
4 | Overview
5 | --------
6 | A cuckoo filter is a Bloom filter replacement for approximate set-membership queries. While Bloom filters are well-known space-efficient data structures that serve queries like "is item x in a set?", they do not support deletion. Their variants that enable deletion (like counting Bloom filters) usually require much more space.
7 |
8 | Cuckoo filters provide the flexibility to add and remove items dynamically. A cuckoo filter is based on cuckoo hashing (and therefore named as cuckoo filter). It is essentially a cuckoo hash table storing each key's fingerprint. Cuckoo hash tables can be highly compact, thus a cuckoo filter could use less space than conventional Bloom filters, for applications that require low false positive rates (< 3%).
9 |
10 | For details about the algorithm and citations please use:
11 |
12 | ["Cuckoo Filter: Practically Better Than Bloom"](http://www.cs.cmu.edu/~binfan/papers/conext14_cuckoofilter.pdf) in proceedings of ACM CoNEXT 2014 by Bin Fan, Dave Andersen and Michael Kaminsky
13 |
14 |
15 | API
16 | --------
17 | A cuckoo filter supports following operations:
18 |
19 | * `Add(item)`: insert an item to the filter
20 | * `Contain(item)`: return if item is already in the filter. Note that this method may return false positive results like Bloom filters
21 | * `Delete(item)`: delete the given item from the filter. Note that to use this method, it must be ensured that this item is in the filter (e.g., based on records on external storage); otherwise, a false item may be deleted.
22 | * `Size()`: return the total number of items currently in the filter
23 | * `SizeInBytes()`: return the filter size in bytes
24 |
25 | Here is a simple example in C++ for the basic usage of cuckoo filter.
26 | More examples can be found in `example/` directory.
27 |
28 | ```cpp
29 | // Create a cuckoo filter where each item is of type size_t and
30 | // use 12 bits for each item, with capacity of total_items
31 | CuckooFilter filter(total_items);
32 | // Insert item 12 to this cuckoo filter
33 | filter.Add(12);
34 | // Check if previously inserted items are in the filter
35 | assert(filter.Contain(12) == cuckoofilter::Ok);
36 | ```
37 |
38 | Repository structure
39 | --------------------
40 | * `src/`: the C++ header and implementation of cuckoo filter
41 | * `example/test.cc`: an example of using cuckoo filter
42 | * `benchmarks/`: Some benchmarks of speed, space used, and false positive rate
43 |
44 |
45 | Build
46 | -------
47 | This library depends on the openssl library. Note that on MacOS 10.12, the header
48 | files of openssl are not available by default. You may need to install openssl
49 | and pass the paths of its `lib` and `include` directories to gcc, for example:
50 |
51 | ```bash
52 | $ brew install openssl
53 | # Replace 1.0.2j with the actual version of the openssl installed
54 | $ export LDFLAGS="-L/usr/local/Cellar/openssl/1.0.2j/lib"
55 | $ export CFLAGS="-I/usr/local/Cellar/openssl/1.0.2j/include"
56 | ```
57 |
58 | To build the example (`example/test.cc`):
59 | ```bash
60 | $ make test
61 | ```
62 |
63 | To build the benchmarks:
64 | ```bash
65 | $ cd benchmarks
66 | $ make
67 | ```
68 |
69 | Install
70 | -------
71 | To install the cuckoofilter library:
72 | ```bash
73 | $ make install
74 | ```
75 | By default, the header files will be placed in `/usr/local/include/cuckoofilter`
76 | and the static library at `/usr/local/lib/cuckoofilter.a`.
77 |
78 |
79 | Contributing
80 | ------------
81 | Contributions via GitHub pull requests are welcome. Please keep the code style guided by
82 | [Google C++ style](https://google.github.io/styleguide/cppguide.html). One can use
83 | [clang-format](http://clang.llvm.org/docs/ClangFormat.html) with our provided
84 | [`.clang-format`](https://github.com/efficient/cuckoofilter/blob/master/.clang-format)
85 | in this repository to enforce the style.
86 |
87 |
88 |
89 | Authors
90 | -------
91 | - Bin Fan
92 | - David G. Andersen
93 | - Michael Kaminsky
94 |
--------------------------------------------------------------------------------
/PD_Filter/L2_pd/temp_main.cpp:
--------------------------------------------------------------------------------
1 | #include "twoDimPD.hpp"
2 |
3 | void simplest_test() {
4 | auto b = MainBucket<48, 32, 8>();
5 | assert(!b.find(0, 0, 0));
6 | b.insert(0, 0, 0);
7 | assert(b.find(0, 0, 0));
8 | auto del_res = b.conditional_remove(0, 0, 0);
9 | assert(del_res);
10 | assert(!b.find(0, 0, 0));
11 | // return;
12 | }
13 |
// Debug marker: prints this source file's name and the line number of the
// statement below, wrapped in "***".
void att() {
    std::cout << "*** " << __FILE__ << ":" << __LINE__ << " *** " << std::endl;
}
17 |
18 | void leading_trailing_zeros() {
19 | std::cout << "_lzcnt_u64(0) :" << _lzcnt_u64(0) << std::endl;
20 | std::cout << "_lzcnt_u64(1) :" << _lzcnt_u64(1) << std::endl;
21 | std::cout << "_lzcnt_u64(2) :" << _lzcnt_u64(2) << std::endl;
22 | std::cout << "_lzcnt_u64(4) :" << _lzcnt_u64(4) << std::endl;
23 | std::cout << "_lzcnt_u64(m63):" << _lzcnt_u64(1ULL << 63) << std::endl;
24 |
25 | std::cout << "_tzcnt_u64(0) :" << _tzcnt_u64(0) << std::endl;
26 | std::cout << "_tzcnt_u64(1) :" << _tzcnt_u64(1) << std::endl;
27 | std::cout << "_tzcnt_u64(2) :" << _tzcnt_u64(2) << std::endl;
28 | std::cout << "_tzcnt_u64(4) :" << _tzcnt_u64(4) << std::endl;
29 | std::cout << "_tzcnt_u64(m63):" << _tzcnt_u64(1ULL << 63) << std::endl;
30 |
31 |
32 | // _lzcnt_u64(0) :64
33 | // _lzcnt_u64(1) :63
34 | // _lzcnt_u64(2) :62
35 | // _lzcnt_u64(4) :61
36 | // _lzcnt_u64(m63) :0
37 |
38 | // _tzcnt_u64(0) :64
39 | // _tzcnt_u64(1) :0
40 | // _tzcnt_u64(2) :1
41 | // _tzcnt_u64(4) :2
42 | // _tzcnt_u64(m63) :63
43 | }
44 |
45 | int old_main() {
46 | // uint64_t a[4] = {0};
47 | // a[0] = 0x1234'1334'1334'1334;
48 | // a[1] = 0x1334'1234'1334'1334;
49 | // a[2] = 0x1334'1334'1234'1334;
50 | // a[3] = 0x1334'1334'1334'1234;
51 |
52 | // auto res1 = bits_memcpy::my_cmp_epu<16, 4>(0x1234, a);
53 |
54 | // std::cout << "res1: " << res1 << std::endl;
55 | // print_memory::print_word_LE(res1, GAP);
56 |
57 | // att();
58 |
59 | bool res = MainBucket_tests::insert_find_all<48, 32, 8>();
60 | assert(res);
61 | for (size_t i = 0; i < (1ul << 10ul); i++) {
62 | res = MainBucket_tests::rand_test1<48, 32, 8>();
63 | if (!res) {
64 | std::cout << "i: " << i << std::endl;
65 | return -1;
66 | }
67 | }
68 | std::cout << "pass1" << std::endl;
69 |
70 | MainBucket_tests::recursive_add_delete<48, 32, 8>(1 << 20);
71 | // assert(res);
72 | std::cout << "pass2" << std::endl;
73 |
74 | res = MainBucket_tests::recursive_add_delete_with_map<48, 32, 8>(1 << 20);
75 | assert(res);
76 | std::cout << "pass3" << std::endl;
77 | res = MainBucket_tests::rand_test1<48, 32, 8>();
78 | assert(res);
79 | std::cout << "pass4" << std::endl;
80 | // bool res = rt0<48, 32, 8>();
81 | // assert(res);
82 | std::cout << "Passed" << std::endl;
83 | return 0;
84 | }
85 |
86 | void Q_tests_main() {
87 | bool res = Quotients_tests::true_negative_lookup_test<48, 32, 4>();
88 | assert(res);
89 |
90 | res = Quotients_tests::true_negative_lookup_test_all<48, 32, 4>();
91 | assert(res);
92 | res = Quotients_tests::simplest_test<48, 32, 4>();
93 | assert(res);
94 |
95 | for (size_t i = 0; i < (1 << 12); i++) {
96 | bool temp = Quotients_tests::simplest_test<48, 32, 4>();
97 | assert(temp);
98 | }
99 |
100 | for (size_t i = 0; i < (1 << 13); i++) {
101 | bool temp = Quotients_tests::rand_test18<48, 32, 4>();
102 | assert(temp);
103 | }
104 |
105 | for (size_t i = 0; i < (1 << 13); i++) {
106 | bool temp = Quotients_tests::rand_test18<48, 32, 4>();
107 | assert(temp);
108 | }
109 |
110 | res = Quotients_tests::recursive_add_delete_with_map<48, 32, 4>(1 << 10);
111 | assert(res);
112 | }
113 |
// Entry point: prints a banner and runs the lzcnt/tzcnt demonstration. The
// bucket test sequence (old_main) is currently disabled.
int main() {
    std::cout << "Temp_Main" << std::endl;

    // old_main();
    leading_trailing_zeros();


    return 0;
}
--------------------------------------------------------------------------------
/morton/compressed_cuckoo_config.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2019 Advanced Micro Devices, Inc.
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 |
22 | Author: Alex D. Breslow
23 | Advanced Micro Devices, Inc.
24 | AMD Research
25 | */
26 | #ifndef _COMPRESSED_CUCKOO_CONFIG_H
27 | #define _COMPRESSED_CUCKOO_CONFIG_H
28 |
29 | namespace CompressedCuckoo{
30 | // See vector_types.h for more types and for tuning the vector width and
31 | // atom types
32 | const bool g_cache_aligned_allocate = true;
33 | const size_t g_cache_line_size_bytes = 64; // Change this as necessary
34 | const uint64_t stash_prefix_tag_len = 4;
35 |
36 | // Allows for up to 255 items per block
37 | const uint8_t max_fullness_counter_width = 8;
38 | constexpr atom_t one = static_cast(1);
39 |
40 | enum struct AlternateBucketSelectionMethodEnum{
41 | TABLE_BASED_OFFSET,
42 | FUNCTION_BASED_OFFSET,
43 | FAN_ET_AL_PARTIAL_KEY // Only use this if you can guarantee the total buckets
44 | // in the filter is a power of two
45 | };
46 |
47 | enum struct InsertionMethodEnum{
48 | FIRST_FIT,
49 | TWO_CHOICE,
50 | HYBRID_SIMPLE,
51 | HYBRID_PIECEWISE, // Starts off as first-fit and then transitions to two choice
52 | // once you hit a certain load factor
53 | FIRST_FIT_OPT, // Transitions between two implementations of first-fit
54 | };
55 |
56 | enum struct CounterReadMethodEnum{
57 | READ_SIMPLE,
58 | READ_CROSS,
59 | READ_RAW, // If counters are always in atom 0 of block 0, just read that.
60 | // NOTE: This is prone to bugs, if you rearrange the storage of
61 | // of the counters within a block, so beware.
62 | READ_RAW128 // Read the first 128 bits from the block. See comment above.
63 | };
64 |
65 | enum struct FingerprintReadMethodEnum{
66 | READ_SIMPLE,
67 | READ_CROSS,
68 | READ_BYTE // Special optimization for 8-bit fingerprints that are byte
69 | // aligned
70 | // RAW reads don't make sense here. We don't statically know which atom
71 | // that needs to be read. It may make sense with 128-bit atoms, but
72 | // my benchmarking showed 64-bit atoms to be faster.
73 | };
74 |
75 | enum struct FingerprintComparisonMethodEnum{
76 | VARIABLE_COUNT,
77 | FIXED_COUNT_AGGRESSIVE,
78 | SEMI_FIXED
79 | };
80 |
81 | enum struct ReductionMethodEnum{
82 | POP_CNT, // Must only use when counters fit into a single atom
83 | PARALLEL_REDUCE,
84 | NAIVE_FULL_EXCLUSIVE_SCAN,
85 | };
86 |
// Selects the hashing scheme for the overflow tracking array.
enum class OverflowTrackingArrayHashingMethodEnum {
  LEMIRE_FINGERPRINT_MULTIPLY,  // Daniel Lemire's fast hashing method
  RAW_BUCKET_HASH,
  CLUSTERED_BUCKET_HASH
};
93 |
// Outcome of an insertion attempt. The numeric values are fixed explicitly.
enum class InsertStatus {
  FAILED_TO_INSERT = 0,
  PLACED_IN_PRIMARY_BUCKET = 1,
  PLACED_IN_SECONDARY_BUCKET = 2
};
99 |
100 |
101 | } // End of CompressedCuckoo namespace
102 |
103 | #endif
104 |
--------------------------------------------------------------------------------
/PD_Filter/Fixed_PD/v_tests.cpp:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | #include "v_tests.hpp"
5 | namespace fpd_tests {
6 | // using namespace Wrap_Fixed_pd;
7 | // void pd_init(__m512i *pd) {
8 | // *pd = __m512i{(INT64_C(1) << 50) - 1, 0, 0, 0, 0, 0, 0, 0};
9 | // }
10 |
11 | auto insert_find_single(int64_t quot, uint8_t rem, Wrap_Fixed_pd::packed_fpd *pd) -> bool {
12 | Wrap_Fixed_pd::add(quot, rem, pd);
13 | auto find_res = Wrap_Fixed_pd::find(quot, rem, pd);
14 | assert(find_res);
15 | return true;
16 | }
17 |
18 | auto insert_find_single_with_capacity(int64_t quot, uint8_t rem, Wrap_Fixed_pd::packed_fpd *pd) -> bool {
19 | auto old_capacity = Wrap_Fixed_pd::get_capacity(pd);
20 | Wrap_Fixed_pd::add(quot, rem, pd);
21 | auto find_res = Wrap_Fixed_pd::find(quot, rem, pd);
22 | assert(find_res);
23 | auto new_capacity = Wrap_Fixed_pd::get_capacity(pd);
24 | assert(new_capacity == old_capacity + 1);
25 | return true;
26 | }
27 |
28 |
29 | auto insert_find_all(Wrap_Fixed_pd::packed_fpd *pd) -> bool {
30 | for (size_t q = 0; q < fixed_pd::QUOT_RANGE; q++) {
31 | for (size_t r = 0; r < 256; r++) {
32 | insert_find_single(q, r, pd);
33 | }
34 | }
35 | return true;
36 | }
37 |
38 |
39 | auto insert_find_all() -> bool {
40 | Wrap_Fixed_pd::packed_fpd pd = Wrap_Fixed_pd::packed_fpd();
41 | return insert_find_all(&pd);
42 | }
43 |
44 | auto rand_test1() -> bool {
45 | size_t max_capacity = fixed_pd::CAPACITY;
46 | // uint64_t valid_max_quot = 0;
47 | Wrap_Fixed_pd::packed_fpd pd = Wrap_Fixed_pd::packed_fpd();
48 | for (size_t i = 0; i < max_capacity; i++) {
49 | uint64_t q = rand() % fixed_pd::QUOT_RANGE;
50 | uint64_t r = rand() & 255;
51 | Wrap_Fixed_pd::add(q, r, &pd);
52 |
53 | assert(Wrap_Fixed_pd::find(q, r, &pd));
54 | auto temp_capacity = Wrap_Fixed_pd::get_capacity(&pd);
55 | assert(Wrap_Fixed_pd::get_capacity(&pd) == i + 1);
56 | }
57 | return true;
58 | }
59 |
60 | auto rand_test2() -> bool {
61 | size_t max_capacity = fixed_pd::CAPACITY + (fixed_pd::CAPACITY / 4);
62 | // uint64_t valid_max_quot = 0;
63 | Wrap_Fixed_pd::packed_fpd pd = Wrap_Fixed_pd::packed_fpd();
64 | for (size_t i = 0; i < max_capacity; i++) {
65 | uint64_t q = rand() % fixed_pd::QUOT_RANGE;
66 | uint64_t r = rand() & 255;
67 | Wrap_Fixed_pd::add(q, r, &pd);
68 |
69 | assert(Wrap_Fixed_pd::find(q, r, &pd));
70 | auto temp_capacity = Wrap_Fixed_pd::get_capacity(&pd);
71 | assert(Wrap_Fixed_pd::get_capacity(&pd) == i + 1);
72 | }
73 | return true;
74 | }
75 |
76 | auto determ_no_false_positive() -> bool {
77 | size_t max_capacity = fixed_pd::CAPACITY + (fixed_pd::CAPACITY / 4);
78 | // std::vector> vals;
79 | std::vector vals;
80 | // uint64_t valid_max_quot = 0;
81 | Wrap_Fixed_pd::packed_fpd pd = Wrap_Fixed_pd::packed_fpd();
82 | for (size_t i = 0; i < max_capacity; i++) {
83 | uint64_t q = rand() % fixed_pd::QUOT_RANGE;
84 | uint64_t r = rand() & 255;
85 | Wrap_Fixed_pd::vec_key temp_key = std::make_tuple(q, r);
86 | vals.push_back(temp_key);
87 | Wrap_Fixed_pd::add(q, r, &pd);
88 |
89 | assert(Wrap_Fixed_pd::find(q, r, &pd));
90 | auto temp_capacity = Wrap_Fixed_pd::get_capacity(&pd);
91 | assert(Wrap_Fixed_pd::get_capacity(&pd) == i + 1);
92 | }
93 |
94 | for (size_t i = 0; i < (1<<10); i++)
95 | {
96 | uint64_t q = rand() % fixed_pd::QUOT_RANGE;
97 | uint64_t r = rand() & 255;
98 | Wrap_Fixed_pd::vec_key temp_key = std::make_tuple(q, r);
99 | if (std::find(vals.begin(), vals.end(), temp_key) != vals.end()){
100 | continue;
101 | }
102 |
103 | assert(!Wrap_Fixed_pd::find(q, r, &pd));
104 | }
105 | return true;
106 |
107 | }
108 |
109 |
110 | }// namespace fpd_tests
111 |
--------------------------------------------------------------------------------
/PD_Filter/Fixed_PD/wrap_fpd.hpp:
--------------------------------------------------------------------------------
1 |
2 | #ifndef FILTERS_WRAP_FIXED_PD_HPP
3 | #define FILTERS_WRAP_FIXED_PD_HPP
4 |
5 | #include "fpd.hpp"
6 | #include
7 | #include
8 | #include
9 |
10 | namespace Wrap_Fixed_pd {
11 | // using namespace Fixed_pd;
12 |
13 | // typedef std::tuple vec_key;
14 |
/**
 * Two fixed PDs packed back to back: each has a 512-bit body and a header of
 * four 64-bit words. The layout body1 | header1 | header2 | body2 occupies
 * exactly three 64-byte lines, which the static_assert below enforces.
 */
struct packed_fpd {
    __m512i body1 __attribute__((aligned(64)));
    uint64_t header1[4];
    uint64_t header2[4];
    __m512i body2 __attribute__((aligned(64)));

    /** Zero-initializes both bodies and both headers. */
    packed_fpd() {
        const __m512i zero_body = __m512i{0, 0, 0, 0, 0, 0, 0, 0};
        body1 = zero_body;
        body2 = zero_body;
        for (auto &word : header1) {
            word = 0;
        }
        for (auto &word : header2) {
            word = 0;
        }
    }

} __attribute__((aligned(64)));
static_assert(sizeof(struct packed_fpd) == (64 * 3), "Check your assumptions");
33 | // static_assert(sizeof(struct packed_fpd) == (64 * 4), "Check your assumptions");
34 |
35 |
36 | // void init_fpd(packed_fpd *pd) {
37 | // pd->body = __m512i{0, 0, 0, 0, 0, 0, 0, 0};
38 | // for (size_t i = 0; i < counter_size; i++) {
39 | // pd->header[i] = 0;
40 | // }
41 | // }
42 |
43 | auto validate_init_packed_fpd(packed_fpd *pd) -> bool;
44 |
45 |
46 | /* auto add(uint64_t quot, uint8_t rem, uint64_t *header, __m512i *body) -> int;
47 |
48 | auto find(uint64_t quot, uint8_t rem, const uint64_t *header, const __m512i *body) -> int;
49 |
50 | auto get_capacity(uint64_t *header, __m512i *body) -> size_t;
51 | */
52 |
53 | inline auto find(uint64_t quot, uint8_t rem, const uint64_t *header, const __m512i *body) -> int {
54 | auto counter = Fixed_pd::Header::read_counter(quot, header);
55 | if (counter == 0) {
56 | return 0;
57 | } else if (counter == Fixed_pd::counter_overflowed_val) {
58 | return -1;
59 | }
60 |
61 | uint64_t v = Fixed_pd::Body::get_v(rem, body);
62 | if (!v) {
63 | return 0;
64 | }
65 |
66 | uint64_t start = Fixed_pd::Header::get_start(quot, header);
67 | return (v >> start) & MASK(counter);
68 | }
69 |
70 | inline auto find2(uint64_t quot, uint8_t rem, const uint64_t *header, const __m512i *body) -> int {
71 | const size_t counter = Fixed_pd::Header::read_counter(quot, header);
72 | if (counter != Fixed_pd::counter_overflowed_val) {
73 | if (!counter)
74 | return 0;
75 | uint64_t v = Fixed_pd::Body::get_v(rem, body);
76 | if (!v)
77 | return 0;
78 |
79 | uint64_t start = Fixed_pd::Header::get_start(quot, header);
80 | return (v >> start) & MASK(counter);
81 | }
82 | return -1;
83 | }
84 |
85 | inline auto find3(uint64_t quot, uint8_t rem, const uint64_t *header, const __m512i *body) -> int {
86 | const size_t counter = Fixed_pd::Header::read_counter(quot, header);
87 | switch (counter) {
88 | case 0:
89 | return 0;
90 |
91 | case Fixed_pd::counter_overflowed_val:
92 | return -1;
93 |
94 | default:
95 | break;
96 | }
97 | uint64_t v = Fixed_pd::Body::get_v(rem, body);
98 | if (!v)
99 | return 0;
100 |
101 | uint64_t start = Fixed_pd::Header::get_start(quot, header);
102 | return (v >> start) & MASK(counter);
103 | }
104 |
105 | inline auto add(uint64_t quot, uint8_t rem, uint64_t *header, __m512i *body) -> int {
106 | assert(quot < Fixed_pd::QUOT_RANGE);
107 | int header_add_res = Fixed_pd::Header::add(quot, header);
108 | if (header_add_res == -1) {
109 | uint64_t start = Fixed_pd::Header::get_start(quot, header);
110 | assert(start <= Fixed_pd::CAPACITY);
111 | uint64_t end = Fixed_pd::Header::read_counter(quot, header);
112 | Fixed_pd::Body::add(start, start + end - 1, rem, body);
113 | return -1;
114 | }
115 | return header_add_res;
116 | }
117 |
118 |
119 | inline auto get_capacity(uint64_t *header, __m512i *body) -> size_t {
120 | return Fixed_pd::Header::get_capacity(header);
121 | }
122 | }// namespace Wrap_Fixed_pd
123 | #endif// FILTERS_WRAP_FIXED_PD_HPP
124 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Update
2 | A recent Incremental version of the ideas in this code appears in [Prefix-Filter](https://github.com/TomerEven/Prefix-Filter) repository.
3 | The Prefix-Filter is an incremental filter with faster insertion throughput than Bloom, CF and VQF, and query throughput that is slightly worse than that of a CF with the same false positive rate (which is faster than other "hashtable for fingerprints" filters).
4 |
5 | # Comparing Filters
6 | Currently benchmarking:
7 | 1. Bloomfilter (BF)
8 | 2. Cuckoo filter (CF)
9 | 3. SIMD blocked Bloom filter (SIMD)
10 | 4. Morton filter (MF)
11 | 5. Pocket Dictionary filter (PD)
12 |
13 | ## Validation
14 | The files `test.hpp test.cpp` contain validation tests on the filter.
15 | 1. Making sure the filter has no false negatives (i.e., it never indicates that an element is not in the filter when it is).
16 | 2. Checking the filter false positive rate is as expected.
17 | Filters often have a parameter that controls the false positive probability $\epsilon$; when this parameter is increased, the filter uses more space and has a smaller error probability.
18 |
19 | ## Benchmark
20 | There are various benchmarks to evaluate the error probability under different loads, and a speed test measuring four parameters:
21 | insertions, uniform lookups (the result of a uniform lookup is "no" w.h.p. in standard scenarios), true lookups (of elements in the filter), and deletions.
22 |
23 |
24 | ## Usage
25 |
26 | ### Dependencies
27 | Since CF uses `openssl` library, the project won't compile unless it is installed. (See CF [repository](https://github.com/efficient/cuckoofilter))
28 |
29 |
30 | ### To build
31 | ```
32 | git clone -b Simpler https://github.com/TheHolyJoker/Comparing_Filters.git
33 | cd Comparing_Filters
34 | mkdir build
35 | cd build
36 | cmake ..
37 | cmake --build ./ --target Filters
38 | ```
39 | ### To run
40 | In `build` directory run
41 |
42 | ```
43 | ./Filters
44 | ```
45 | 1. `filter indicator`: Which filter to test.
46 | 1. To include BF in the test,`filter indicator & 1` should be true.
47 | 2. To include CF in the test,`filter indicator & 2` should be true.
48 | 3. To include SIMD in the test,`filter indicator & 4` should be true.
49 | 4. To include MF in the test,`filter indicator & 8` should be true.
50 | 5. To include PD in the test,`filter indicator & 16` should be true.
51 |
52 | The default value is -1 to test all filters.
53 | 2. `exponent of the number of keys`: Every filter is built to contain at most 2^`exponent of the number of keys`.
54 | The default value is 24. (should not be set to less than 16 or MF might fail)
55 | 3. `lookup factor`: Lookup exponent factor. If set to d and n insertions will be performed, then n*2^d lookups will be performed.
56 | The default value is 2
57 | 4. `rounds`: The benchmark performs insertion, and then lookup where each time a fraction of `1/rounds` of the total number of elements is queried.
58 | The default value is 32.
59 |
60 | ## Credit
61 | Large parts of the code and its structure are taken from https://github.com/FastFilter/fastfilter_cpp.
62 |
63 | Cuckoo filter is from https://github.com/efficient/cuckoofilter by Bin Fan et al.
64 |
65 | SIMD blocked Bloom filter is from https://github.com/apache/impala.
66 |
67 | Morton filter is from https://github.com/AMDComputeLibraries/morton_filter.
68 |
69 | Counting Quotient Filter (CQF) is from https://github.com/splatlab/cqf. (Currently not in use).
70 |
71 | Pocket Dictionary is work in progress see https://github.com/TomerEven/Pocket_Dictionary.
72 | The Pocket Dictionary class that uses advanced SIMD instructions, is taken from [here](https://github.com/jbapple/cuckoofilter/tree/crates/src) by Jim Apple ([@Jbapple](https://github.com/jbapple)).
73 |
74 | ### Papers
75 | **Bloom filter** https://en.wikipedia.org/wiki/Bloom_filter
76 |
77 | **Cuckoo Filter**
78 | ["Cuckoo Filter: Better Than Bloom" by Bin Fan, Dave Andersen and Michael Kaminsky](https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf)
79 |
80 | **SIMD blocked Bloom filter**
81 | [Cache-, Hash- and Space-Efficient Bloom Filters](https://algo2.iti.kit.edu/singler/publications/cacheefficientbloomfilters-wea2007.pdf)
82 |
83 | **Morton filter** [Morton Filters: Faster, Space-Efficient Cuckoo Filters via
84 | Biasing, Compression, and Decoupled Logical Sparsity](https://www.vldb.org/pvldb/vol11/p1041-breslow.pdf),
85 |
86 | **Quotient Filter** [A General-Purpose Counting Filter: Counting Quotient Filter (CQF)](https://www3.cs.stonybrook.edu/~ppandey/files/p775-pandey.pdf)
87 |
88 | **Pocket Dictionary** [Fully-Dynamic Space-Efficient Dictionaries and Filters with
89 | Constant Number of Memory Accesses](https://arxiv.org/pdf/1911.05060.pdf)
90 |
91 |
92 | ## To do
93 | 1. Add Filters
94 | 1. **Vacuum-Filter** [paper](http://www.vldb.org/pvldb/vol13/p197-wang.pdf) [repository](https://github.com/wuwuz/Vacuum-Filter)
95 | 2. **Quotient-Filter** [repository](https://github.com/splatlab/cqf)
96 | 2. Counting filter benchmark.
97 |
--------------------------------------------------------------------------------
/morton/util.h:
--------------------------------------------------------------------------------
1 | /*
2 | Copyright (c) 2019 Advanced Micro Devices, Inc.
3 |
4 | Permission is hereby granted, free of charge, to any person obtaining a copy
5 | of this software and associated documentation files (the "Software"), to deal
6 | in the Software without restriction, including without limitation the rights
7 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 | copies of the Software, and to permit persons to whom the Software is
9 | furnished to do so, subject to the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be included in
12 | all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20 | THE SOFTWARE.
21 |
22 | Author: Alex D. Breslow
23 | Advanced Micro Devices, Inc.
24 | AMD Research
25 | */
26 | #ifndef _UTIL_H
27 | #define _UTIL_H
28 |
29 | #include
30 | #include
31 | #include
32 | #include
33 |
34 | #include
35 |
36 | #include "vector_types.h"
37 |
38 | // FIXME: Put guards around this
39 | // For BMI2 pdep instruction
40 | #ifdef __BMI2__
41 | #include "x86intrin.h"
42 | #endif
43 |
44 | namespace util{
45 |
// Renders `integer` as a binary string, most significant bit first. A space
// is emitted after every bit whose index is a multiple of `spacing`, so the
// string always ends with a space after bit 0 unless spacing is 0.
// A spacing of 0 disables separators instead of dividing by zero.
template <class INT_TYPE>
inline std::string bin_string(INT_TYPE integer, uint32_t spacing) {
  std::string out;
  for (int32_t i = sizeof(integer) * 8 - 1; i > -1; i--) {
    out += ((integer >> i) & 1 ? '1' : '0');
    if (spacing != 0 && static_cast<uint32_t>(i) % spacing == 0) {
      out += ' ';
    }
  }
  return out;
}

// Renders `integer` as one contiguous group of bits, followed by the single
// trailing space that the two-argument overload emits after bit 0.
template <class INT_TYPE>
inline std::string bin_string(INT_TYPE integer) {
  return bin_string(integer, static_cast<uint32_t>(8 * sizeof(INT_TYPE)));
}
62 |
// Returns ceil(log2(integer)) for integer >= 1.
// This could be implemented using fancy binary arithmetic, but this probably
// suffices if the integer is known at compile time.
// The value 1 is special-cased because it would otherwise evaluate
// __builtin_clz(0), whose result is undefined. An input of 0 keeps its
// historical result of 32.
constexpr inline uint32_t log2ceil(uint32_t integer){
  return integer == 1u
             ? 0u
             : 32u - static_cast<uint32_t>(__builtin_clz(integer - 1u));
}
69 |
70 | // See https://lemire.me/blog/2016/06/27
71 | // These functions implement a fast alternative to the modulo reduction.
72 | // The algorithm is presented by Professor Daniel Lemire of the University
73 | // of Quebec in his outstanding blog, which is under a Creative Commons
74 | // Attribution-ShareAlike 3.0 Unported License.
75 | // See https://creativecommons.org/licenses/by-sa/3.0/us/ and
76 | // https://lemire.me/blog/terms-of-use/.
// Maps raw_hash, assumed uniformly distributed over hash_width_in_bits bits,
// into [0, modulus) with a widening multiply and a shift instead of a modulo.
// Only the uint64_t and uint32_t specializations below are defined.
template <class T>
inline T fast_mod_alternative(T raw_hash, T modulus, T hash_width_in_bits);

// 64-bit variant: the product is formed in 128 bits so it cannot overflow.
template <>
inline uint64_t fast_mod_alternative<uint64_t>(uint64_t raw_hash,
  uint64_t modulus, uint64_t hash_width_in_bits){
  return static_cast<uint64_t>(
      (static_cast<__uint128_t>(raw_hash) * modulus) >> hash_width_in_bits);
}

// 32-bit variant: the product is formed in 64 bits so it cannot overflow.
template <>
inline uint32_t fast_mod_alternative<uint32_t>(uint32_t raw_hash,
  uint32_t modulus, uint32_t hash_width_in_bits){
  return static_cast<uint32_t>(
      (static_cast<uint64_t>(raw_hash) * modulus) >> hash_width_in_bits);
}
91 |
// Vectorized counterpart of fast_mod_alternative: reduces every lane of
// raw_hashes into [0, modulus). TN is the vector type, T the modulus type.
// NOTE(review): the template parameter list was missing; <TN, T> is inferred
// from the signature and from the vN_u32/uint32_t specialization — confirm.
template <class TN, class T>
inline TN fast_mod_alternativeN(TN raw_hashes, T modulus);
94 |
95 | template
96 | inline void print_array(const std::string& name, const ARRAY_TYPE& array){
97 | std::cout << name << " [ ";
98 | for(uint32_t i = 0; i < batch_size; i++){
99 | std::cout << static_cast(array[i]) << " ";
100 | }
101 | std::cout << "]\n";
102 | }
103 |
104 | template<>
105 | inline vN_u32 fast_mod_alternativeN(vN_u32 raw_hashes, uint32_t modulus){
106 | for(uint32_t i = 0; i < _N; i++){
107 | static_assert(_N <= 8, "Vector width exceeds AVX/AVX2's 256-bit vector width\n");
108 | raw_hashes[i] = static_cast((static_cast<__uint64_t>(raw_hashes[i]) * modulus) >> 32U);
109 | }
110 | return raw_hashes;
111 | }
112 |
113 | ////} // End of util namespace
114 |
115 | /*
116 | // FIXME: Not yet tested
117 | std::ostream& operator<<(std::ostream& os, __uint128_t integer){
118 | std::stringstream ss;
119 | __uint128_t sqrt_power10 = static_cast<__uint128_t>(10000000000000000000ull);
120 | // log10 of 2^127 is between 38 and 39, so start with 38 zeros
121 | __int128_t power10 = sqrt_power10 * sqrt_power10;
122 | while(static_cast<__uint128_t>(0) / power10 == 0){
123 | power10 /= 10;
124 | }
125 | while(power10 != 0){
126 | uint32_t digit = integer / power10;
127 | os << static_cast(digit);
128 | integer -= power10 * (digit);
129 | power10 /= 10;
130 | }
131 | return os;
132 | }*/
133 |
134 | } // End of util namespace
135 |
136 | #endif
137 |
--------------------------------------------------------------------------------
/PD_Filter/L2_pd/tpd.cpp:
--------------------------------------------------------------------------------
1 |
2 | #include "tpd.hpp"
3 |
4 |
5 | int compare_vector_and_tpd(b_vec *v, uint64_t *arr, size_t bits_to_compare) {
6 | constexpr size_t slot_size = sizeof(uint64_t) * CHAR_BIT;
7 | size_t word_count = (bits_to_compare + slot_size - 1) / slot_size;
8 | size_t bit_count = 0;
9 | for (size_t i = 0; i < word_count; i++) {
10 | uint64_t word = arr[i];
11 | size_t v_index_offset = slot_size * i;
12 | uint64_t b = 1ULL;
13 | for (size_t j = 0; j < slot_size; j++) {
14 | if (bit_count == bits_to_compare)
15 | return -1;
16 | bool a = word & b;
17 | bool b = v->at(v_index_offset + j);
18 | if (a != b) {
19 | // std::cout << "/* message */" << std::endl;
20 | return i * slot_size + j;
21 | }
22 | b <<= 1ull;
23 | bit_count++;
24 | }
25 | }
26 | return -1;
27 | }
28 |
29 |
/**
 * @brief Counts the zero bits that precede the k-th set bit of word, where
 *        k is 0-based and bits are scanned from least significant.
 *
 * @return The number of zeros below that set bit. If word has at most k set
 *         bits, both counters are printed to stdout and size_t(-1) is
 *         returned.
 */
auto count_zeros_up_to_the_kth_one(uint64_t word, size_t k) -> size_t {
    int ones_seen = -1;// index of the most recently seen set bit
    int zeros_seen = 0;
    for (size_t pos = 0; pos < 64; ++pos) {
        const bool bit_is_set = (word >> pos) & 1ULL;
        if (!bit_is_set) {
            ++zeros_seen;
            continue;
        }
        ++ones_seen;
        assert(pos - static_cast<size_t>(ones_seen) == static_cast<size_t>(zeros_seen));
        if (static_cast<size_t>(ones_seen) == k) {
            return pos - k;
        }
    }
    std::cout << ones_seen << std::endl;
    std::cout << zeros_seen << std::endl;
    return static_cast<size_t>(-1);
}
53 |
/**
 * @brief Counts the set bits that precede the k-th zero bit of word, where
 *        k is 0-based and bits are scanned from least significant.
 *
 * @return The number of ones below that zero bit. If word has at most k zero
 *         bits, both counters are printed to stdout and size_t(-1) is
 *         returned.
 */
auto count_ones_up_to_the_kth_zero(uint64_t word, size_t k) -> size_t {
    int zeros_seen = -1;// index of the most recently seen zero bit
    int ones_seen = 0;
    for (size_t pos = 0; pos < 64; ++pos) {
        const bool bit_is_set = (word >> pos) & 1ULL;
        if (bit_is_set) {
            ++ones_seen;
            continue;
        }
        ++zeros_seen;
        assert(pos - static_cast<size_t>(zeros_seen) == static_cast<size_t>(ones_seen));
        if (static_cast<size_t>(zeros_seen) == k) {
            return pos - k;
        }
    }
    std::cout << ones_seen << std::endl;
    std::cout << zeros_seen << std::endl;
    return static_cast<size_t>(-1);
}
77 |
78 |
79 | auto count_zeros_up_to_the_kth_one(const uint64_t *pd, size_t k) -> size_t {
80 | uint64_t pop1 = _mm_popcnt_u64(pd[0]);
81 | if (pop1 >= k) {
82 | return count_zeros_up_to_the_kth_one(pd[0], k);
83 | }
84 | uint64_t pop2 = _mm_popcnt_u64(pd[1]);
85 | assert(pop1 + pop2 >= k);
86 | auto w1_zeros = 64 - pop1;
87 | auto w2_zeros = count_zeros_up_to_the_kth_one(pd[1], k - pop1);
88 | return w1_zeros + w2_zeros;
89 | }
90 |
91 | auto count_ones_up_to_the_kth_zero(const uint64_t *arr, size_t k) -> size_t {
92 | uint64_t pop1 = _mm_popcnt_u64(pd[0]);
93 | size_t z1 = 64 - pop1;
94 | if (z1 >= k) {
95 | return count_ones_up_to_the_kth_zero(pd[0], k);
96 | }
97 | uint64_t pop2 = _mm_popcnt_u64(pd[1]);
98 | size_t z2 = 64 - pop2;
99 | assert(z1 + z2 >= k);
100 | auto o2 = count_ones_up_to_the_kth_zero(pd[1], k - z1);
101 | return pop1 + o2;
102 | }
103 |
/**
 * @brief Copies the bit range [start, end) of src to the beginning of dest,
 *        for a byte-aligned start.
 *
 * Whole bytes are copied with memcpy (bit i of the array is bit i % 64 of
 * word i / 64 on a little-endian host). The bits above the last requested
 * bit in the final, partially filled destination word are then cleared.
 *
 * Fixes:
 *  - `start & 7 == 0` parsed as `start & (7 == 0)`, so the alignment assert
 *    always failed.
 *  - The tail mask was applied to dest[ceil(bits / 64)], one word past the
 *    partially filled word, and wiped that word when bits % 64 == 0. It is
 *    now applied to dest[bits / 64], and only when a partial word exists.
 *
 * @param src    Source bit array.
 * @param dest   Destination; must have room for ceil((end - start) / 64) words.
 * @param start  First bit to copy; must be a multiple of 8.
 * @param end    One past the last bit to copy; must be >= start.
 */
void read_bits_easy_case(const uint64_t *src, uint64_t *dest, size_t start, size_t end) {
    const bool start_is_x8 = ((start & 7) == 0);
    assert(start_is_x8);
    (void) start_is_x8;
    assert(start <= end);

    const size_t bit_count = end - start;
    const size_t offset = start / CHAR_BIT;
    const size_t kBytes2copy = (bit_count + CHAR_BIT - 1) / CHAR_BIT;
    memcpy(dest, reinterpret_cast<const uint8_t *>(src) + offset, kBytes2copy);

    const size_t tail_bits = bit_count & 63;
    if (tail_bits != 0) {
        // Also clears the bytes of the last word that memcpy did not write.
        const uint64_t mask = (1ULL << tail_bits) - 1ULL;
        dest[bit_count / 64] &= mask;
    }
}
116 |
/**
 * @brief Drops the lowest `shift` bits of the array and moves the remaining
 *        data `shift` places toward bit 0. The top `shift` bits of the last
 *        word become zero.
 *
 * A shift of 0 and an empty array are both no-ops. Previously shift == 0
 * evaluated `a[i + 1] << 64` (undefined behavior) and a_size == 0 made
 * `a_size - 1` wrap around.
 *
 * @param a       Word array, least significant word first.
 * @param a_size  Number of words in a.
 * @param shift   Number of bit positions to shift; must be < 64.
 */
void right_shift_array(uint64_t *a, size_t a_size, uint64_t shift) {
    assert(shift < 64);
    if (a_size == 0 || shift == 0) {
        return;
    }
    for (size_t i = 0; i + 1 < a_size; i++) {
        // Low bits of the next word become the high bits of this one.
        a[i] = (a[i] >> shift) | (a[i + 1] << (64 - shift));
    }
    a[a_size - 1] >>= shift;
}
132 |
133 | void read_bits(const uint64_t *src, uint64_t *dest, size_t start, size_t end) {
134 | bool start_is_x8 = (start & 7 == 0);
135 |
136 | if (start_is_x8) {
137 | read_bits_easy_case(src, dest, start, end);
138 | return;
139 | }
140 |
141 | size_t new_start = (start / 8) * 8;
142 | assert(new_start < start);
143 | read_bits_easy_case(src, dest, new_start, end);
144 |
145 | uint64_t shift = (start & 7);
146 |
147 | // start or new_start?
148 | size_t dest_size = (end - new_start + 63) / 64;
149 | right_shift_array(dest, dest_size, shift);
150 | }
151 |
/**
 * @brief Returns whether bit bit_index of the packed array is set; bit i is
 *        bit (i % 64) of a[i / 64], counted from the least significant bit.
 */
bool test_bit(const uint64_t *a, size_t bit_index) {
    constexpr uint64_t bits_per_word = sizeof(uint64_t) * CHAR_BIT;
    const uint64_t word = a[bit_index / bits_per_word];
    return ((word >> (bit_index % bits_per_word)) & 1ULL) != 0;
}
158 |
--------------------------------------------------------------------------------
/hashutil.h:
--------------------------------------------------------------------------------
1 |
2 | #ifndef HASHUTIL_H_
3 | #define HASHUTIL_H_
4 |
5 | #include
6 | #include
7 | #include
8 | #include
9 | #include
10 | #include
11 | #include
12 |
13 | #include "Hash_functions/BobJenkins.h"
14 | #include "Hash_functions/wyhash.h"
15 | #include "Hash_functions/xxhash64.h"
16 | #include
17 | #include
18 | #include
19 |
20 | // #include "Hash_functions/woothash.h"
21 |
22 | namespace hashing {
23 | // See Martin Dietzfelbinger, "Universal hashing and k-wise independent random
24 | // variables via integer arithmetic without primes".
// Multiply-shift hash: h(key) = (add_ + multiply_ * key) >> 64, computed in
// 128-bit arithmetic.
class TwoIndependentMultiplyShift {
    unsigned __int128 multiply_, add_;

public:
    // Draws both 128-bit parameters from std::random_device.
    TwoIndependentMultiplyShift() {
        ::std::random_device random;
        for (auto v : {&multiply_, &add_}) {
            *v = random();
            // Four more 32-bit draws are shifted in, so the first draw is
            // pushed out of the 128-bit value entirely.
            for (int i = 1; i <= 4; ++i) {
                *v = *v << 32;
                *v |= random();
            }
        }
    }

    /**
     * @brief Construct a new Two Independent Multiply Shift object with
     * fixed parameters. Disables the randomness for debugging.
     *
     * @param seed1 Ignored
     * @param seed2 Ignored
     */
    TwoIndependentMultiplyShift(unsigned __int128 seed1, unsigned __int128 seed2) {
        (void) seed1;
        (void) seed2;
        std::cout << "hash function is pseudo random" << std::endl;

        multiply_ = 0xaaaa'bbbb'cccc'dddd;
        multiply_ <<= 64;
        multiply_ |= 0xeeee'ffff'1111'0000;
        add_ = 0xaaaa'aaaa'bbbb'bbbb;
        add_ <<= 64;
        add_ |= 0xcccc'cccc'dddd'dddd;

        // Both parameters must use their upper 64 bits.
        assert(multiply_ > 18446744073709551615ULL);
        assert(add_ > 18446744073709551615ULL);
    }

    // Returns the upper 64 bits of add_ + multiply_ * key.
    // The cast's target type was missing; the key is widened to the type of
    // multiply_ so the product is formed in 128 bits.
    inline uint64_t operator()(uint64_t key) const {
        return (add_ + multiply_ * static_cast<unsigned __int128>(key)) >> 64;
    }

    // Returns the lowest 32 bits of add_ + multiply_ * key.
    inline uint32_t hash32(uint64_t key) const {
        return static_cast<uint32_t>(add_ + multiply_ * static_cast<unsigned __int128>(key));
    }

    auto get_name() const -> std::string {
        return "TwoIndependentMultiplyShift";
    }
};
72 |
// Hashes a key by adding a random 64-bit seed and applying the 64-bit
// finalizer implemented in murmur64().
class SimpleMixSplit {

public:
    uint64_t seed;

    // Builds the seed from two successive std::random_device draws: the
    // first becomes the upper 32 bits, the second is OR-ed into the low bits.
    SimpleMixSplit() {
        ::std::random_device device;
        const uint64_t high_part = device();
        const uint64_t low_part = device();
        seed = (high_part << 32) | low_part;
    }

    // Two rounds of xor-shift-by-33 followed by an odd multiply, then one
    // final xor-shift.
    inline static uint64_t murmur64(uint64_t h) {
        constexpr uint64_t kMultipliers[2] = {UINT64_C(0xff51afd7ed558ccd),
                                              UINT64_C(0xc4ceb9fe1a85ec53)};
        for (const uint64_t multiplier : kMultipliers) {
            h ^= h >> 33;
            h *= multiplier;
        }
        return h ^ (h >> 33);
    }

    inline uint64_t operator()(uint64_t key) const {
        const uint64_t seeded_key = key + seed;
        return murmur64(seeded_key);
    }
};
98 |
99 | class my_xxhash64 {
100 | uint64_t seed;
101 |
102 | public:
103 | my_xxhash64() {
104 | seed = random();
105 | }
106 | inline uint64_t operator()(uint64_t key) const {
107 | return XXHash64::hash(&key, 8, seed);
108 | }
109 | auto get_name() const -> std::string {
110 | return "xxhash64";
111 | }
112 | };
113 |
114 | class my_wyhash64 {
115 | uint64_t seed;
116 |
117 | public:
118 | my_wyhash64() {
119 | seed = random();
120 | }
121 | inline uint64_t operator()(uint64_t key) const {
122 | return wyhash64(key, seed);
123 | }
124 |
125 | auto get_name() const -> std::string {
126 | return "wyhash64";
127 | }
128 | };
129 |
130 | class my_BobHash {
131 | uint64_t seed1, seed2;
132 |
133 | public:
134 | my_BobHash() {
135 | seed1 = random();
136 | seed2 = random();
137 | }
138 |
139 |
140 | inline uint64_t operator()(uint32_t s) const {
141 | uint32_t out1 = seed1, out2 = seed2;
142 | void BobHash(const void *buf, size_t length, uint32_t *idx1, uint32_t *idx2);
143 | BobJenkins::BobHash((void *) &s, 4, &out1, &out2);
144 | return ((uint64_t) out1 << 32ul) | ((uint64_t) out2);
145 |
146 | // return BobJenkins::BobHash((void *) &s, 4, seed);
147 | }
148 |
149 | // inline uint64_t operator()(uint64_t s) const {
150 | // return BobJenkins::BobHash((void *) &s, 8, seed);
151 | // }
152 |
153 | auto get_name() const -> std::string {
154 | return "BobHash";
155 | }
156 | };
157 |
// Six-round add/xor/shift integer mixer; all arithmetic wraps modulo 2^32.
inline uint32_t hashint(uint32_t a) {
    a += 0x7ed55d16 + (a << 12);
    a ^= 0xc761c23c ^ (a >> 19);
    a += 0x165667b1 + (a << 5);
    a = (a << 9) ^ (a + 0xd3a2646c);
    a += 0xfd7046c5 + (a << 3);
    a ^= 0xb55a4f09 ^ (a >> 16);
    return a;
}
167 |
// Same six mixing rounds as the 32-bit overload, carried out in 64-bit
// arithmetic; the result is truncated to its low 32 bits on return.
inline uint32_t hashint(uint64_t a) {
    a += 0x7ed55d16 + (a << 12);
    a ^= 0xc761c23c ^ (a >> 19);
    a += 0x165667b1 + (a << 5);
    a = (a << 9) ^ (a + 0xd3a2646c);
    a += 0xfd7046c5 + (a << 3);
    a ^= 0xb55a4f09 ^ (a >> 16);
    return static_cast<uint32_t>(a);
}
177 |
178 |
179 | }// namespace hashing
180 |
181 | #endif// HASHUTIL_H_
182 |
--------------------------------------------------------------------------------
/cuckoofilter/src/simd-block.h:
--------------------------------------------------------------------------------
1 | // Copied from Apache Impala (incubating), usable under the terms in the Apache License,
2 | // Version 2.0.
3 |
4 | // This is a block Bloom filter (from Putze et al.'s "Cache-, Hash- and Space-Efficient
5 | // Bloom Filters") with some twists:
6 | //
7 | // 1. Each block is a split Bloom filter - see Section 2.1 of Broder and Mitzenmacher's
8 | // "Network Applications of Bloom Filters: A Survey".
9 | //
10 | // 2. The number of bits set per Add() is contant in order to take advantage of SIMD
11 | // instructions.
12 |
13 | #pragma once
14 |
15 | #include
16 | #include
17 |
18 | #include
19 | #include
20 |
21 | #include
22 |
23 | #include "hashutil.h"
24 |
25 | using uint32_t = ::std::uint32_t;
26 | using uint64_t = ::std::uint64_t;
27 |
// Block Bloom filter whose blocks ("buckets") are 256 bits wide, so one
// bucket can be updated or tested with a single AVX2 register operation.
// HashFamily is a default-constructible functor mapping a uint64_t key to an
// integer hash.
// NOTE(review): the template parameter list was missing. The upstream source
// is believed to default HashFamily to a TwoIndependentMultiplyShift class;
// that default cannot be confirmed from here and is left out — restore it if
// callers rely on `SimdBlockFilter<>`.
template <typename HashFamily>
class SimdBlockFilter {
 private:
  // The filter is divided up into Buckets:
  using Bucket = uint32_t[8];

  // log2(number of bytes in a bucket):
  static constexpr int LOG_BUCKET_BYTE_SIZE = 5;

  static_assert(
      (1 << LOG_BUCKET_BYTE_SIZE) == sizeof(Bucket) && sizeof(Bucket) == sizeof(__m256i),
      "Bucket sizing has gone awry.");

  // log_num_buckets_ is the log (base 2) of the number of buckets in the directory:
  const int log_num_buckets_;

  // directory_mask_ is (1 << log_num_buckets_) - 1. It is precomputed in the
  // constructor for efficiency reasons:
  const uint32_t directory_mask_;

  Bucket* directory_;

  HashFamily hasher_;

 public:
  // Consumes at most (1 << log_heap_space) bytes on the heap:
  explicit SimdBlockFilter(const int log_heap_space);

  // Takes over that's directory. The source pointer is cleared so that only
  // one of the two destructors frees the allocation; previously both objects
  // kept the same pointer and it was freed twice.
  SimdBlockFilter(SimdBlockFilter&& that)
      : log_num_buckets_(that.log_num_buckets_),
        directory_mask_(that.directory_mask_),
        directory_(that.directory_),
        hasher_(that.hasher_) {
    that.directory_ = nullptr;
  }
  ~SimdBlockFilter() noexcept;
  void Add(const uint64_t key) noexcept;
  bool Find(const uint64_t key) const noexcept;
  uint64_t SizeInBytes() const { return sizeof(Bucket) * (1ull << log_num_buckets_); }

 private:
  // A helper function for Add()/Find(). Turns a 32-bit hash into a 256-bit Bucket
  // with 1 single 1-bit set in each 32-bit lane.
  static __m256i MakeMask(const uint32_t hash) noexcept;

  SimdBlockFilter(const SimdBlockFilter&) = delete;
  void operator=(const SimdBlockFilter&) = delete;
};
73 |
74 | template
75 | SimdBlockFilter::SimdBlockFilter(const int log_heap_space)
76 | : // Since log_heap_space is in bytes, we need to convert it to the number of Buckets
77 | // we will use.
78 | log_num_buckets_(::std::max(1, log_heap_space - LOG_BUCKET_BYTE_SIZE)),
79 | // Don't use log_num_buckets_ if it will lead to undefined behavior by a shift that is
80 | // too large.
81 | directory_mask_((1ull << ::std::min(63, log_num_buckets_)) - 1),
82 | directory_(nullptr),
83 | hasher_() {
84 | if (!__builtin_cpu_supports("avx2")) {
85 | throw ::std::runtime_error("SimdBlockFilter does not work without AVX2 instructions");
86 | }
87 | const size_t alloc_size = 1ull << (log_num_buckets_ + LOG_BUCKET_BYTE_SIZE);
88 | const int malloc_failed =
89 | posix_memalign(reinterpret_cast(&directory_), 64, alloc_size);
90 | if (malloc_failed) throw ::std::bad_alloc();
91 | memset(directory_, 0, alloc_size);
92 | }
93 |
94 | template
95 | SimdBlockFilter::~SimdBlockFilter() noexcept {
96 | free(directory_);
97 | directory_ = nullptr;
98 | }
99 |
100 | // The SIMD reinterpret_casts technically violate C++'s strict aliasing rules. However, we
101 | // compile with -fno-strict-aliasing.
102 | template  // MakeMask: expands a 32-bit hash into a 256-bit mask with exactly one bit set per 32-bit lane.
103 | [[gnu::always_inline]] inline __m256i
104 | SimdBlockFilter::MakeMask(const uint32_t hash) noexcept {
105 | const __m256i ones = _mm256_set1_epi32(1);
106 | // Odd constants for hashing:
107 | const __m256i rehash = _mm256_setr_epi32(0x47b6137bU, 0x44974d91U, 0x8824ad5bU,
108 | 0xa2b7289dU, 0x705495c7U, 0x2df1424bU, 0x9efc4947U, 0x5c6bfb31U);
109 | // Load hash into a YMM register, repeated eight times
110 | __m256i hash_data = _mm256_set1_epi32(hash);
111 | // Multiply-shift hashing ala Dietzfelbinger et al.: multiply 'hash' by eight different
112 | // odd constants, then keep the 5 most significant bits from each product.
113 | hash_data = _mm256_mullo_epi32(rehash, hash_data);  // Keeps the low 32 bits of each lane's product.
114 | hash_data = _mm256_srli_epi32(hash_data, 27);  // Logical shift: each lane now holds a value in [0, 31].
115 | // Use these 5 bits to shift a single bit to a location in each 32-bit lane
116 | return _mm256_sllv_epi32(ones, hash_data);
117 | }
118 |
119 | template  // Add: inserts key by OR-ing its mask into the bucket selected by its hash.
120 | [[gnu::always_inline]] inline void
121 | SimdBlockFilter::Add(const uint64_t key) noexcept {
122 | const auto hash = hasher_(key);
123 | const uint32_t bucket_idx = hash & directory_mask_;  // The low bits of the hash choose the bucket.
124 | const __m256i mask = MakeMask(hash >> log_num_buckets_);  // The bits above the bucket index feed the mask.
125 | __m256i* const bucket = &reinterpret_cast<__m256i*>(directory_)[bucket_idx];
126 | _mm256_store_si256(bucket, _mm256_or_si256(*bucket, mask));  // Aligned store of the old bucket contents with the mask bits added.
127 | }
128 |
129 | template  // Find: returns true iff every bit of key's mask is set in the bucket selected by its hash.
130 | [[gnu::always_inline]] inline bool
131 | SimdBlockFilter::Find(const uint64_t key) const noexcept {
132 | const auto hash = hasher_(key);
133 | const uint32_t bucket_idx = hash & directory_mask_;  // Same bucket selection as Add().
134 | const __m256i mask = MakeMask(hash >> log_num_buckets_);  // Same mask derivation as Add().
135 | const __m256i bucket = reinterpret_cast<__m256i*>(directory_)[bucket_idx];
136 | // We should return true if 'bucket' has a one wherever 'mask' does. _mm256_testc_si256
137 | // takes the negation of its first argument and ands that with its second argument. In
138 | // our case, the result is zero everywhere iff there is a one in 'bucket' wherever
139 | // 'mask' is one. testc returns 1 if the result is 0 everywhere and returns 0 otherwise.
140 | return _mm256_testc_si256(bucket, mask);
141 | }
142 |
--------------------------------------------------------------------------------
/Tests/minimal_tests.hpp:
--------------------------------------------------------------------------------
1 | #ifndef FILTERS_MINIMAL_TESTS_HPP
2 | #define FILTERS_MINIMAL_TESTS_HPP
3 |
4 | #include "new_tests.hpp"
5 | #include
6 |
7 | template  // Validation helper that calls w_validate_filter with fixed, built-in arguments.
8 | auto default_validation_test_single() -> bool {  // Returns whatever w_validate_filter returns.
9 | std::stringstream ss;  // Handed to w_validate_filter by pointer; its contents are never read here.
10 | // Smaller configuration, kept for reference: return w_validate_filter(1000ul, 2000ul, 8ul, 42.0, 42.0, &ss);
11 | return w_validate_filter(100000ul, 200000ul, 8ul, 42.0, 42.0, &ss);  // First two arguments sit where validation_test_single passes filter_max_capacity and lookup_reps.
12 | }
13 |
14 | template  // Same fixed-argument validation as default_validation_test_single, plus a trailing `true` flag.
15 | auto default_validation_test_single_with_deletions() -> bool {  // Returns whatever w_validate_filter returns.
16 | std::stringstream ss;  // Handed to w_validate_filter by pointer; its contents are never read here.
17 | // Smaller configuration, kept for reference: return w_validate_filter(1000ul, 2000ul, 8ul, 42.0, 42.0, &ss);
18 | return w_validate_filter(100000ul, 200000ul, 8ul, 42.0, 42.0, &ss, true);  // NOTE(review): the final `true` presumably turns on the deletion phase (per this function's name) - confirm against w_validate_filter.
19 | }
20 |
21 |
22 | template  // Validation helper with caller-chosen capacity and lookup count.
23 | auto validation_test_single(size_t filter_max_capacity, size_t lookup_reps) -> bool {  // Returns whatever w_validate_filter returns.
24 | std::stringstream ss;  // Handed to w_validate_filter by pointer; its contents are never read here.
25 | return w_validate_filter(filter_max_capacity, lookup_reps, 8ul, 42.0, 42.0, &ss);  // The remaining arguments are the same fixed values the default_* helpers use.
26 | }
27 |
28 |
29 |
30 | template  // Measures false-positive rates of one filter over caller-supplied elements; prints the report to stdout and returns it.
31 | auto single_fp_rates(size_t filter_max_capacity, size_t lookup_reps, size_t bits_per_item, bool validate_before_benchmarking, vector *> *elements) -> std::stringstream {
32 | Table filter = FilterAPI::ConstructFromAddCount(filter_max_capacity);
33 | string filter_name = FilterAPI::get_name(&filter);
34 |
35 | bool valid = true;
36 | if (validate_before_benchmarking) {
37 | valid = default_validation_test_single();  // Optional validation run; a false result suppresses the measurement below.
38 | }
39 | if (valid) {
40 | std::stringstream end;
41 | auto tp = fp_rates_single_filter(&filter, elements);
42 | end = print_single_round_false_positive_rates(filter_name, lookup_reps, bits_per_item, std::get<1>(tp), std::get<0>(tp), true);  // Note the order: tuple element 1 is passed before element 0.
43 | std::cout << end.str();
44 | return end;
45 | }
46 | std::stringstream end;  // Validation failed: hand back an empty report.
47 | return end;
48 | }
49 |
50 |
51 | template  // Convenience overload: builds the add/find element sets itself, then delegates to the overload that takes `elements`.
52 | auto single_fp_rates(size_t filter_max_capacity, size_t lookup_reps, size_t bits_per_item, bool validate_before_benchmarking) -> std::stringstream {
53 | unordered_set v_add, v_find;//, v_delete;
54 | size_t add_size = std::floor(filter_max_capacity * 1);  // Multiplier of 1: add_size equals filter_max_capacity.
55 | set_init(add_size, &v_add);  // Called with the number of elements to add and the set to fill.
56 | set_init(lookup_reps, &v_find);  // Called with the number of lookups and the set to fill.
57 | vector *> elements{&v_add, &v_find};//, &v_delete};
58 | return single_fp_rates(filter_max_capacity, lookup_reps, bits_per_item, validate_before_benchmarking, &elements);
59 | }
60 |
61 | template  // Runs fp_rates_single_filter_probabilistic on a fresh filter; prints the report to stdout and returns it.
62 | auto single_fp_rates_probabilistic(size_t filter_max_capacity, size_t lookup_reps, size_t bits_per_item, vector *> *elements) -> std::stringstream {
63 | Table filter = FilterAPI::ConstructFromAddCount(filter_max_capacity);
64 | string filter_name = FilterAPI::get_name(&filter);
65 |
66 | auto tp = fp_rates_single_filter_probabilistic(&filter, elements);
67 | size_t false_counter = std::get<0>(tp);  // Tuple element 0.
68 | size_t true_counter = std::get<1>(tp);  // Tuple element 1.
69 | assert(false_counter >= true_counter);  // Sanity check on the two counters; compiled out when NDEBUG is defined.
70 | std::stringstream end = print_single_round_false_positive_rates_probabilistic(filter_name, lookup_reps, bits_per_item, true_counter, false_counter, true);
71 | std::cout << end.str();
72 | return end;
73 | }
74 |
75 |
76 | template  // Convenience overload: creates the add/find vectors via init_elements, then delegates to the overload that takes `elements`.
77 | auto single_fp_rates_probabilistic(size_t filter_max_capacity, size_t lookup_reps, size_t bits_per_item) -> std::stringstream {
78 | vector v_add, v_find;//, v_delete;
79 | vector *> elements{&v_add, &v_find};//, &v_delete};
80 | init_elements(filter_max_capacity, lookup_reps, &elements, false);  // NOTE(review): the trailing `false` presumably disables deletion elements (no v_delete is supplied) - confirm against init_elements.
81 | return single_fp_rates_probabilistic(filter_max_capacity, lookup_reps, bits_per_item, &elements);
82 | }
83 |
84 |
85 | template  // Benchmarks one filter over caller-supplied elements, optionally running the default validation first.
86 | void single_bench(size_t filter_max_capacity, size_t bench_precision, bool validate_before_benchmarking, vector *> *elements) {
87 | bool valid = true;
88 | if (validate_before_benchmarking) {
89 | valid = default_validation_test_single();  // A false result skips the benchmark entirely.
90 |
91 | }
92 | if (valid) {
93 | std::stringstream ss;
94 | ss = benchmark_single_filter_wrapper(filter_max_capacity, bench_precision, elements);  // The returned stream is stored but never read in this function.
95 | }
96 | }
97 |
98 |
99 | template  // Convenience overload: creates the add/find/delete vectors via init_elements, then delegates to the overload that takes `elements`.
100 | void single_bench(size_t filter_max_capacity, size_t lookup_reps, size_t bench_precision, bool validate_before_benchmarking, bool with_deletions) {
101 | vector v_add, v_find, v_delete;
102 | vector *> elements{&v_add, &v_find, &v_delete};
103 | init_elements(filter_max_capacity, lookup_reps, &elements, bench_precision, with_deletions);
104 | return single_bench(filter_max_capacity, bench_precision, validate_before_benchmarking, &elements);  // Returning a void expression from a void function is valid C++.
105 | }
106 |
107 | // template
108 | // void single_bench_with_del(size_t filter_max_capacity, size_t bench_precision, bool validate_before_benchmarking, vector *> *elements) {
109 | // // vector v_add, v_find, v_delete;
110 | // // vector *> elements{&v_add, &v_find, &v_delete};
111 | // // init_elements(filter_max_capacity, lookup_reps, &elements, bench_precision, with_deletions);
112 |
113 | // bool valid = true;
114 | // if (validate_before_benchmarking) {
115 | // valid = default_validation_test_single();
116 |
117 | // }
118 | // if (valid) {
119 | // std::stringstream ss;
120 | // ss = benchmark_single_filter_wrapper(filter_max_capacity, bench_precision, elements);
121 | // }
122 | // // return single_bench(filter_max_capacity, bench_precision, validate_before_benchmarking, &elements);
123 | // }
124 |
125 |
126 | #endif// FILTERS_MINIMAL_TESTS_HPP
--------------------------------------------------------------------------------
/Bloom_Filter/simd-block.h:
--------------------------------------------------------------------------------
1 | // Copied from Apache Impala (incubating), usable under the terms in the Apache License,
2 | // Version 2.0.
3 |
4 | // This is a block Bloom filter (from Putze et al.'s "Cache-, Hash- and Space-Efficient
5 | // Bloom Filters") with some twists:
6 | //
7 | // 1. Each block is a split Bloom filter - see Section 2.1 of Broder and Mitzenmacher's
8 | // "Network Applications of Bloom Filters: A Survey".
9 | //
10 | // 2. The number of bits set per Add() is constant in order to take advantage of SIMD
11 | // instructions.
12 |
13 | #pragma once
14 |
15 | #include
16 | #include
17 |
18 | #include
19 | #include
20 |
21 | #include "../hashutil.h"
22 | #include
23 |
24 |
25 | using uint32_t = ::std::uint32_t;
26 | using uint64_t = ::std::uint64_t;
27 |
28 | template  // Block Bloom filter whose 32-byte buckets are updated and tested with AVX2 intrinsics.
29 | class SimdBlockFilter {
30 | private:
31 | // The filter is divided up into Buckets:
32 | using Bucket = uint32_t[8];
33 |
34 | // log2(number of bytes in a bucket):
35 | static constexpr int LOG_BUCKET_BYTE_SIZE = 5;
36 |
37 | static_assert(
38 | (1 << LOG_BUCKET_BYTE_SIZE) == sizeof(Bucket) && sizeof(Bucket) == sizeof(__m256i),
39 | "Bucket sizing has gone awry.");
40 |
41 | // log_num_buckets_ is the log (base 2) of the number of buckets in the directory:
42 | const int log_num_buckets_;
43 |
44 | // directory_mask_ is (1 << log_num_buckets_) - 1. It is precomputed in the constructor
45 | // for efficiency reasons:
46 | const uint32_t directory_mask_;
47 | // Heap-allocated array of buckets, owned by this object (released in the destructor):
48 | Bucket *directory_;
49 | // Hash functor applied to every key in Add() and Find():
50 | HashFamily hasher_;
51 |
52 | public:
53 | // Consumes at most (1 << log_heap_space) bytes on the heap, but never fewer than two Buckets:
54 | explicit SimdBlockFilter(const int log_heap_space);
55 | // Move constructor: takes over that's directory and clears that.directory_ so the buffer is freed only once.
56 | SimdBlockFilter(SimdBlockFilter &&that)
57 | : log_num_buckets_(that.log_num_buckets_),
58 | directory_mask_(that.directory_mask_),
59 | directory_(that.directory_),
60 | hasher_(that.hasher_) { that.directory_ = nullptr; }  // Without this, both destructors would free() the same pointer.
61 | // Frees the directory (a no-op for a moved-from filter, whose directory_ is null):
62 | ~SimdBlockFilter() noexcept;
63 | // Sets the bits of key's mask in the bucket selected by its hash:
64 | void Add(const uint64_t key) noexcept;
65 | // Returns true iff every bit of key's mask is set in the bucket selected by its hash:
66 | bool Find(const uint64_t key) const noexcept;
67 | // Size of the bucket directory in bytes:
68 | uint64_t SizeInBytes() const { return sizeof(Bucket) * (1ull << log_num_buckets_); }
69 |
70 | private:
71 | // A helper function for Add()/Find(). Turns a 32-bit hash into a 256-bit Bucket
72 | // with 1 single 1-bit set in each 32-bit lane.
73 | static __m256i MakeMask(const uint32_t hash) noexcept;
74 | // Copying is disabled: two copies would share and double-free directory_.
75 | SimdBlockFilter(const SimdBlockFilter &) = delete;
76 |
77 | void operator=(const SimdBlockFilter &) = delete;
78 | };
79 |
80 | template  // Constructor: derives the bucket count from log_heap_space, then allocates and zeroes the directory.
81 | SimdBlockFilter::SimdBlockFilter(const int log_heap_space)  // Throws std::runtime_error without AVX2, std::bad_alloc if allocation fails.
82 | : // log_heap_space is the log2 of a size in bytes, so subtracting LOG_BUCKET_BYTE_SIZE gives
83 | // the log2 of the number of Buckets we will use (clamped to at least 1, i.e. two Buckets).
84 | log_num_buckets_(::std::max(1, log_heap_space - LOG_BUCKET_BYTE_SIZE)),
85 | // Don't use log_num_buckets_ if it will lead to undefined behavior by a shift that is
86 | // too large.
87 | directory_mask_((1ull << ::std::min(63, log_num_buckets_)) - 1),
88 | directory_(nullptr),
89 | hasher_() {
90 | if (!__builtin_cpu_supports("avx2")) {  // Runtime CPU feature check before any AVX2 intrinsic is used.
91 | throw ::std::runtime_error("SimdBlockFilter does not work without AVX2 instructions");
92 | }
93 | const size_t alloc_size = 1ull << (log_num_buckets_ + LOG_BUCKET_BYTE_SIZE);  // Directory size in bytes.
94 | const int malloc_failed =
95 | posix_memalign(reinterpret_cast(&directory_), 64, alloc_size);  // 64-byte-aligned allocation.
96 | if (malloc_failed) throw ::std::bad_alloc();  // posix_memalign signals failure with a nonzero return value.
97 | memset(directory_, 0, alloc_size);  // Start with every bit of every bucket clear.
98 | }
99 |
100 | template  // Destructor: releases the bucket directory.
101 | SimdBlockFilter::~SimdBlockFilter() noexcept {
102 | free(directory_);  // directory_ is obtained from posix_memalign in the constructor, so free() is the matching release.
103 | directory_ = nullptr;
104 | }
105 |
106 | // The SIMD reinterpret_casts technically violate C++'s strict aliasing rules. However, we
107 | // compile with -fno-strict-aliasing.
108 | template  // MakeMask: expands a 32-bit hash into a 256-bit mask with exactly one bit set per 32-bit lane.
109 | [[gnu::always_inline]] inline __m256i
110 | SimdBlockFilter::MakeMask(const uint32_t hash) noexcept {
111 | const __m256i ones = _mm256_set1_epi32(1);
112 | // Odd constants for hashing:
113 | const __m256i rehash = _mm256_setr_epi32(0x47b6137bU, 0x44974d91U, 0x8824ad5bU,
114 | 0xa2b7289dU, 0x705495c7U, 0x2df1424bU, 0x9efc4947U, 0x5c6bfb31U);
115 | // Load hash into a YMM register, repeated eight times
116 | __m256i hash_data = _mm256_set1_epi32(hash);
117 | // Multiply-shift hashing ala Dietzfelbinger et al.: multiply 'hash' by eight different
118 | // odd constants, then keep the 5 most significant bits from each product.
119 | hash_data = _mm256_mullo_epi32(rehash, hash_data);  // Keeps the low 32 bits of each lane's product.
120 | hash_data = _mm256_srli_epi32(hash_data, 27);  // Logical shift: each lane now holds a value in [0, 31].
121 | // Use these 5 bits to shift a single bit to a location in each 32-bit lane
122 | return _mm256_sllv_epi32(ones, hash_data);
123 | }
124 |
125 | template  // Add: inserts key by OR-ing its mask into the bucket selected by its hash.
126 | [[gnu::always_inline]] inline void
127 | SimdBlockFilter::Add(const uint64_t key) noexcept {
128 | const auto hash = hasher_(key);
129 | const uint32_t bucket_idx = hash & directory_mask_;  // The low bits of the hash choose the bucket.
130 | const __m256i mask = MakeMask(hash >> log_num_buckets_);  // The bits above the bucket index feed the mask.
131 | __m256i *const bucket = &reinterpret_cast<__m256i *>(directory_)[bucket_idx];
132 | _mm256_store_si256(bucket, _mm256_or_si256(*bucket, mask));  // Aligned store of the old bucket contents with the mask bits added.
133 | }
134 |
135 | template  // Find: returns true iff every bit of key's mask is set in the bucket selected by its hash.
136 | [[gnu::always_inline]] inline bool
137 | SimdBlockFilter::Find(const uint64_t key) const noexcept {
138 | const auto hash = hasher_(key);
139 | const uint32_t bucket_idx = hash & directory_mask_;  // Same bucket selection as Add().
140 | const __m256i mask = MakeMask(hash >> log_num_buckets_);  // Same mask derivation as Add().
141 | const __m256i bucket = reinterpret_cast<__m256i *>(directory_)[bucket_idx];
142 | // We should return true if 'bucket' has a one wherever 'mask' does. _mm256_testc_si256
143 | // takes the negation of its first argument and ands that with its second argument. In
144 | // our case, the result is zero everywhere iff there is a one in 'bucket' wherever
145 | // 'mask' is one. testc returns 1 if the result is 0 everywhere and returns 0 otherwise.
146 | return _mm256_testc_si256(bucket, mask);
147 | }
148 |
--------------------------------------------------------------------------------
/Hash_functions/wyhash.h:
--------------------------------------------------------------------------------
1 | //Author: Wang Yi
2 | #ifndef wyhash_final_version
3 | #define wyhash_final_version
4 | //defines that change behavior
5 | #ifndef WYHASH_CONDOM
6 | #define WYHASH_CONDOM 1 //0,1,2
7 | #endif
8 | #define WYHASH_32BIT_MUM 0 //faster on 32 bit system
9 | //includes
10 | #include
11 | #include
12 | #if defined(_MSC_VER) && defined(_M_X64)
13 | #include
14 | #pragma intrinsic(_umul128)
15 | #endif
16 | #if defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
17 | #define _likely_(x) __builtin_expect(x,1)
18 | #define _unlikely_(x) __builtin_expect(x,0)
19 | #else
20 | #define _likely_(x) (x)
21 | #define _unlikely_(x) (x)
22 | #endif
23 | //mum function
24 | static inline uint64_t _wyrot(uint64_t x) { return (x>>32)|(x<<32); }  // Rotates x by 32 bits, i.e. swaps its upper and lower 32-bit halves.
25 | static inline void _wymum(uint64_t *A, uint64_t *B){  // Multiplies *A by *B in place: low 64 bits of the product go to *A, high 64 bits to *B (XOR-ed into the old values when WYHASH_CONDOM>1).
26 | #if(WYHASH_32BIT_MUM)
27 | uint64_t hh=(*A>>32)*(*B>>32), hl=(*A>>32)*(unsigned)*B, lh=(unsigned)*A*(*B>>32), ll=(uint64_t)(unsigned)*A*(unsigned)*B;  // Four 32x32 partial products; this variant mixes them instead of forming the exact 128-bit product.
28 | #if(WYHASH_CONDOM>1)
29 | *A^=_wyrot(hl)^hh; *B^=_wyrot(lh)^ll;
30 | #else
31 | *A=_wyrot(hl)^hh; *B=_wyrot(lh)^ll;
32 | #endif
33 | #elif defined(__SIZEOF_INT128__)
34 | __uint128_t r=*A; r*=*B;  // Exact 128-bit product using the compiler's native 128-bit integer.
35 | #if(WYHASH_CONDOM>1)
36 | *A^=(uint64_t)r; *B^=(uint64_t)(r>>64);
37 | #else
38 | *A=(uint64_t)r; *B=(uint64_t)(r>>64);
39 | #endif
40 | #elif defined(_MSC_VER) && defined(_M_X64)
41 | #if(WYHASH_CONDOM>1)
42 | uint64_t a, b;
43 | a=_umul128(*A,*B,&b);
44 | *A^=a; *B^=b;
45 | #else
46 | *A=_umul128(*A,*B,B);  // _umul128 returns the low half and stores the high half through its third argument.
47 | #endif
48 | #else
49 | uint64_t ha=*A>>32, hb=*B>>32, la=(uint32_t)*A, lb=(uint32_t)*B, hi, lo;  // Portable fallback: split both operands into 32-bit halves.
50 | uint64_t rh=ha*hb, rm0=ha*lb, rm1=hb*la, rl=la*lb, t=rl+(rm0<<32), c=t<rl;  // c records the carry out of the first low-word addition.
51 | lo=t+(rm1<<32); c+=lo<t; hi=rh+(rm0>>32)+(rm1>>32)+c;  // Second low-word addition, its carry, then the high word.
52 | #if(WYHASH_CONDOM>1)
53 | *A^=lo; *B^=hi;
54 | #else
55 | *A=lo; *B=hi;
56 | #endif
57 | #endif
58 | }
59 | static inline uint64_t _wymix(uint64_t A, uint64_t B){ _wymum(&A,&B); return A^B; }  // Runs _wymum on local copies of A and B and returns the XOR of the two results.
60 | //read functions
61 | #ifndef WYHASH_LITTLE_ENDIAN
62 | #if defined(_WIN32) || defined(__LITTLE_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
63 | #define WYHASH_LITTLE_ENDIAN 1
64 | #elif defined(__BIG_ENDIAN__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
65 | #define WYHASH_LITTLE_ENDIAN 0
66 | #endif
67 | #endif
68 | #if (WYHASH_LITTLE_ENDIAN)
69 | static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return v;}  // Reads 8 bytes from p via memcpy (no alignment requirement) and returns them unchanged.
70 | static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return v;}  // Reads 4 bytes from p via memcpy and widens the unsigned value to 64 bits.
71 | #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__clang__)
72 | static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return __builtin_bswap64(v);}  // Non-little-endian build: same 8-byte read followed by a byte swap.
73 | static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return __builtin_bswap32(v);}  // Non-little-endian build: same 4-byte read followed by a byte swap.
74 | #elif defined(_MSC_VER)
75 | static inline uint64_t _wyr8(const uint8_t *p) { uint64_t v; memcpy(&v, p, 8); return _byteswap_uint64(v);}  // MSVC spelling of the byte-swapped 8-byte read.
76 | static inline uint64_t _wyr4(const uint8_t *p) { unsigned v; memcpy(&v, p, 4); return _byteswap_ulong(v);}  // MSVC spelling of the byte-swapped 4-byte read.
77 | #endif
78 | static inline uint64_t _wyr3(const uint8_t *p, unsigned k) { return (((uint64_t)p[0])<<16)|(((uint64_t)p[k>>1])<<8)|p[k-1];}  // Packs p[0], p[k>>1] and p[k-1] into 24 bits; needs k >= 1 (the indices coincide when k is 1 or 2).
79 | //wyhash function
80 | static inline uint64_t _wyfinish16(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){  // Final mixing step for the last i (<= 16) bytes at p; len is the total input length.
81 | #if(WYHASH_CONDOM>0)
82 | uint64_t a, b;
83 | if(_likely_(i<=8)){
84 | if(_likely_(i>=4)){ a=_wyr4(p); b=_wyr4(p+i-4); }  // 4..8 bytes: first and last 4 bytes (they overlap when i < 8).
85 | else if (_likely_(i)){ a=_wyr3(p,i); b=0; }  // 1..3 bytes.
86 | else a=b=0;  // No bytes left.
87 | }
88 | else{ a=_wyr8(p); b=_wyr8(p+i-8); }  // 9..16 bytes: first and last 8 bytes (they overlap when i < 16).
89 | return _wymix(secret[1]^len,_wymix(a^secret[1], b^seed));
90 | #else
91 | #define oneshot_shift ((i<8)*((8-i)<<3))
92 | return _wymix(secret[1]^len,_wymix((_wyr8(p)<<oneshot_shift)^secret[1],(_wyr8(p+i-8)>>oneshot_shift)^seed));  // Always reads 8 bytes at p and at p+i-8, then shifts by oneshot_shift when i < 8; the reads can extend outside [p, p+i).
93 | #endif
94 | }
95 |
96 | static inline uint64_t _wyfinish(const uint8_t *p, uint64_t len, uint64_t seed, const uint64_t *secret, uint64_t i){  // Hashes the remaining i bytes at p; len is passed through unchanged to _wyfinish16.
97 | if(_likely_(i<=16)) return _wyfinish16(p,len,seed,secret,i);  // At most 16 bytes left: finish directly.
98 | return _wyfinish(p+16,len,_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed),secret,i-16);  // Otherwise fold the next 16 bytes into the seed and recurse on the rest.
99 | }
100 |
101 | static inline uint64_t wyhash(const void *key, uint64_t len, uint64_t seed, const uint64_t *secret){  // Hashes len bytes at key with the given seed; secret must provide at least 5 words (secret[0..4] are read).
102 | const uint8_t *p=(const uint8_t *)key;
103 | uint64_t i=len; seed^=*secret;  // i counts the bytes still to process.
104 | if(_unlikely_(i>64)){
105 | uint64_t see1=seed;  // Second accumulator, updated alongside seed.
106 | do{
107 | seed=_wymix(_wyr8(p)^secret[1],_wyr8(p+8)^seed)^_wymix(_wyr8(p+16)^secret[2],_wyr8(p+24)^seed);  // Bytes 0..31 of the 64-byte block.
108 | see1=_wymix(_wyr8(p+32)^secret[3],_wyr8(p+40)^see1)^_wymix(_wyr8(p+48)^secret[4],_wyr8(p+56)^see1);  // Bytes 32..63 of the 64-byte block.
109 | p+=64; i-=64;
110 | }while(i>64);
111 | seed^=see1;  // Merge the two accumulators.
112 | }
113 | return _wyfinish(p,len,seed,secret,i);  // The remaining 0..64 bytes are handled by _wyfinish.
114 | }
115 | //utility functions
116 | const uint64_t _wyp[5] = {0xa0761d6478bd642full, 0xe7037ed1a0b428dbull, 0x8ebc6af09c88c6e3ull, 0x589965cc75374cc3ull, 0x1d8e4e27c47d124full};  // Five 64-bit constants used by the helpers below; same five-word shape wyhash() reads from its secret argument.
117 | static inline uint64_t wyhash64(uint64_t A, uint64_t B){ A^=_wyp[0]; B^=_wyp[1]; _wymum(&A,&B); return _wymix(A^_wyp[0],B^_wyp[1]);}  // Mixes two 64-bit values into one 64-bit hash.
118 | static inline uint64_t wyrand(uint64_t *seed){ *seed+=_wyp[0]; return _wymix(*seed,*seed^_wyp[1]);}  // Advances *seed by a fixed increment and returns a value mixed from the new state.
119 | static inline double wy2u01(uint64_t r){ const double _wynorm=1.0/(1ull<<52); return (r>>12)*_wynorm;}  // Maps the top 52 bits of r to a double in [0, 1).
120 | static inline double wy2gau(uint64_t r){ const double _wynorm=1.0/(1ull<<20); return ((r&0x1fffff)+((r>>21)&0x1fffff)+((r>>42)&0x1fffff))*_wynorm-3.0;}  // Sums three 21-bit slices of r, scales by 2^-20 and subtracts 3: a value in [-3, 3), approximately normal per the function name.
121 | static inline void make_secret(uint64_t seed, uint64_t *secret){
122 | uint8_t c[] = {15, 23, 27, 29, 30, 39, 43, 45, 46, 51, 53, 54, 57, 58, 60, 71, 75, 77, 78, 83, 85, 86, 89, 90, 92, 99, 101, 102, 105, 106, 108, 113, 114, 116, 120, 135, 139, 141, 142, 147, 149, 150, 153, 154, 156, 163, 165, 166, 169, 170, 172, 177, 178, 180, 184, 195, 197, 198, 201, 202, 204, 209, 210, 212, 216, 225, 226, 228, 232, 240 };
123 | for(size_t i=0;i<5;i++){
124 | uint8_t ok;
125 | do{
126 | ok=1; secret[i]=0;
127 | for(size_t j=0;j<64;j+=8) secret[i]|=((uint64_t)c[wyrand(&seed)%sizeof(c)])< y_quot
19 | * 2) z_quot = y_quot AND z_rem >= y_rem.
20 | */
21 | #ifndef FILTERS_SPARE_VALIDATOR_HPP
22 | #define FILTERS_SPARE_VALIDATOR_HPP
23 |
24 |
25 | #include "../../hashutil.h"
26 | #include "../L2_pd/twoDimPD.hpp"
27 | #include "../basic_function_util.h"
28 | #include "../macros.h"
29 | #include
30 | #include
31 |
32 | namespace SV {  // Integer codes naming the operation kinds this validator distinguishes.
33 | enum operations {  // NOTE(review): presumably the values recorded in log_vec_key_t entries - confirm against the code that builds the log.
34 | Add_op = 1,
35 | Remove_op = -1,
36 | Pop_op = -2
37 | };
38 | }
39 |
40 | typedef std::tuple qr_key_t;
41 | typedef std::tuple log_vec_key_t;
42 | //typedef unordered_map my_map_t;
43 | typedef vector log_vec_t;
44 |
45 | namespace qr_map {
46 |
47 | // typedef std::tuple key;
48 |
49 | struct key_hash {  // Hash functor for qr_key_t. The std::unary_function base (deprecated in C++11, removed in C++17) is dropped; unordered containers never required it.
50 | std::size_t operator()(const qr_key_t &k) const {
51 | auto res = (std::get<0>(k) << 8ul) | std::get<1>(k);  // Element 0 shifted left by 8 bits, OR-ed with element 1.
52 | // res = (res << 1ul) | std::get<2>(k);
53 | return res;
54 | }
55 | };
56 |
57 | struct key_equal {  // Equality functor for qr_key_t. The std::binary_function base (deprecated in C++11, removed in C++17) is dropped; unordered containers never required it.
58 | bool operator()(const qr_key_t &v0, const qr_key_t &v1) const {
59 | return (
60 | std::get<0>(v0) == std::get<0>(v1) &&  // Only elements 0 and 1 take part, matching what key_hash hashes.
61 | std::get<1>(v0) == std::get<1>(v1));
62 | // std::get<2>(v0) == std::get<2>(v1));
63 | }
64 | };
65 |
66 |
67 | typedef std::unordered_map