├── .gitignore ├── include ├── hashutil.h ├── partitioned_counter.h ├── zipf.h ├── gqf_wrapper.h ├── gqf_file.h ├── gqf_int.h └── gqf.h ├── scripts ├── merge_into_master.sh └── merge_into_develop.sh ├── LICENSE ├── src ├── partitioned_counter.c ├── test_partitioned_counter.c ├── test_threadsafe.c ├── hashutil.c ├── zipf.c ├── test.c ├── gqf_file.c ├── bm.c └── gqf.c ├── README.md └── Makefile /.gitignore: -------------------------------------------------------------------------------- 1 | !.gitignore 2 | !.*.sh 3 | 4 | *.cqf 5 | *.o 6 | 7 | main 8 | bm 9 | 10 | -------------------------------------------------------------------------------- /include/hashutil.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================ 3 | * 4 | * Authors: Prashant Pandey 5 | * Rob Johnson 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #ifndef _HASHUTIL_H_ 11 | #define _HASHUTIL_H_ 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | uint64_t MurmurHash64B ( const void * key, int len, unsigned int seed ); 18 | uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed ); 19 | 20 | uint64_t hash_64(uint64_t key, uint64_t mask); 21 | uint64_t hash_64i(uint64_t key, uint64_t mask); 22 | 23 | #endif // #ifndef _HASHUTIL_H_ 24 | 25 | 26 | -------------------------------------------------------------------------------- /scripts/merge_into_master.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -eq 0 ] 4 | then 5 | echo "No input arguments provided. Usage is merge_into_master.sh " 6 | exit 1 7 | fi 8 | 9 | feature=$1 10 | 11 | # from https://stackoverflow.com/questions/173919/is-there-a-theirs-version-of-git-merge-s-ours 12 | # in case branchA is not our current branch 13 | git checkout master 14 | 15 | # make merge commit but without conflicts!! 16 | # the contents of 'ours' will be discarded later 17 | git merge -s ours ${feature} 18 | 19 | # make temporary branch to merged commit 20 | git branch branchTEMP 21 | 22 | # get contents of working tree and index to the one of branchB 23 | git reset --hard ${feature} 24 | 25 | # reset to our merged commit but 26 | # keep contents of working tree and index 27 | git reset --soft branchTEMP 28 | 29 | # change the contents of the merged commit 30 | # with the contents of branchB 31 | git commit --amend 32 | 33 | # get rid off our temporary branch 34 | git branch -D branchTEMP 35 | 36 | # verify that the merge commit contains only contents of branchB 37 | git diff HEAD ${feature} 38 | -------------------------------------------------------------------------------- /scripts/merge_into_develop.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ $# -eq 0 ] 4 | then 5 | echo "No input arguments provided. Usage is merge_into_develop.sh " 6 | exit 1 7 | fi 8 | 9 | feature=$1 10 | 11 | # from https://stackoverflow.com/questions/173919/is-there-a-theirs-version-of-git-merge-s-ours 12 | # in case branchA is not our current branch 13 | git checkout develop 14 | 15 | # make merge commit but without conflicts!! 
16 | # the contents of 'ours' will be discarded later 17 | git merge -s ours ${feature} 18 | 19 | # make temporary branch to merged commit 20 | git branch branchTEMP 21 | 22 | # get contents of working tree and index to the one of branchB 23 | git reset --hard ${feature} 24 | 25 | # reset to our merged commit but 26 | # keep contents of working tree and index 27 | git reset --soft branchTEMP 28 | 29 | # change the contents of the merged commit 30 | # with the contents of branchB 31 | git commit --amend 32 | 33 | # get rid off our temporary branch 34 | git branch -D branchTEMP 35 | 36 | # verify that the merge commit contains only contents of branchB 37 | git diff HEAD ${feature} 38 | -------------------------------------------------------------------------------- /include/partitioned_counter.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================ 3 | * 4 | * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu 5 | * Organization: Stony Brook University 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #ifndef _PARTITIONED_COUNTER_H_ 11 | #define _PARTITIONED_COUNTER_H_ 12 | 13 | #include 14 | #include 15 | 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | typedef struct local_counter { 21 | int64_t counter; 22 | int64_t padding[7]; 23 | } local_counter; 24 | 25 | typedef struct local_counter lctr_t; 26 | 27 | typedef struct partitioned_counter { 28 | lctr_t *local_counters; 29 | int64_t *global_counter; 30 | uint32_t num_counters; 31 | int32_t threshold; 32 | } partitioned_counter; 33 | 34 | typedef struct partitioned_counter pc_t; 35 | 36 | #define PC_ERROR -1 37 | 38 | /* on success returns 0. 39 | * If allocation fails returns PC_ERROR 40 | */ 41 | int pc_init(pc_t *pc, int64_t *global_counter, uint32_t num_counters, 42 | int32_t threshold); 43 | 44 | void pc_destructor(pc_t *pc); 45 | 46 | void pc_add(pc_t *pc, int64_t count); 47 | 48 | void pc_sync(pc_t *pc); 49 | 50 | #ifdef __cplusplus 51 | } 52 | #endif 53 | 54 | #endif /* _PARTITIONED_COUNTER_H_ */ 55 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Rob Johnson and Prahsant Pandey 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /include/zipf.h: -------------------------------------------------------------------------------- 1 | #ifndef ZIPF_H 2 | #define ZIPF_H 3 | 4 | /* Zipfian number generator. 5 | * Goals: Fast (10M numbers/s) 6 | * Configurable exponent. 7 | * Capable of generating at least 2^{32} (4 billion) distinct numbers. 8 | * There are two parameters: 9 | * s the characteristic exponent, and 10 | * N the number of elements in the universe. 11 | * Once created, this data structure is read-only, and can be used in a multithreaded fashion. This code 12 | * calls random(), which is generally multihread-safe these days. 13 | * 14 | * Copyright 2011 Bradley C. Kuszmaul 15 | */ 16 | 17 | #include 18 | 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | 23 | typedef struct zipfian const *ZIPFIAN; 24 | ZIPFIAN create_zipfian (double s, long N, long int (*randomfun)(void)); 25 | // Effect: Create a generator of zipfian numbers. 26 | 27 | void destroy_zipfian (const ZIPFIAN); 28 | // Effect; Destroy the zipfian generator (freeing all it's memory, for example). 29 | 30 | long zipfian_gen (const ZIPFIAN); 31 | // Effect: return a number from 0 (inclusive) to N (exlusive) with probability distribution approximately as follows. 32 | // $k-1$ is returned with probability $1/(k^s H_{N,s})$ 33 | // where $H_{N,s}$ is the $N$th generalized harmonic number $\sum_{n=1}^{N} 1/n^s$. 34 | 35 | long zipfian_hash (const ZIPFIAN); 36 | // Effect: Return a random 64-bit number. 
The numbers themselves are uniform hashes of the numbers from 0 (inclusive) to N (exclusive) 37 | 38 | void generate_random_keys (uint64_t *elems, long N, long gencount, double s); 39 | 40 | #ifdef __cplusplus 41 | } 42 | #endif 43 | 44 | #endif 45 | -------------------------------------------------------------------------------- /include/gqf_wrapper.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================ 3 | * 4 | * Authors: Prashant Pandey 5 | * Rob Johnson 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #ifndef GQF_WRAPPER_H 11 | #define GQF_WRAPPER_H 12 | 13 | #include "gqf.h" 14 | #include "gqf_int.h" 15 | #include "gqf_file.h" 16 | 17 | QF g_quotient_filter; 18 | QFi g_quotient_filter_itr; 19 | 20 | extern inline int gqf_init(uint64_t nbits, uint64_t num_hash_bits) 21 | { 22 | uint64_t nslots = 1 << nbits; 23 | qf_malloc(&g_quotient_filter, nslots, num_hash_bits, 0, QF_HASH_NONE, 0); 24 | return 0; 25 | } 26 | 27 | extern inline int gqf_insert(__uint128_t val, uint64_t count) 28 | { 29 | qf_insert(&g_quotient_filter, val, 0, count, QF_NO_LOCK); 30 | return 0; 31 | } 32 | 33 | extern inline int gqf_lookup(__uint128_t val) 34 | { 35 | return qf_count_key_value(&g_quotient_filter, val, 0, 0); 36 | } 37 | 38 | extern inline __uint128_t gqf_range() 39 | { 40 | return g_quotient_filter.metadata->range; 41 | } 42 | 43 | extern inline int gqf_destroy() 44 | { 45 | qf_free(&g_quotient_filter); 46 | return 0; 47 | } 48 | 49 | extern inline int gqf_iterator(uint64_t pos) 50 | { 51 | qf_iterator_from_position(&g_quotient_filter, &g_quotient_filter_itr, pos); 52 | return 0; 53 | } 54 | 55 | /* Returns 0 if the iterator is still valid (i.e. has not reached the 56 | * end of the QF. */ 57 | extern inline int gqf_get(uint64_t *key, uint64_t *value, uint64_t *count) 58 | { 59 | return qfi_get_hash(&g_quotient_filter_itr, key, value, count); 60 | } 61 | 62 | /* Advance to next entry. Returns whether or not another entry is 63 | * found. */ 64 | extern inline int gqf_next() 65 | { 66 | return qfi_next(&g_quotient_filter_itr); 67 | } 68 | 69 | /* Check to see if the if the end of the QF */ 70 | extern inline int gqf_end() 71 | { 72 | return qfi_end(&g_quotient_filter_itr); 73 | } 74 | 75 | #endif 76 | -------------------------------------------------------------------------------- /include/gqf_file.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================ 3 | * 4 | * Authors: Prashant Pandey 5 | * Rob Johnson 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #ifndef _GQF_FILE_H_ 11 | #define _GQF_FILE_H_ 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | #include "gqf.h" 18 | 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | 23 | /* Initialize a file-backed (i.e. mmapped) CQF at "filename". */ 24 | bool qf_initfile(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t 25 | value_bits, enum qf_hashmode hash, uint32_t seed, const char* 26 | filename); 27 | 28 | #define QF_USEFILE_READ_ONLY (0x01) 29 | #define QF_USEFILE_READ_WRITE (0x02) 30 | 31 | /* mmap existing cqf in "filename" into "qf". */ 32 | uint64_t qf_usefile(QF* qf, const char* filename, int flag); 33 | 34 | /* Resize the QF to the specified number of slots. 
Uses mmap to 35 | * initialize the new file, and calls munmap() on the old memory. 36 | * Return value: 37 | * >= 0: number of keys copied during resizing. 38 | * */ 39 | int64_t qf_resize_file(QF *qf, uint64_t nslots); 40 | 41 | bool qf_closefile(QF* qf); 42 | 43 | bool qf_deletefile(QF* qf); 44 | 45 | /* write data structure of to the disk */ 46 | uint64_t qf_serialize(const QF *qf, const char *filename); 47 | 48 | /* read data structure off the disk */ 49 | uint64_t qf_deserialize(QF *qf, const char *filename); 50 | 51 | /* This wraps qfi_next, using madvise(DONTNEED) to reduce our RSS. 52 | Only valid on mmapped QFs, i.e. cqfs from qf_initfile and 53 | qf_usefile. */ 54 | int qfi_next_madvise(QFi *qfi); 55 | 56 | /* Furthermore, you can call this immediately after constructing the 57 | qfi to call madvise(DONTNEED) on the portion of the cqf up to the 58 | first element visited by the qfi. */ 59 | int qfi_initial_madvise(QFi *qfi); 60 | 61 | #ifdef __cplusplus 62 | } 63 | #endif 64 | 65 | #endif // _GQF_FILE_H_ 66 | -------------------------------------------------------------------------------- /src/partitioned_counter.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================ 3 | * 4 | * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu 5 | * Organization: Stony Brook University 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #define _GNU_SOURCE 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "partitioned_counter.h" 21 | 22 | #define min(a,b) ((a) < (b) ? (a) : (b)) 23 | 24 | int pc_init(pc_t *pc, int64_t *global_counter, uint32_t num_counters, 25 | int32_t threshold) { 26 | int num_cpus = (int)sysconf( _SC_NPROCESSORS_ONLN ); 27 | if (num_cpus < 0) { 28 | perror( "sysconf" ); 29 | return PC_ERROR; 30 | } 31 | pc->num_counters = num_counters == 0 ? 
num_cpus : min(num_cpus, 32 | num_counters); 33 | 34 | pc->local_counters = (lctr_t *)calloc(pc->num_counters, 35 | sizeof(*pc->local_counters)); 36 | if (pc->local_counters == NULL) { 37 | perror("Couldn't allocate memory for local counters."); 38 | return PC_ERROR; 39 | } 40 | /*printf("Padding check: 0: %p 1: %p\n", (void*)&pc->local_counters[0],*/ 41 | /*(void*)&pc->local_counters[1]);*/ 42 | pc->global_counter = global_counter; 43 | pc->threshold = threshold; 44 | 45 | return 0; 46 | } 47 | 48 | void pc_destructor(pc_t *pc) 49 | { 50 | pc_sync(pc); 51 | lctr_t *lc = pc->local_counters; 52 | pc->local_counters = NULL; 53 | free(lc); 54 | } 55 | 56 | void pc_add(pc_t *pc, int64_t count) { 57 | int cpuid = sched_getcpu(); 58 | uint32_t counter_id = cpuid % pc->num_counters; 59 | int64_t cur_count = 60 | __atomic_add_fetch(&pc->local_counters[counter_id].counter, count, 61 | __ATOMIC_SEQ_CST); 62 | if (cur_count > pc->threshold || cur_count < -pc->threshold) { 63 | int64_t new_count = 64 | __atomic_exchange_n(&pc->local_counters[counter_id].counter, 0, 65 | __ATOMIC_SEQ_CST); 66 | __atomic_fetch_add(pc->global_counter, new_count, __ATOMIC_SEQ_CST); 67 | } 68 | } 69 | 70 | void pc_sync(pc_t *pc) { 71 | for (uint32_t i = 0; i < pc->num_counters; i++) { 72 | int64_t c = __atomic_exchange_n(&pc->local_counters[i].counter, 0, 73 | __ATOMIC_SEQ_CST); 74 | __atomic_fetch_add(pc->global_counter, c, __ATOMIC_SEQ_CST); 75 | } 76 | } 77 | 78 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cqf 2 | A General-Purpose Counting Filter: Counting Quotient Filter (CQF) 3 | 4 | This work appeared at SIGMOD 2017. If you use this software please cite us: 5 | ``` 6 | @inproceedings{DBLP:conf/sigmod/PandeyBJP17, 7 | author = {Prashant Pandey and 8 | Michael A. Bender and 9 | Rob Johnson and 10 | Robert Patro}, 11 | title = {A General-Purpose Counting Filter: Making Every Bit Count}, 12 | booktitle = {Proceedings of the 2017 {ACM} International Conference on Management 13 | of Data, {SIGMOD} Conference 2017, Chicago, IL, USA, May 14-19, 2017}, 14 | pages = {775--787}, 15 | year = {2017}, 16 | crossref = {DBLP:conf/sigmod/2017}, 17 | url = {http://doi.acm.org/10.1145/3035918.3035963}, 18 | doi = {10.1145/3035918.3035963}, 19 | timestamp = {Wed, 10 May 2017 22:12:12 +0200}, 20 | biburl = {http://dblp.org/rec/bib/conf/sigmod/PandeyBJP17}, 21 | bibsource = {dblp computer science bibliography, http://dblp.org} 22 | } 23 | ``` 24 | 25 | Overview 26 | -------- 27 | The CQF supports approximate membership testing and counting the occurrences of 28 | items in a data set. This general-purpose AMQ is small and fast, has good 29 | locality of reference, scales out of RAM to SSD, and supports deletions, 30 | counting (even on skewed data sets), resizing, merging, and highly concurrent 31 | access. 32 | 33 | API 34 | -------- 35 | * `qf_insert(item, count)`: insert an item to the filter 36 | * `qf_count_key_value(item)`: return the count of the item. Note that this 37 | method may return false positive results like Bloom filters or an over count. 38 | * `qf_remove(item, count)`: decrement the count of the item by count. If count 39 | is 0 then completely remove the item. 40 | 41 | Build 42 | ------- 43 | This library depends on libssl. 44 | 45 | The code uses two new instructions to implement select on machine words introduced 46 | in intel's Haswell line of CPUs. 
However, there is also an alternate implementation 47 | of select on machine words to work on CPUs older than Haswell. 48 | 49 | To build on a Haswell or newer hardware: 50 | ```bash 51 | $ make test 52 | $ ./test 24 8 53 | ``` 54 | 55 | To build on an older hardware (older than Haswell): 56 | ```bash 57 | $ make NH=1 test 58 | $ ./test 24 8 59 | ``` 60 | 61 | The argument to main is the log of the number of slots in the CQF. For example, 62 | to create a CQF with 2^30 slots, the argument will be 30. 63 | 64 | Contributing 65 | ------------ 66 | Contributions via GitHub pull requests are welcome. 67 | 68 | 69 | Authors 70 | ------- 71 | - Prashant Pandey 72 | - Rob Johnson 73 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | TARGETS=test test_threadsafe test_pc bm 2 | 3 | ifdef D 4 | DEBUG=-g 5 | OPT= 6 | else 7 | DEBUG= 8 | OPT=-Ofast 9 | endif 10 | 11 | ifdef NH 12 | ARCH= 13 | else 14 | ARCH=-msse4.2 -D__SSE4_2_ 15 | endif 16 | 17 | ifdef P 18 | PROFILE=-pg -no-pie # for bug in gprof. 19 | endif 20 | 21 | LOC_INCLUDE=include 22 | LOC_SRC=src 23 | LOC_TEST=test 24 | OBJDIR=obj 25 | 26 | CC = gcc -std=gnu11 27 | CXX = g++ -std=c++11 28 | LD= gcc -std=gnu11 29 | 30 | CXXFLAGS = -Wall $(DEBUG) $(PROFILE) $(OPT) $(ARCH) -m64 -I. -Iinclude 31 | 32 | LDFLAGS = $(DEBUG) $(PROFILE) $(OPT) -lpthread -lssl -lcrypto -lm 33 | 34 | # 35 | # declaration of dependencies 36 | # 37 | 38 | all: $(TARGETS) 39 | 40 | # dependencies between programs and .o files 41 | 42 | test: $(OBJDIR)/test.o $(OBJDIR)/gqf.o $(OBJDIR)/gqf_file.o \ 43 | $(OBJDIR)/hashutil.o \ 44 | $(OBJDIR)/partitioned_counter.o 45 | 46 | test_threadsafe: $(OBJDIR)/test_threadsafe.o $(OBJDIR)/gqf.o \ 47 | $(OBJDIR)/gqf_file.o $(OBJDIR)/hashutil.o \ 48 | $(OBJDIR)/partitioned_counter.o 49 | 50 | test_pc: $(OBJDIR)/test_partitioned_counter.o $(OBJDIR)/gqf.o \ 51 | $(OBJDIR)/gqf_file.o $(OBJDIR)/hashutil.o \ 52 | $(OBJDIR)/partitioned_counter.o 53 | 54 | bm: $(OBJDIR)/bm.o $(OBJDIR)/gqf.o $(OBJDIR)/gqf_file.o \ 55 | $(OBJDIR)/zipf.o $(OBJDIR)/hashutil.o \ 56 | $(OBJDIR)/partitioned_counter.o 57 | 58 | # dependencies between .o files and .h files 59 | 60 | $(OBJDIR)/test.o: $(LOC_INCLUDE)/gqf.h $(LOC_INCLUDE)/gqf_file.h \ 61 | $(LOC_INCLUDE)/hashutil.h \ 62 | $(LOC_INCLUDE)/partitioned_counter.h 63 | 64 | $(OBJDIR)/test_threadsafe.o: $(LOC_INCLUDE)/gqf.h $(LOC_INCLUDE)/gqf_file.h \ 65 | $(LOC_INCLUDE)/hashutil.h \ 66 | $(LOC_INCLUDE)/partitioned_counter.h 67 | 68 | $(OBJDIR)/bm.o: $(LOC_INCLUDE)/gqf_wrapper.h \ 69 | $(LOC_INCLUDE)/partitioned_counter.h 70 | 71 | 72 | # dependencies between .o files and .cc (or .c) files 73 | 74 | $(OBJDIR)/gqf.o: $(LOC_SRC)/gqf.c $(LOC_INCLUDE)/gqf.h 75 | $(OBJDIR)/gqf_file.o: $(LOC_SRC)/gqf_file.c $(LOC_INCLUDE)/gqf_file.h 76 | $(OBJDIR)/hashutil.o: $(LOC_SRC)/hashutil.c $(LOC_INCLUDE)/hashutil.h 77 | $(OBJDIR)/partitioned_counter.o: $(LOC_INCLUDE)/partitioned_counter.h 78 | 79 | # 80 | # generic build rules 81 | # 82 | 83 | $(TARGETS): 84 | $(LD) $^ -o $@ $(LDFLAGS) 85 | 86 | $(OBJDIR)/%.o: $(LOC_SRC)/%.cc | $(OBJDIR) 87 | $(CXX) $(CXXFLAGS) $(INCLUDE) $< -c -o $@ 88 | 89 | $(OBJDIR)/%.o: $(LOC_SRC)/%.c | $(OBJDIR) 90 | $(CC) $(CXXFLAGS) $(INCLUDE) $< -c -o $@ 91 | 92 | $(OBJDIR)/%.o: $(LOC_TEST)/%.c | $(OBJDIR) 93 | $(CC) $(CXXFLAGS) $(INCLUDE) $< -c -o $@ 94 | 95 | $(OBJDIR): 96 | @mkdir -p $(OBJDIR) 97 | 98 | clean: 99 | rm -rf $(OBJDIR) $(TARGETS) core 100 | 101 | 
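To complement the README's API and Build sections above, here is a minimal usage sketch of the in-memory CQF API declared in `include/gqf.h` (`qf_malloc`, `qf_insert`, `qf_count_key_value`, `qf_remove`, `qf_free`). The file name `example.c`, the slot/remainder sizes, and the key value 42 are illustrative choices only, not part of the repository; the link flags simply mirror the Makefile's LDFLAGS.

```c
/* example.c -- illustrative sketch only.  Build (roughly, per the Makefile):
 *   gcc -std=gnu11 -Iinclude example.c src/gqf.c src/gqf_file.c \
 *       src/hashutil.c src/partitioned_counter.c -lpthread -lssl -lcrypto -lm
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include "gqf.h"

int main(void)
{
	QF qf;
	uint64_t qbits = 20;               /* log2 of the number of slots */
	uint64_t nslots = 1ULL << qbits;
	uint64_t key_bits = qbits + 8;     /* 8 remainder bits per key */

	/* Allocate an in-memory CQF using the invertible hash mode. */
	if (!qf_malloc(&qf, nslots, key_bits, 0, QF_HASH_INVERTIBLE, 0)) {
		fprintf(stderr, "Can't allocate CQF.\n");
		return EXIT_FAILURE;
	}

	/* Insert key 42 three times; a negative return signals an error
	 * such as QF_NO_SPACE or QF_COULDNT_LOCK. */
	if (qf_insert(&qf, 42, 0, 3, QF_NO_LOCK) < 0) {
		fprintf(stderr, "Insertion failed.\n");
		return EXIT_FAILURE;
	}

	printf("count(42) = %lu\n",
	       (unsigned long)qf_count_key_value(&qf, 42, 0, QF_NO_LOCK));

	/* Decrement the count by 3, i.e. remove the key entirely. */
	qf_remove(&qf, 42, 0, 3, QF_NO_LOCK);
	printf("count(42) after removal = %lu\n",
	       (unsigned long)qf_count_key_value(&qf, 42, 0, QF_NO_LOCK));

	qf_free(&qf);
	return EXIT_SUCCESS;
}
```

As `gqf.h` documents, `qf_insert` returns the distance from the key's home slot on success, so the tests in `src/test.c` and `src/test_threadsafe.c` treat any negative return as an error; the sketch above follows the same convention.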
-------------------------------------------------------------------------------- /src/test_partitioned_counter.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================ 3 | * 4 | * Author: Prashant Pandey (), ppandey@cs.stonybrook.edu 5 | * Organization: Stony Brook University 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include "partitioned_counter.h" 23 | 24 | #include 25 | 26 | #define INC_TO 500000 27 | #define DEC_TO INC_BY/2 28 | #define INC_BY 1 29 | #define NUM_RUNS 10 30 | 31 | uint64_t TOTAL_COUNT; 32 | 33 | uint64_t tv2msec(struct timeval tv) 34 | { 35 | return tv.tv_sec * 1000 + tv.tv_usec / 1000; 36 | } 37 | 38 | void *thread_routine(void *arg) { 39 | pc_t *pc_counter = (pc_t*)arg; 40 | 41 | /*for (int i = 0; i < INC_TO; i++) {*/ 42 | /*pc_add(pc_counter, INC_BY);*/ 43 | /*}*/ 44 | /*for (int i = 0; i < DEC_TO; i++) {*/ 45 | /*pc_add(pc_counter, -INC_BY);*/ 46 | /*}*/ 47 | for (uint64_t i = 0; i < TOTAL_COUNT; i++) 48 | pc_add(pc_counter, INC_BY); 49 | 50 | return NULL; 51 | } 52 | 53 | /* 54 | * === FUNCTION ============================================================= 55 | * Name: main 56 | * Description: 57 | * ============================================================================ 58 | */ 59 | int main (int argc, char *argv[]) 60 | { 61 | int64_t global_counter = 0; 62 | pc_t pc_counter; 63 | if (argc < 2) { 64 | printf("Specify the number of threads.\n"); 65 | return 1; 66 | } 67 | int procs = atoi(argv[1]); 68 | TOTAL_COUNT = (1ULL << 30) / procs; 69 | 70 | struct timeval start, stop; 71 | pc_init(&pc_counter, &global_counter, 8, 100); 72 | 73 | pthread_t *thrs = malloc( sizeof( pthread_t ) * procs); 74 | if (thrs == NULL) 75 | { 76 | perror( "malloc" ); 77 | return -1; 78 | } 79 | printf( "Starting %d threads...\n", procs ); 80 | 81 | uint64_t total_time; 82 | for (int i = 0; i < NUM_RUNS; i++) { 83 | gettimeofday(&start, NULL); 84 | for (int i = 0; i < procs; i++) { 85 | if (pthread_create(&thrs[i], NULL, thread_routine, (void *)(&pc_counter))) 86 | { 87 | perror( "pthread_create" ); 88 | procs = i; 89 | break; 90 | } 91 | } 92 | for (int i = 0; i < procs; i++) 93 | pthread_join( thrs[i], NULL ); 94 | pc_sync(&pc_counter); 95 | gettimeofday(&stop, NULL); 96 | total_time = tv2msec(stop) - tv2msec(start); 97 | memset(thrs, 0, sizeof( pthread_t ) * procs); 98 | } 99 | free(thrs); 100 | 101 | printf("Average time for %d runs: %ld ms\n", NUM_RUNS, total_time/NUM_RUNS); 102 | 103 | printf("After doing all the math, global_int value is: %ld\n", 104 | global_counter); 105 | /*int64_t exp_count = (INC_TO - DEC_TO) * INC_BY * procs;*/ 106 | int64_t exp_count = TOTAL_COUNT * INC_BY * NUM_RUNS * procs; 107 | printf("Expected value is: %ld\n", exp_count); 108 | if (global_counter != exp_count) 109 | printf("Counting failed!\n"); 110 | else 111 | printf("Counting passed!\n"); 112 | return EXIT_SUCCESS; 113 | } /* ---------- end of function main ---------- */ 114 | -------------------------------------------------------------------------------- /src/test_threadsafe.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================ 3 | * 4 | * 
Authors: Prashant Pandey 5 | * Rob Johnson 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include "include/gqf.h" 21 | #include "include/gqf_int.h" 22 | #include "include/gqf_file.h" 23 | 24 | typedef struct insert_args { 25 | QF *cf; 26 | uint64_t *vals; 27 | int freq; 28 | uint64_t start; 29 | uint64_t end; 30 | } insert_args; 31 | 32 | void *insert_bm(void *arg) 33 | { 34 | insert_args *a = (insert_args *)arg; 35 | for (uint32_t i = a->start; i <= a->end; i++) { 36 | int ret = qf_insert(a->cf, a->vals[i], 0, a->freq, QF_WAIT_FOR_LOCK); 37 | if (ret < 0) { 38 | fprintf(stderr, "failed insertion for key: %lx %d.\n", a->vals[i], 39 | a->freq); 40 | if (ret == -1) 41 | fprintf(stderr, "CQF is full.\n"); 42 | else if (ret == -2) 43 | fprintf(stderr, "TRY_ONCE_LOCK failed.\n"); 44 | else if (ret == -3) 45 | fprintf(stderr, "Runtime lock does not satisfy the init time lock.\n"); 46 | else 47 | fprintf(stderr, "Does not recognise return value.\n"); 48 | abort(); 49 | } 50 | } 51 | return NULL; 52 | } 53 | 54 | void multi_threaded_insertion(insert_args args[], int tcnt) 55 | { 56 | pthread_t threads[tcnt]; 57 | 58 | for (int i = 0; i < tcnt; i++) { 59 | fprintf(stdout, "Thread %d bounds %ld %ld\n", i, args[i].start, args[i].end); 60 | if (pthread_create(&threads[i], NULL, &insert_bm, &args[i])) { 61 | fprintf(stderr, "Error creating thread\n"); 62 | exit(0); 63 | } 64 | } 65 | 66 | for (int i = 0; i < tcnt; i++) { 67 | if (pthread_join(threads[i], NULL)) { 68 | fprintf(stderr, "Error joining thread\n"); 69 | exit(0); 70 | } 71 | } 72 | } 73 | 74 | int main(int argc, char **argv) 75 | { 76 | if (argc < 4) { 77 | fprintf(stderr, "Please specify three arguments: \n \ 78 | 1. log of the number of slots in the CQF.\n \ 79 | 2. frequency count of keys.\n \ 80 | 3. 
number of threads.\n"); 81 | exit(1); 82 | } 83 | QF cfr; 84 | uint64_t qbits = atoi(argv[1]); 85 | uint64_t freq = atoi(argv[2]); 86 | uint32_t tcnt = atoi(argv[3]); 87 | uint64_t nhashbits = qbits + 8; 88 | uint64_t nslots = (1ULL << qbits); 89 | uint64_t nvals = 750*nslots/1000; 90 | nvals = nvals/freq; 91 | 92 | uint64_t *vals; 93 | 94 | /* Initialise the CQF */ 95 | if (!qf_malloc(&cfr, nslots, nhashbits, 0, QF_HASH_INVERTIBLE, 0)) { 96 | fprintf(stderr, "Can't allocate CQF.\n"); 97 | abort(); 98 | } 99 | 100 | /* Generate random values */ 101 | vals = (uint64_t*)calloc(nvals, sizeof(vals[0])); 102 | RAND_bytes((unsigned char *)vals, sizeof(*vals) * nvals); 103 | for (uint32_t i = 0; i < nvals; i++) { 104 | vals[i] = (1 * vals[i]) % cfr.metadata->range; 105 | } 106 | 107 | insert_args *args = (insert_args*)malloc(tcnt * sizeof(insert_args)); 108 | for (uint32_t i = 0; i < tcnt; i++) { 109 | args[i].cf = 𝔠 110 | args[i].vals = vals; 111 | args[i].freq = freq; 112 | args[i].start = (nvals/tcnt) * i; 113 | args[i].end = (nvals/tcnt) * (i + 1) - 1; 114 | } 115 | fprintf(stdout, "Total number of items: %ld\n", args[tcnt-1].end); 116 | 117 | multi_threaded_insertion(args, tcnt); 118 | 119 | fprintf(stdout, "Inserted all items: %ld\n", args[tcnt-1].end); 120 | 121 | for (uint64_t i = 0; i < args[tcnt-1].end; i++) { 122 | uint64_t count = qf_count_key_value(&cfr, vals[i], 0, 0); 123 | if (count < freq) { 124 | fprintf(stderr, "failed lookup after insertion for %lx %ld.\n", vals[i], 125 | count); 126 | abort(); 127 | } 128 | } 129 | 130 | QFi cfir; 131 | /* Initialize an iterator */ 132 | qf_iterator_from_position(&cfr, &cfir, 0); 133 | do { 134 | uint64_t key, value, count; 135 | qfi_get_key(&cfir, &key, &value, &count); 136 | qfi_next(&cfir); 137 | if (qf_count_key_value(&cfr, key, 0, 0) < freq) { 138 | fprintf(stderr, "Failed lookup during iteration for: %lx. Returned count: %ld\n", 139 | key, count); 140 | abort(); 141 | } 142 | } while(!qfi_end(&cfir)); 143 | 144 | fprintf(stdout, "Total num of distinct items in the CQF %ld\n", 145 | cfr.metadata->ndistinct_elts); 146 | fprintf(stdout, "Verified all items: %ld\n", args[tcnt-1].end); 147 | 148 | return 0; 149 | } 150 | -------------------------------------------------------------------------------- /include/gqf_int.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================ 3 | * 4 | * Authors: Prashant Pandey 5 | * Rob Johnson 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #ifndef _GQF_INT_H_ 11 | #define _GQF_INT_H_ 12 | 13 | #include 14 | #include 15 | 16 | #include "gqf.h" 17 | #include "partitioned_counter.h" 18 | 19 | #ifdef __cplusplus 20 | extern "C" { 21 | #endif 22 | 23 | #define MAGIC_NUMBER 1018874902021329732 24 | 25 | /* Can be 26 | 0 (choose size at run-time), 27 | 8, 16, 32, or 64 (for optimized versions), 28 | or other integer <= 56 (for compile-time-optimized bit-shifting-based versions) 29 | */ 30 | #define QF_BITS_PER_SLOT 0 31 | 32 | /* Must be >= 6. 6 seems fastest. 
*/ 33 | #define QF_BLOCK_OFFSET_BITS (6) 34 | 35 | #define QF_SLOTS_PER_BLOCK (1ULL << QF_BLOCK_OFFSET_BITS) 36 | #define QF_METADATA_WORDS_PER_BLOCK ((QF_SLOTS_PER_BLOCK + 63) / 64) 37 | 38 | typedef struct __attribute__ ((__packed__)) qfblock { 39 | /* Code works with uint16_t, uint32_t, etc, but uint8_t seems just as fast as 40 | * anything else */ 41 | uint8_t offset; 42 | uint64_t occupieds[QF_METADATA_WORDS_PER_BLOCK]; 43 | uint64_t runends[QF_METADATA_WORDS_PER_BLOCK]; 44 | 45 | #if QF_BITS_PER_SLOT == 8 46 | uint8_t slots[QF_SLOTS_PER_BLOCK]; 47 | #elif QF_BITS_PER_SLOT == 16 48 | uint16_t slots[QF_SLOTS_PER_BLOCK]; 49 | #elif QF_BITS_PER_SLOT == 32 50 | uint32_t slots[QF_SLOTS_PER_BLOCK]; 51 | #elif QF_BITS_PER_SLOT == 64 52 | uint64_t slots[QF_SLOTS_PER_BLOCK]; 53 | #elif QF_BITS_PER_SLOT != 0 54 | uint8_t slots[QF_SLOTS_PER_BLOCK * QF_BITS_PER_SLOT / 8]; 55 | #else 56 | uint8_t slots[]; 57 | #endif 58 | } qfblock; 59 | 60 | struct __attribute__ ((__packed__)) qfblock; 61 | typedef struct qfblock qfblock; 62 | 63 | typedef struct file_info { 64 | int fd; 65 | char *filepath; 66 | } file_info; 67 | 68 | // The below struct is used to instrument the code. 69 | // It is not used in normal operations of the CQF. 70 | typedef struct { 71 | uint64_t total_time_single; 72 | uint64_t total_time_spinning; 73 | uint64_t locks_taken; 74 | uint64_t locks_acquired_single_attempt; 75 | } wait_time_data; 76 | 77 | typedef struct quotient_filter_runtime_data { 78 | file_info f_info; 79 | uint32_t auto_resize; 80 | int64_t (*container_resize)(QF *qf, uint64_t nslots); 81 | pc_t pc_nelts; 82 | pc_t pc_ndistinct_elts; 83 | pc_t pc_noccupied_slots; 84 | uint64_t num_locks; 85 | volatile int metadata_lock; 86 | volatile int *locks; 87 | wait_time_data *wait_times; 88 | } quotient_filter_runtime_data; 89 | 90 | typedef quotient_filter_runtime_data qfruntime; 91 | 92 | typedef struct quotient_filter_metadata { 93 | uint64_t magic_endian_number; 94 | enum qf_hashmode hash_mode; 95 | uint32_t reserved; 96 | uint64_t total_size_in_bytes; 97 | uint32_t seed; 98 | uint64_t nslots; 99 | uint64_t xnslots; 100 | uint64_t key_bits; 101 | uint64_t value_bits; 102 | uint64_t key_remainder_bits; 103 | uint64_t bits_per_slot; 104 | __uint128_t range; 105 | uint64_t nblocks; 106 | uint64_t nelts; 107 | uint64_t ndistinct_elts; 108 | uint64_t noccupied_slots; 109 | } quotient_filter_metadata; 110 | 111 | typedef quotient_filter_metadata qfmetadata; 112 | 113 | typedef struct quotient_filter { 114 | qfruntime *runtimedata; 115 | qfmetadata *metadata; 116 | qfblock *blocks; 117 | } quotient_filter; 118 | 119 | typedef quotient_filter QF; 120 | 121 | #if QF_BITS_PER_SLOT > 0 122 | static inline qfblock * get_block(const QF *qf, uint64_t block_index) 123 | { 124 | return &qf->blocks[block_index]; 125 | } 126 | #else 127 | static inline qfblock * get_block(const QF *qf, uint64_t block_index) 128 | { 129 | return (qfblock *)(((char *)qf->blocks) 130 | + block_index * (sizeof(qfblock) + QF_SLOTS_PER_BLOCK * 131 | qf->metadata->bits_per_slot / 8)); 132 | } 133 | #endif 134 | 135 | // The below struct is used to instrument the code. 136 | // It is not used in normal operations of the CQF. 
137 | typedef struct { 138 | uint64_t start_index; 139 | uint16_t length; 140 | } cluster_data; 141 | 142 | typedef struct quotient_filter_iterator { 143 | const QF *qf; 144 | uint64_t run; 145 | uint64_t current; 146 | uint64_t cur_start_index; 147 | uint16_t cur_length; 148 | uint32_t num_clusters; 149 | cluster_data *c_info; 150 | } quotient_filter_iterator; 151 | 152 | #ifdef __cplusplus 153 | } 154 | #endif 155 | 156 | #endif /* _GQF_INT_H_ */ 157 | -------------------------------------------------------------------------------- /src/hashutil.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================ 3 | * 4 | * Authors: Prashant Pandey 5 | * Rob Johnson 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #include "hashutil.h" 11 | 12 | 13 | 14 | //----------------------------------------------------------------------------- 15 | // MurmurHash2, 64-bit versions, by Austin Appleby 16 | 17 | // The same caveats as 32-bit MurmurHash2 apply here - beware of alignment 18 | // and endian-ness issues if used across multiple platforms. 19 | 20 | 21 | // 64-bit hash for 64-bit platforms 22 | 23 | uint64_t MurmurHash64A ( const void * key, int len, unsigned int seed ) 24 | { 25 | const uint64_t m = 0xc6a4a7935bd1e995; 26 | const int r = 47; 27 | 28 | uint64_t h = seed ^ (len * m); 29 | 30 | const uint64_t * data = (const uint64_t *)key; 31 | const uint64_t * end = data + (len/8); 32 | 33 | while(data != end) 34 | { 35 | uint64_t k = *data++; 36 | 37 | k *= m; 38 | k ^= k >> r; 39 | k *= m; 40 | 41 | h ^= k; 42 | h *= m; 43 | } 44 | 45 | const unsigned char * data2 = (const unsigned char*)data; 46 | 47 | switch(len & 7) 48 | { 49 | case 7: h ^= (uint64_t)data2[6] << 48; 50 | case 6: h ^= (uint64_t)data2[5] << 40; 51 | case 5: h ^= (uint64_t)data2[4] << 32; 52 | case 4: h ^= (uint64_t)data2[3] << 24; 53 | case 3: h ^= (uint64_t)data2[2] << 16; 54 | case 2: h ^= (uint64_t)data2[1] << 8; 55 | case 1: h ^= (uint64_t)data2[0]; 56 | h *= m; 57 | }; 58 | 59 | h ^= h >> r; 60 | h *= m; 61 | h ^= h >> r; 62 | 63 | return h; 64 | } 65 | 66 | 67 | // 64-bit hash for 32-bit platforms 68 | 69 | uint64_t MurmurHash64B ( const void * key, int len, unsigned int seed ) 70 | { 71 | const unsigned int m = 0x5bd1e995; 72 | const int r = 24; 73 | 74 | unsigned int h1 = seed ^ len; 75 | unsigned int h2 = 0; 76 | 77 | const unsigned int * data = (const unsigned int *)key; 78 | 79 | while(len >= 8) 80 | { 81 | unsigned int k1 = *data++; 82 | k1 *= m; k1 ^= k1 >> r; k1 *= m; 83 | h1 *= m; h1 ^= k1; 84 | len -= 4; 85 | 86 | unsigned int k2 = *data++; 87 | k2 *= m; k2 ^= k2 >> r; k2 *= m; 88 | h2 *= m; h2 ^= k2; 89 | len -= 4; 90 | } 91 | 92 | if(len >= 4) 93 | { 94 | unsigned int k1 = *data++; 95 | k1 *= m; k1 ^= k1 >> r; k1 *= m; 96 | h1 *= m; h1 ^= k1; 97 | len -= 4; 98 | } 99 | 100 | switch(len) 101 | { 102 | case 3: h2 ^= ((unsigned char*)data)[2] << 16; 103 | case 2: h2 ^= ((unsigned char*)data)[1] << 8; 104 | case 1: h2 ^= ((unsigned char*)data)[0]; 105 | h2 *= m; 106 | }; 107 | 108 | h1 ^= h2 >> 18; h1 *= m; 109 | h2 ^= h1 >> 22; h2 *= m; 110 | h1 ^= h2 >> 17; h1 *= m; 111 | h2 ^= h1 >> 19; h2 *= m; 112 | 113 | uint64_t h = h1; 114 | 115 | h = (h << 32) | h2; 116 | 117 | return h; 118 | } 119 | 120 | /* 121 | * For any 1 for a snapshot. 
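 * hash_64() below is Thomas Wang's 64-bit mix hash with each step masked, so for
 * mask = 2^k - 1 it maps [0, 2^k) onto itself; hash_64i() computes its inverse
 * under the same mask.
 */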
131 | 132 | uint64_t hash_64(uint64_t key, uint64_t mask) 133 | { 134 | key = (~key + (key << 21)) & mask; // key = (key << 21) - key - 1; 135 | key = key ^ key >> 24; 136 | key = ((key + (key << 3)) + (key << 8)) & mask; // key * 265 137 | key = key ^ key >> 14; 138 | key = ((key + (key << 2)) + (key << 4)) & mask; // key * 21 139 | key = key ^ key >> 28; 140 | key = (key + (key << 31)) & mask; 141 | return key; 142 | } 143 | 144 | // The inversion of hash_64(). Modified from 145 | // 146 | uint64_t hash_64i(uint64_t key, uint64_t mask) 147 | { 148 | uint64_t tmp; 149 | 150 | // Invert key = key + (key << 31) 151 | tmp = (key - (key << 31)); 152 | key = (key - (tmp << 31)) & mask; 153 | 154 | // Invert key = key ^ (key >> 28) 155 | tmp = key ^ key >> 28; 156 | key = key ^ tmp >> 28; 157 | 158 | // Invert key *= 21 159 | key = (key * 14933078535860113213ull) & mask; 160 | 161 | // Invert key = key ^ (key >> 14) 162 | tmp = key ^ key >> 14; 163 | tmp = key ^ tmp >> 14; 164 | tmp = key ^ tmp >> 14; 165 | key = key ^ tmp >> 14; 166 | 167 | // Invert key *= 265 168 | key = (key * 15244667743933553977ull) & mask; 169 | 170 | // Invert key = key ^ (key >> 24) 171 | tmp = key ^ key >> 24; 172 | key = key ^ tmp >> 24; 173 | 174 | // Invert key = (~key) + (key << 21) 175 | tmp = ~key; 176 | tmp = ~(key - (tmp << 21)); 177 | tmp = ~(key - (tmp << 21)); 178 | key = ~(key - (tmp << 21)) & mask; 179 | 180 | return key; 181 | } 182 | 183 | -------------------------------------------------------------------------------- /src/zipf.c: -------------------------------------------------------------------------------- 1 | /* See zipf.h for the specification of this file. 2 | * Copyright 2011 Bradley C. Kuszmaul 3 | */ 4 | 5 | #include "include/zipf.h" 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #ifndef USE_MYRANDOM 12 | #define RFUN random 13 | #define RSEED srandom 14 | #else 15 | #define RFUN myrandom 16 | #define RSEED mysrandom 17 | 18 | static unsigned int m_z = 1; 19 | static unsigned int m_w = 1; 20 | static void mysrandom (unsigned int seed) { 21 | m_z = seed; 22 | m_w = (seed<<16) + (seed >> 16); 23 | } 24 | 25 | static long myrandom() 26 | { 27 | m_z = 36969 * (m_z & 65535) + (m_z >> 16); 28 | m_w = 18000 * (m_w & 65535) + (m_w >> 16); 29 | return ((m_z << 16) + m_w) % 0x7FFFFFFF; 30 | } 31 | #endif 32 | 33 | struct zpair { // For the ith element of the array: 34 | long num; // How many elements are represented by this bucket 35 | long low; // How many elements are represented by all the previous buckets. 36 | double cumulative; // The sum of the all the probabilities of all the elements represented by previous buckets. 37 | }; 38 | 39 | enum { NPAIRS = 1000000 }; 40 | 41 | struct zipfian { 42 | double s; // s, the characteristic exponent. 43 | long N; // N, the size of the universe. 44 | double H_Ns; // H_{N,s}. 45 | long int (*randomfun)(void); 46 | struct zpair pairs[NPAIRS]; 47 | }; 48 | 49 | static void zprint (ZIPFIAN z) { 50 | int i = 0; 51 | printf("s=%f, N=%ld, H_sN=%f\n", z->s, z->N, z->H_Ns); 52 | for (i=0; i=NPAIRS) ? 
z->H_Ns : z->pairs[i+1].cumulative) - z->pairs[i].cumulative; 54 | printf("%2ld %2ld %f (delta=%f)\n", z->pairs[i].num, z->pairs[i].low, z->pairs[i].cumulative, diff); 55 | } 56 | } 57 | 58 | ZIPFIAN create_zipfian (double s, long N, long int (*randomfun)(void)) { 59 | assert(s > 0); 60 | assert(0 < N); 61 | struct zipfian *z = (struct zipfian *)malloc(sizeof(*z)); 62 | assert(z); 63 | z->s = s; 64 | z->N = N; 65 | z->randomfun = randomfun; 66 | 67 | // Calculate the total probability distribution 68 | double H_Ns = 0; 69 | long i = 0; 70 | for (i=0; ipairs[i] = (struct zpair){.cumulative = cumulative, 78 | .low = i, 79 | .num = 1}; 80 | cumulative += pow(i+1, -s); 81 | } 82 | // For the second half divide up the remaining part of N evenly by the probability. 83 | 84 | 85 | long last_n = NPAIRS/2; 86 | long next_n = last_n; 87 | 88 | for (i=NPAIRS/2; ipairs[i] = (struct zpair){.cumulative = cumulative, 97 | .low = last_n, 98 | .num = next_n - last_n}; 99 | last_n = next_n; 100 | cumulative = next_cumulative; 101 | } 102 | z->H_Ns = H_Ns; 103 | 104 | if (0) zprint(z); 105 | 106 | return z; 107 | } 108 | 109 | static long z_search (ZIPFIAN s, double C, long low, long pcount) 110 | // Find the first zpair for which the cumulative probability of the previous pairs is less than C. 111 | // Generate a value in its range uniformly randomly. 112 | { 113 | assert(pcount>0); 114 | if (pcount==1) { 115 | struct zpair const *p = &s->pairs[low]; 116 | assert(p->cumulative <= C); 117 | return p->low + s->randomfun()%p->num; 118 | } else { 119 | long mid = low + pcount/2; 120 | struct zpair const *p = &s->pairs[mid]; 121 | if (p->cumulative > C) { 122 | return z_search(s, C, low, pcount/2); 123 | } else { 124 | return z_search(s, C, low+pcount/2, pcount-pcount/2); 125 | } 126 | } 127 | } 128 | 129 | long zipfian_gen (ZIPFIAN z) { 130 | // we're going to have to use two calls to random() to get enough random bits. 
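  // Two random() draws are combined into v in [0, rand_limit^2); multiplying by
  // H_Ns / rand_limit^2 scales it to a value approximately uniform in [0, H_Ns),
  // and z_search() returns the bucket whose cumulative range contains that value.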
131 | const long rand_limit = ((long)RAND_MAX)+1; 132 | const double one_over = 1/(double)rand_limit; 133 | const double scale_factor = one_over * one_over; 134 | long v = (long)(z->randomfun()) * rand_limit + z->randomfun(); 135 | double scaled = v * z->H_Ns * scale_factor; 136 | return z_search(z, scaled, 0, NPAIRS); 137 | } 138 | 139 | void destroy_zipfian (ZIPFIAN z) { 140 | free((struct zipfian *)z); 141 | } 142 | 143 | void generate_random_keys (uint64_t *elems, long N, long gencount, double s) { 144 | int i; 145 | uint32_t *counts; 146 | /*struct timeval a,b,c;*/ 147 | printf("Generating %ld elements in universe of %ld items with characteristic exponent %f\n", 148 | gencount, N, s); 149 | /*gettimeofday(&a, NULL);*/ 150 | ZIPFIAN z = create_zipfian(s, N, RFUN); 151 | counts = (uint32_t*)calloc(N, sizeof(counts)); 152 | 153 | /*gettimeofday(&b, NULL);*/ 154 | /*printf("Setup time = %0.6fs\n", tdiff(&a, &b));*/ 155 | for (i=0; i 5 | * Rob Johnson 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include "include/gqf.h" 23 | #include "include/gqf_int.h" 24 | #include "include/gqf_file.h" 25 | 26 | int main(int argc, char **argv) 27 | { 28 | if (argc < 3) { 29 | fprintf(stderr, "Please specify the log of the number of slots and the number of remainder bits in the CQF.\n"); 30 | exit(1); 31 | } 32 | QF qf; 33 | uint64_t qbits = atoi(argv[1]); 34 | uint64_t rbits = atoi(argv[2]); 35 | uint64_t nhashbits = qbits + rbits; 36 | uint64_t nslots = (1ULL << qbits); 37 | uint64_t nvals = 95*nslots/100; 38 | uint64_t key_count = 4; 39 | uint64_t *vals; 40 | 41 | /* Initialise the CQF */ 42 | /*if (!qf_malloc(&qf, nslots, nhashbits, 0, QF_HASH_INVERTIBLE, 0)) {*/ 43 | /*fprintf(stderr, "Can't allocate CQF.\n");*/ 44 | /*abort();*/ 45 | /*}*/ 46 | if (!qf_initfile(&qf, nslots, nhashbits, 0, QF_HASH_INVERTIBLE, 0, 47 | "mycqf.file")) { 48 | fprintf(stderr, "Can't allocate CQF.\n"); 49 | abort(); 50 | } 51 | 52 | qf_set_auto_resize(&qf, true); 53 | 54 | /* Generate random values */ 55 | vals = (uint64_t*)malloc(nvals*sizeof(vals[0])); 56 | RAND_bytes((unsigned char *)vals, sizeof(*vals) * nvals); 57 | srand(0); 58 | for (uint64_t i = 0; i < nvals; i++) { 59 | vals[i] = (1 * vals[i]) % qf.metadata->range; 60 | /*vals[i] = rand() % qf.metadata->range;*/ 61 | /*fprintf(stdout, "%lx\n", vals[i]);*/ 62 | } 63 | 64 | /* Insert keys in the CQF */ 65 | for (uint64_t i = 0; i < nvals; i++) { 66 | int ret = qf_insert(&qf, vals[i], 0, key_count, QF_NO_LOCK); 67 | if (ret < 0) { 68 | fprintf(stderr, "failed insertion for key: %lx %d.\n", vals[i], 50); 69 | if (ret == QF_NO_SPACE) 70 | fprintf(stderr, "CQF is full.\n"); 71 | else if (ret == QF_COULDNT_LOCK) 72 | fprintf(stderr, "TRY_ONCE_LOCK failed.\n"); 73 | else 74 | fprintf(stderr, "Does not recognise return value.\n"); 75 | abort(); 76 | } 77 | } 78 | 79 | /* Lookup inserted keys and counts. 
*/ 80 | for (uint64_t i = 0; i < nvals; i++) { 81 | uint64_t count = qf_count_key_value(&qf, vals[i], 0, 0); 82 | if (count < key_count) { 83 | fprintf(stderr, "failed lookup after insertion for %lx %ld.\n", vals[i], 84 | count); 85 | abort(); 86 | } 87 | } 88 | 89 | #if 0 90 | for (uint64_t i = 0; i < nvals; i++) { 91 | uint64_t count = qf_count_key_value(&qf, vals[i], 0, 0); 92 | if (count < key_count) { 93 | fprintf(stderr, "failed lookup during deletion for %lx %ld.\n", vals[i], 94 | count); 95 | abort(); 96 | } 97 | if (count > 0) { 98 | /*fprintf(stdout, "deleting: %lx\n", vals[i]);*/ 99 | qf_delete_key_value(&qf, vals[i], 0, QF_NO_LOCK); 100 | /*qf_dump(&qf);*/ 101 | uint64_t cnt = qf_count_key_value(&qf, vals[i], 0, 0); 102 | if (cnt > 0) { 103 | fprintf(stderr, "failed lookup after deletion for %lx %ld.\n", vals[i], 104 | cnt); 105 | abort(); 106 | } 107 | } 108 | } 109 | #endif 110 | 111 | /* Write the CQF to disk and read it back. */ 112 | char filename[] = "mycqf_serialized.cqf"; 113 | fprintf(stdout, "Serializing the CQF to disk.\n"); 114 | uint64_t total_size = qf_serialize(&qf, filename); 115 | if (total_size < sizeof(qfmetadata) + qf.metadata->total_size_in_bytes) { 116 | fprintf(stderr, "CQF serialization failed.\n"); 117 | abort(); 118 | } 119 | qf_deletefile(&qf); 120 | 121 | QF file_qf; 122 | fprintf(stdout, "Reading the CQF from disk.\n"); 123 | if (!qf_deserialize(&file_qf, filename)) { 124 | fprintf(stderr, "Can't initialize the CQF from file: %s.\n", filename); 125 | abort(); 126 | } 127 | for (uint64_t i = 0; i < nvals; i++) { 128 | uint64_t count = qf_count_key_value(&file_qf, vals[i], 0, 0); 129 | if (count < key_count) { 130 | fprintf(stderr, "failed lookup in file based CQF for %lx %ld.\n", 131 | vals[i], count); 132 | abort(); 133 | } 134 | } 135 | 136 | fprintf(stdout, "Testing iterator and unique indexes.\n"); 137 | /* Initialize an iterator and validate counts. */ 138 | QFi qfi; 139 | qf_iterator_from_position(&file_qf, &qfi, 0); 140 | QF unique_idx; 141 | if (!qf_malloc(&unique_idx, file_qf.metadata->nslots, nhashbits, 0, 142 | QF_HASH_INVERTIBLE, 0)) { 143 | fprintf(stderr, "Can't allocate set.\n"); 144 | abort(); 145 | } 146 | 147 | int64_t last_index = -1; 148 | int i = 0; 149 | qf_iterator_from_position(&file_qf, &qfi, 0); 150 | while(!qfi_end(&qfi)) { 151 | uint64_t key, value, count; 152 | qfi_get_key(&qfi, &key, &value, &count); 153 | if (count < key_count) { 154 | fprintf(stderr, "Failed lookup during iteration for: %lx. Returned count: %ld\n", 155 | key, count); 156 | abort(); 157 | } 158 | int64_t idx = qf_get_unique_index(&file_qf, key, value, 0); 159 | if (idx == QF_DOESNT_EXIST) { 160 | fprintf(stderr, "Failed lookup for unique index for: %lx. index: %ld\n", 161 | key, idx); 162 | abort(); 163 | } 164 | if (idx <= last_index) { 165 | fprintf(stderr, "Unique indexes not strictly increasing.\n"); 166 | abort(); 167 | } 168 | last_index = idx; 169 | if (qf_count_key_value(&unique_idx, key, 0, 0) > 0) { 170 | fprintf(stderr, "Failed unique index for: %lx. index: %ld\n", 171 | key, idx); 172 | abort(); 173 | } 174 | qf_insert(&unique_idx, key, 0, 1, QF_NO_LOCK); 175 | int64_t newindex = qf_get_unique_index(&unique_idx, key, 0, 0); 176 | if (idx < newindex) { 177 | fprintf(stderr, "Index weirdness: index %dth key %ld was at %ld, is now at %ld\n", 178 | i, key, idx, newindex); 179 | //abort(); 180 | } 181 | 182 | i++; 183 | qfi_next(&qfi); 184 | } 185 | 186 | /* remove some counts (or keys) and validate. 
*/ 187 | fprintf(stdout, "Testing remove/delete_key.\n"); 188 | for (uint64_t i = 0; i < nvals; i++) { 189 | uint64_t count = qf_count_key_value(&file_qf, vals[i], 0, 0); 190 | /*if (count < key_count) {*/ 191 | /*fprintf(stderr, "failed lookup during deletion for %lx %ld.\n", vals[i],*/ 192 | /*count);*/ 193 | /*abort();*/ 194 | /*}*/ 195 | int ret = qf_delete_key_value(&file_qf, vals[i], 0, QF_NO_LOCK); 196 | count = qf_count_key_value(&file_qf, vals[i], 0, 0); 197 | if (count > 0) { 198 | if (ret < 0) { 199 | fprintf(stderr, "failed deletion for %lx %ld ret code: %d.\n", 200 | vals[i], count, ret); 201 | abort(); 202 | } 203 | uint64_t new_count = qf_count_key_value(&file_qf, vals[i], 0, 0); 204 | if (new_count > 0) { 205 | fprintf(stderr, "delete key failed for %lx %ld new count: %ld.\n", 206 | vals[i], count, new_count); 207 | abort(); 208 | } 209 | } 210 | } 211 | 212 | qf_deletefile(&file_qf); 213 | 214 | fprintf(stdout, "Validated the CQF.\n"); 215 | } 216 | 217 | -------------------------------------------------------------------------------- /include/gqf.h: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================ 3 | * 4 | * Authors: Prashant Pandey 5 | * Rob Johnson 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #ifndef _GQF_H_ 11 | #define _GQF_H_ 12 | 13 | #include 14 | #include 15 | 16 | #ifdef __cplusplus 17 | extern "C" { 18 | #endif 19 | 20 | typedef struct quotient_filter quotient_filter; 21 | typedef quotient_filter QF; 22 | 23 | /* CQFs support three hashing modes: 24 | 25 | - DEFAULT uses a hash that may introduce false positives, but 26 | this can be useful when inserting large keys that need to be 27 | hashed down to a small fingerprint. With this type of hash, 28 | you can iterate over the hash values of all the keys in the 29 | CQF, but you cannot iterate over the keys themselves. 30 | 31 | - INVERTIBLE has no false positives, but the size of the hash 32 | output must be the same as the size of the hash input, 33 | e.g. 17-bit keys hashed to 17-bit outputs. So this mode is 34 | generally only useful when storing small keys in the CQF. With 35 | this hashing mode, you can use iterators to enumerate both all 36 | the hashes in the CQF, or all the keys. 37 | 38 | - NONE, for when you've done the hashing yourself. WARNING: the 39 | CQF can exhibit very bad performance if you insert a skewed 40 | distribution of intputs. 41 | */ 42 | 43 | enum qf_hashmode { 44 | QF_HASH_DEFAULT, 45 | QF_HASH_INVERTIBLE, 46 | QF_HASH_NONE 47 | }; 48 | 49 | /* The CQF supports concurrent insertions and queries. Only the 50 | portion of the CQF being examined or modified is locked, so it 51 | supports high throughput even with many threads. 52 | 53 | The CQF operations support 3 locking modes: 54 | 55 | - NO_LOCK: for single-threaded applications or applications 56 | that do their own concurrency management. 57 | 58 | - WAIT_FOR_LOCK: Spin until you get the lock, then do the query 59 | or update. 60 | 61 | - TRY_ONCE_LOCK: If you can't grab the lock on the first try, 62 | return with an error code. 63 | */ 64 | #define QF_NO_LOCK (0x01) 65 | #define QF_TRY_ONCE_LOCK (0x02) 66 | #define QF_WAIT_FOR_LOCK (0x04) 67 | 68 | /* It is sometimes useful to insert a key that has already been 69 | hashed. 
*/ 70 | #define QF_KEY_IS_HASH (0x08) 71 | 72 | /****************************************** 73 | The CQF defines low-level constructor and destructor operations 74 | that are designed to enable the application to manage the memory 75 | used by the CQF. 76 | *******************************************/ 77 | 78 | /* 79 | * Create an empty CQF in "buffer". If there is not enough space at 80 | * buffer then it will return the total size needed in bytes to 81 | * initialize the CQF. This function takes ownership of buffer. 82 | */ 83 | uint64_t qf_init(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t 84 | value_bits, enum qf_hashmode hash, uint32_t seed, void* 85 | buffer, uint64_t buffer_len); 86 | 87 | /* Create a CQF in "buffer". Note that this does not initialize the 88 | contents of bufferss Use this function if you have read a CQF, e.g. 89 | off of disk or network, and want to begin using that stream of 90 | bytes as a CQF. The CQF takes ownership of buffer. */ 91 | uint64_t qf_use(QF* qf, void* buffer, uint64_t buffer_len); 92 | 93 | /* Destroy this CQF. Returns a pointer to the memory that the CQF was 94 | using (i.e. passed into qf_init or qf_use) so that the application 95 | can release that memory. */ 96 | void *qf_destroy(QF *qf); 97 | 98 | /* Allocate a new CQF using "nslots" at "buffer" and copy elements from "qf" 99 | * into it. 100 | * If there is not enough space at buffer then it will return the total size 101 | * needed in bytes to initialize the new CQF. 102 | * */ 103 | uint64_t qf_resize(QF* qf, uint64_t nslots, void* buffer, uint64_t 104 | buffer_len); 105 | 106 | /*********************************** 107 | The following convenience functions create and destroy CQFs by 108 | using malloc/free to obtain and release the memory for the CQF. 109 | ************************************/ 110 | 111 | /* Initialize the CQF and allocate memory for the CQF. */ 112 | bool qf_malloc(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t 113 | value_bits, enum qf_hashmode hash, uint32_t seed); 114 | 115 | bool qf_free(QF *qf); 116 | 117 | /* Resize the QF to the specified number of slots. Uses malloc() to 118 | * obtain the new memory, and calls free() on the old memory. 119 | * Return value: 120 | * >= 0: number of keys copied during resizing. 121 | * */ 122 | int64_t qf_resize_malloc(QF *qf, uint64_t nslots); 123 | 124 | /* Turn on automatic resizing. Resizing is performed by calling 125 | qf_resize_malloc, so the CQF must meet the requirements of that 126 | function. */ 127 | void qf_set_auto_resize(QF* qf, bool enabled); 128 | 129 | /*********************************** 130 | Functions for modifying the CQF. 131 | ***********************************/ 132 | 133 | #define QF_NO_SPACE (-1) 134 | #define QF_COULDNT_LOCK (-2) 135 | #define QF_DOESNT_EXIST (-3) 136 | 137 | /* Increment the counter for this key/value pair by count. 138 | * Return value: 139 | * >= 0: distance from the home slot to the slot in which the key is 140 | * inserted (or 0 if count == 0). 141 | * == QF_NO_SPACE: the CQF has reached capacity. 142 | * == QF_COULDNT_LOCK: TRY_ONCE_LOCK has failed to acquire the lock. 143 | */ 144 | int qf_insert(QF *qf, uint64_t key, uint64_t value, uint64_t count, uint8_t 145 | flags); 146 | 147 | /* Set the counter for this key/value pair to count. 148 | Return value: Same as qf_insert. 149 | Returns 0 if new count is equal to old count. 
150 | */ 151 | int qf_set_count(QF *qf, uint64_t key, uint64_t value, uint64_t count, 152 | uint8_t flags); 153 | 154 | /* Remove up to count instances of this key/value combination. 155 | * If the CQF contains <= count instances, then they will all be 156 | * removed, which is not an error. 157 | * Return value: 158 | * >= 0: number of slots freed. 159 | * == QF_DOESNT_EXIST: Specified item did not exist. 160 | * == QF_COULDNT_LOCK: TRY_ONCE_LOCK has failed to acquire the lock. 161 | */ 162 | int qf_remove(QF *qf, uint64_t key, uint64_t value, uint64_t count, uint8_t 163 | flags); 164 | 165 | /* Remove all instances of this key/value pair. */ 166 | int qf_delete_key_value(QF *qf, uint64_t key, uint64_t value, uint8_t flags); 167 | 168 | /* Remove all instances of this key. */ 169 | /* NOT IMPLEMENTED YET. */ 170 | //void qf_delete_key(QF *qf, uint64_t key); 171 | 172 | /* Replace the association (key, oldvalue, count) with the association 173 | (key, newvalue, count). If there is already an association (key, 174 | newvalue, count'), then the two associations will be merged and 175 | their counters will be summed, resulting in association (key, 176 | newvalue, count' + count). */ 177 | /* NOT IMPLEMENTED YET. */ 178 | //void qf_replace(QF *qf, uint64_t key, uint64_t oldvalue, uint64_t newvalue); 179 | 180 | /**************************************** 181 | Query functions 182 | ****************************************/ 183 | 184 | /* Lookup the value associated with key. Returns the count of that 185 | key/value pair in the QF. If it returns 0, then, the key is not 186 | present in the QF. Only returns the first value associated with key 187 | in the QF. If you want to see others, use an iterator. 188 | May return QF_COULDNT_LOCK if called with QF_TRY_LOCK. */ 189 | uint64_t qf_query(const QF *qf, uint64_t key, uint64_t *value, uint8_t 190 | flags); 191 | 192 | /* Return the number of times key has been inserted, with any value, 193 | into qf. */ 194 | /* NOT IMPLEMENTED YET. */ 195 | //uint64_t qf_count_key(const QF *qf, uint64_t key); 196 | 197 | /* Return the number of times key has been inserted, with the given 198 | value, into qf. 199 | May return QF_COULDNT_LOCK if called with QF_TRY_LOCK. */ 200 | uint64_t qf_count_key_value(const QF *qf, uint64_t key, uint64_t value, 201 | uint8_t flags); 202 | 203 | /* Returns a unique index corresponding to the key in the CQF. Note 204 | that this can change if further modifications are made to the 205 | CQF. 206 | 207 | If the key is not found then returns QF_DOESNT_EXIST. 208 | May return QF_COULDNT_LOCK if called with QF_TRY_LOCK. 209 | */ 210 | int64_t qf_get_unique_index(const QF *qf, uint64_t key, uint64_t value, 211 | uint8_t flags); 212 | 213 | 214 | /**************************************** 215 | Metadata accessors. 216 | ****************************************/ 217 | 218 | /* Hashing info */ 219 | enum qf_hashmode qf_get_hashmode(const QF *qf); 220 | uint64_t qf_get_hash_seed(const QF *qf); 221 | __uint128_t qf_get_hash_range(const QF *qf); 222 | 223 | /* Space usage info. */ 224 | bool qf_is_auto_resize_enabled(const QF *qf); 225 | uint64_t qf_get_total_size_in_bytes(const QF *qf); 226 | uint64_t qf_get_nslots(const QF *qf); 227 | uint64_t qf_get_num_occupied_slots(const QF *qf); 228 | 229 | /* Bit-sizes info. 
*/ 230 | uint64_t qf_get_num_key_bits(const QF *qf); 231 | uint64_t qf_get_num_value_bits(const QF *qf); 232 | uint64_t qf_get_num_key_remainder_bits(const QF *qf); 233 | uint64_t qf_get_bits_per_slot(const QF *qf); 234 | 235 | /* Number of (distinct) key-value pairs. */ 236 | uint64_t qf_get_sum_of_counts(const QF *qf); 237 | uint64_t qf_get_num_distinct_key_value_pairs(const QF *qf); 238 | 239 | void qf_sync_counters(const QF *qf); 240 | 241 | /**************************************** 242 | Iterators 243 | *****************************************/ 244 | 245 | typedef struct quotient_filter_iterator quotient_filter_iterator; 246 | typedef quotient_filter_iterator QFi; 247 | 248 | #define QF_INVALID (-4) 249 | #define QFI_INVALID (-5) 250 | 251 | /* Initialize an iterator starting at the given position. 252 | * Return value: 253 | * >= 0: iterator is initialized and positioned at the returned slot. 254 | * = QFI_INVALID: iterator has reached end. 255 | */ 256 | int64_t qf_iterator_from_position(const QF *qf, QFi *qfi, uint64_t position); 257 | 258 | /* Initialize an iterator and position it at the smallest index 259 | * containing a key-value pair whose hash is greater than or equal 260 | * to the specified key-value pair. 261 | * Return value: 262 | * >= 0: iterator is initialized and positioned at the returned slot. 263 | * = QFI_INVALID: iterator has reached end. 264 | */ 265 | int64_t qf_iterator_from_key_value(const QF *qf, QFi *qfi, uint64_t key, 266 | uint64_t value, uint8_t flags); 267 | 268 | /* Requires that the hash mode of the CQF is INVERTIBLE or NONE. 269 | * If the hash mode is DEFAULT then returns QF_INVALID. 270 | * Return value: 271 | * = 0: Iterator is still valid. 272 | * = QFI_INVALID: iterator has reached end. 273 | * = QF_INVALID: hash mode is QF_DEFAULT_HASH 274 | */ 275 | int qfi_get_key(const QFi *qfi, uint64_t *key, uint64_t *value, uint64_t 276 | *count); 277 | 278 | /* Return value: 279 | * = 0: Iterator is still valid. 280 | * = QFI_INVALID: iterator has reached end. 281 | */ 282 | int qfi_get_hash(const QFi *qfi, uint64_t *hash, uint64_t *value, uint64_t 283 | *count); 284 | 285 | /* Advance to next entry. 286 | * Return value: 287 | * = 0: Iterator is still valid. 288 | * = QFI_INVALID: iterator has reached end. 289 | */ 290 | int qfi_next(QFi *qfi); 291 | 292 | /* Check whether the iterator has reached the end of the QF. */ 293 | bool qfi_end(const QFi *qfi); 294 | 295 | /************************************ 296 | Miscellaneous convenience functions. 297 | *************************************/ 298 | 299 | /* Reset the CQF to an empty filter. */ 300 | void qf_reset(QF *qf); 301 | 302 | /* The caller should call qf_init on the dest QF using the same 303 | * parameters as the src QF before calling this function. Note: src 304 | * and dest must be exactly the same, including number of slots. */ 305 | void qf_copy(QF *dest, const QF *src); 306 | 307 | /* Merge two QFs into the third one. Note: merges with any existing 308 | values in qfc. */ 309 | void qf_merge(const QF *qfa, const QF *qfb, QF *qfc); 310 | 311 | /* Merge multiple QFs into the final QF. */ 312 | void qf_multi_merge(const QF *qf_arr[], int nqf, QF *qfr); 313 | 314 | /* Compute the inner product of two QFs (combine with qf_magnitude to obtain cosine similarity). */ 315 | uint64_t qf_inner_product(const QF *qfa, const QF *qfb); 316 | 317 | /* square of the L_2 norm of a QF (i.e. sum of squares of counts of 318 | all items in the CQF). */ 319 | uint64_t qf_magnitude(const QF *qf); 320 | 321 | /*********************************** 322 | Debugging functions. 
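
   A minimal sketch of the iterator API documented above, added for
   illustration (identifiers are hypothetical, error handling abbreviated):

       QFi it;
       if (qf_iterator_from_position(&qf, &it, 0) >= 0) {
           do {
               uint64_t hash, value, count;
               qfi_get_hash(&it, &hash, &value, &count);  // valid in any hash mode
               // qfi_get_key() could be used instead, but only with the
               // INVERTIBLE or NONE hash modes, as noted above.
               // ... process (hash, value, count) ...
           } while (qfi_next(&it) == 0);
       }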
323 | ************************************/ 324 | 325 | void qf_dump(const QF *); 326 | void qf_dump_metadata(const QF *qf); 327 | 328 | 329 | #ifdef __cplusplus 330 | } 331 | #endif 332 | 333 | #endif /* _GQF_H_ */ 334 | -------------------------------------------------------------------------------- /src/gqf_file.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================ 3 | * 4 | * Authors: Prashant Pandey 5 | * Rob Johnson 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #include 11 | #if 0 12 | # include 13 | #else 14 | # define assert(x) 15 | #endif 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | 26 | #include "hashutil.h" 27 | #include "gqf.h" 28 | #include "gqf_int.h" 29 | #include "gqf_file.h" 30 | 31 | #define NUM_SLOTS_TO_LOCK (1ULL<<16) 32 | 33 | bool qf_initfile(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t 34 | value_bits, enum qf_hashmode hash, uint32_t seed, const char* 35 | filename) 36 | { 37 | uint64_t total_num_bytes = qf_init(qf, nslots, key_bits, value_bits, hash, 38 | seed, NULL, 0); 39 | 40 | int ret; 41 | qf->runtimedata = (qfruntime *)calloc(sizeof(qfruntime), 1); 42 | if (qf->runtimedata == NULL) { 43 | perror("Couldn't allocate memory for runtime data."); 44 | exit(EXIT_FAILURE); 45 | } 46 | qf->runtimedata->f_info.fd = open(filename, O_RDWR | O_CREAT | O_TRUNC, 47 | S_IRWXU); 48 | if (qf->runtimedata->f_info.fd < 0) { 49 | perror("Couldn't open file."); 50 | exit(EXIT_FAILURE); 51 | } 52 | ret = posix_fallocate(qf->runtimedata->f_info.fd, 0, total_num_bytes); 53 | if (ret < 0) { 54 | perror("Couldn't fallocate file:\n"); 55 | exit(EXIT_FAILURE); 56 | } 57 | qf->metadata = (qfmetadata *)mmap(NULL, total_num_bytes, PROT_READ | 58 | PROT_WRITE, MAP_SHARED, 59 | qf->runtimedata->f_info.fd, 0); 60 | if (qf->metadata == MAP_FAILED) { 61 | perror("Couldn't mmap metadata."); 62 | exit(EXIT_FAILURE); 63 | } 64 | ret = madvise(qf->metadata, total_num_bytes, MADV_RANDOM); 65 | if (ret < 0) { 66 | perror("Couldn't fallocate file."); 67 | exit(EXIT_FAILURE); 68 | } 69 | qf->blocks = (qfblock *)(qf->metadata + 1); 70 | 71 | uint64_t init_size = qf_init(qf, nslots, key_bits, value_bits, hash, seed, 72 | qf->metadata, total_num_bytes); 73 | qf->runtimedata->f_info.filepath = (char *)malloc(strlen(filename) + 1); 74 | if (qf->runtimedata->f_info.filepath == NULL) { 75 | perror("Couldn't allocate memory for runtime f_info filepath."); 76 | exit(EXIT_FAILURE); 77 | } 78 | strcpy(qf->runtimedata->f_info.filepath, filename); 79 | /* initialize container resize */ 80 | qf->runtimedata->container_resize = qf_resize_file; 81 | 82 | if (init_size == total_num_bytes) 83 | return true; 84 | else 85 | return false; 86 | } 87 | 88 | uint64_t qf_usefile(QF* qf, const char* filename, int flag) 89 | { 90 | struct stat sb; 91 | int ret; 92 | 93 | int open_flag = 0, mmap_flag = 0; 94 | if (flag == QF_USEFILE_READ_ONLY) { 95 | open_flag = O_RDONLY; 96 | mmap_flag = PROT_READ; 97 | } else if(flag == QF_USEFILE_READ_WRITE) { 98 | open_flag = O_RDWR; 99 | mmap_flag = PROT_READ | PROT_WRITE; 100 | } else { 101 | fprintf(stderr, "Wrong flag specified.\n"); 102 | return 0; 103 | } 104 | 105 | qf->runtimedata = (qfruntime *)calloc(sizeof(qfruntime), 1); 106 | if (qf->runtimedata == NULL) { 107 | perror("Couldn't allocate memory for runtime 
data."); 108 | exit(EXIT_FAILURE); 109 | } 110 | qf->runtimedata->f_info.fd = open(filename, open_flag); 111 | if (qf->runtimedata->f_info.fd < 0) { 112 | perror("Couldn't open file."); 113 | exit(EXIT_FAILURE); 114 | } 115 | 116 | ret = fstat (qf->runtimedata->f_info.fd, &sb); 117 | if ( ret < 0) { 118 | perror ("fstat"); 119 | exit(EXIT_FAILURE); 120 | } 121 | 122 | if (!S_ISREG (sb.st_mode)) { 123 | fprintf (stderr, "%s is not a file.\n", filename); 124 | exit(EXIT_FAILURE); 125 | } 126 | 127 | qf->runtimedata->f_info.filepath = (char *)malloc(strlen(filename) + 1); 128 | if (qf->runtimedata->f_info.filepath == NULL) { 129 | perror("Couldn't allocate memory for runtime f_info filepath."); 130 | exit(EXIT_FAILURE); 131 | } 132 | strcpy(qf->runtimedata->f_info.filepath, filename); 133 | /* initialize container resize */ 134 | qf->runtimedata->container_resize = qf_resize_file; 135 | /* initialize all the locks to 0 */ 136 | qf->runtimedata->metadata_lock = 0; 137 | qf->runtimedata->locks = (volatile int *)calloc(qf->runtimedata->num_locks, 138 | sizeof(volatile int)); 139 | if (qf->runtimedata->locks == NULL) { 140 | perror("Couldn't allocate memory for runtime locks."); 141 | exit(EXIT_FAILURE); 142 | } 143 | #ifdef LOG_WAIT_TIME 144 | qf->runtimedata->wait_times = (wait_time_data* )calloc(qf->runtimedata->num_locks+1, 145 | sizeof(wait_time_data)); 146 | if (qf->runtimedata->wait_times == NULL) { 147 | perror("Couldn't allocate memory for runtime wait_times."); 148 | exit(EXIT_FAILURE); 149 | } 150 | #endif 151 | qf->metadata = (qfmetadata *)mmap(NULL, sb.st_size, mmap_flag, MAP_SHARED, 152 | qf->runtimedata->f_info.fd, 0); 153 | if (qf->metadata == MAP_FAILED) { 154 | perror("Couldn't mmap metadata."); 155 | exit(EXIT_FAILURE); 156 | } 157 | if (qf->metadata->magic_endian_number != MAGIC_NUMBER) { 158 | fprintf(stderr, "Can't read the CQF. It was written on a different endian machine."); 159 | exit(EXIT_FAILURE); 160 | } 161 | qf->blocks = (qfblock *)(qf->metadata + 1); 162 | 163 | pc_init(&qf->runtimedata->pc_nelts, (int64_t*)&qf->metadata->nelts, 8, 100); 164 | pc_init(&qf->runtimedata->pc_ndistinct_elts, (int64_t*)&qf->metadata->ndistinct_elts, 8, 100); 165 | pc_init(&qf->runtimedata->pc_noccupied_slots, (int64_t*)&qf->metadata->noccupied_slots, 8, 100); 166 | 167 | return sizeof(qfmetadata) + qf->metadata->total_size_in_bytes; 168 | } 169 | 170 | int64_t qf_resize_file(QF *qf, uint64_t nslots) 171 | { 172 | // calculate the new filename length 173 | int new_filename_len = strlen(qf->runtimedata->f_info.filepath) + 1; 174 | new_filename_len += 13; // To have an underscore and the nslots. 
175 | char *new_filename = (char *)malloc(new_filename_len); 176 | if (new_filename == NULL) { 177 | perror("Couldn't allocate memory for filename buffer during resize."); 178 | exit(EXIT_FAILURE); 179 | } 180 | // Create new filename 181 | uint64_t ret = snprintf(new_filename, new_filename_len, "%s_%ld", 182 | qf->runtimedata->f_info.filepath, nslots); 183 | if (ret <= strlen(qf->runtimedata->f_info.filepath)) { 184 | fprintf(stderr, "Wrong new filename created!"); 185 | return -1; 186 | } 187 | 188 | QF new_qf; 189 | if (!qf_initfile(&new_qf, nslots, qf->metadata->key_bits, 190 | qf->metadata->value_bits, qf->metadata->hash_mode, 191 | qf->metadata->seed, new_filename)) 192 | return false; 193 | if (qf->runtimedata->auto_resize) 194 | qf_set_auto_resize(&new_qf, true); 195 | 196 | // copy keys from qf into new_qf 197 | QFi qfi; 198 | qf_iterator_from_position(qf, &qfi, 0); 199 | int64_t ret_numkeys = 0; 200 | do { 201 | uint64_t key, value, count; 202 | qfi_get_hash(&qfi, &key, &value, &count); 203 | qfi_next(&qfi); 204 | int ret = qf_insert(&new_qf, key, value, count, QF_NO_LOCK | QF_KEY_IS_HASH); 205 | if (ret < 0) { 206 | fprintf(stderr, "Failed to insert key: %ld into the new CQF.\n", key); 207 | return ret; 208 | } 209 | ret_numkeys++; 210 | } while(!qfi_end(&qfi)); 211 | 212 | // Copy old QF path in temp. 213 | char *path = (char *)malloc(strlen(qf->runtimedata->f_info.filepath) + 1); 214 | if (qf->runtimedata->f_info.filepath == NULL) { 215 | perror("Couldn't allocate memory for runtime f_info filepath."); 216 | exit(EXIT_FAILURE); 217 | } 218 | strcpy(path, qf->runtimedata->f_info.filepath); 219 | 220 | // delete old QF 221 | qf_deletefile(qf); 222 | memcpy(qf, &new_qf, sizeof(QF)); 223 | 224 | rename(qf->runtimedata->f_info.filepath, path); 225 | strcpy(qf->runtimedata->f_info.filepath, path); 226 | 227 | return ret_numkeys; 228 | } 229 | 230 | bool qf_closefile(QF* qf) 231 | { 232 | assert(qf->metadata != NULL); 233 | int fd = qf->runtimedata->f_info.fd; 234 | qf_sync_counters(qf); 235 | uint64_t size = qf->metadata->total_size_in_bytes + sizeof(qfmetadata); 236 | void *buffer = qf_destroy(qf); 237 | if (buffer != NULL) { 238 | munmap(buffer, size); 239 | close(fd); 240 | return true; 241 | } 242 | 243 | return false; 244 | } 245 | 246 | bool qf_deletefile(QF* qf) 247 | { 248 | assert(qf->metadata != NULL); 249 | char *path = (char *)malloc(strlen(qf->runtimedata->f_info.filepath) + 1); 250 | if (qf->runtimedata->f_info.filepath == NULL) { 251 | perror("Couldn't allocate memory for runtime f_info filepath."); 252 | exit(EXIT_FAILURE); 253 | } 254 | strcpy(path, qf->runtimedata->f_info.filepath); 255 | if (qf_closefile(qf)) { 256 | remove(path); 257 | free(path); 258 | return true; 259 | } 260 | 261 | return false; 262 | } 263 | 264 | uint64_t qf_serialize(const QF *qf, const char *filename) 265 | { 266 | FILE *fout; 267 | fout = fopen(filename, "wb+"); 268 | if (fout == NULL) { 269 | perror("Error opening file for serializing."); 270 | exit(EXIT_FAILURE); 271 | } 272 | qf_sync_counters(qf); 273 | fwrite(qf->metadata, sizeof(qfmetadata), 1, fout); 274 | fwrite(qf->blocks, qf->metadata->total_size_in_bytes, 1, fout); 275 | fclose(fout); 276 | 277 | return sizeof(qfmetadata) + qf->metadata->total_size_in_bytes; 278 | } 279 | 280 | uint64_t qf_deserialize(QF *qf, const char *filename) 281 | { 282 | FILE *fin; 283 | fin = fopen(filename, "rb"); 284 | if (fin == NULL) { 285 | perror("Error opening file for deserializing."); 286 | exit(EXIT_FAILURE); 287 | } 288 | 289 | 
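	/* NOTE (added for clarity): the rest of qf_deserialize allocates the
	 * runtime data and locks, reads and validates the metadata header
	 * (including the magic/endianness check), grows the metadata buffer to
	 * the full filter size, reads the slot blocks, and re-initializes the
	 * partitioned counters. */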
qf->runtimedata = (qfruntime *)calloc(sizeof(qfruntime), 1); 290 | if (qf->runtimedata == NULL) { 291 | perror("Couldn't allocate memory for runtime data."); 292 | exit(EXIT_FAILURE); 293 | } 294 | qf->metadata = (qfmetadata *)calloc(sizeof(qfmetadata), 1); 295 | if (qf->metadata == NULL) { 296 | perror("Couldn't allocate memory for metadata."); 297 | exit(EXIT_FAILURE); 298 | } 299 | int ret = fread(qf->metadata, sizeof(qfmetadata), 1, fin); 300 | if (ret < 1) { 301 | perror("Couldn't read metadata from file."); 302 | exit(EXIT_FAILURE); 303 | } 304 | if (qf->metadata->magic_endian_number != MAGIC_NUMBER) { 305 | fprintf(stderr, "Can't read the CQF. It was written on a different endian machine."); 306 | exit(EXIT_FAILURE); 307 | } 308 | 309 | qf->runtimedata->f_info.filepath = (char *)malloc(strlen(filename) + 1); 310 | if (qf->runtimedata->f_info.filepath == NULL) { 311 | perror("Couldn't allocate memory for runtime f_info filepath."); 312 | exit(EXIT_FAILURE); 313 | } 314 | strcpy(qf->runtimedata->f_info.filepath, filename); 315 | /* initlialize the locks in the QF */ 316 | qf->runtimedata->num_locks = (qf->metadata->xnslots/NUM_SLOTS_TO_LOCK)+2; 317 | qf->runtimedata->metadata_lock = 0; 318 | /* initialize all the locks to 0 */ 319 | qf->runtimedata->locks = (volatile int *)calloc(qf->runtimedata->num_locks, 320 | sizeof(volatile int)); 321 | if (qf->runtimedata->locks == NULL) { 322 | perror("Couldn't allocate memory for runtime locks."); 323 | exit(EXIT_FAILURE); 324 | } 325 | qf->metadata = (qfmetadata *)realloc(qf->metadata, 326 | qf->metadata->total_size_in_bytes + 327 | sizeof(qfmetadata)); 328 | if (qf->metadata == NULL) { 329 | perror("Couldn't allocate memory for metadata."); 330 | exit(EXIT_FAILURE); 331 | } 332 | qf->blocks = (qfblock *)(qf->metadata + 1); 333 | if (qf->blocks == NULL) { 334 | perror("Couldn't allocate memory for blocks."); 335 | exit(EXIT_FAILURE); 336 | } 337 | ret = fread(qf->blocks, qf->metadata->total_size_in_bytes, 1, fin); 338 | if (ret < 1) { 339 | perror("Couldn't read metadata from file."); 340 | exit(EXIT_FAILURE); 341 | } 342 | fclose(fin); 343 | 344 | pc_init(&qf->runtimedata->pc_nelts, (int64_t*)&qf->metadata->nelts, 8, 100); 345 | pc_init(&qf->runtimedata->pc_ndistinct_elts, (int64_t*)&qf->metadata->ndistinct_elts, 8, 100); 346 | pc_init(&qf->runtimedata->pc_noccupied_slots, (int64_t*)&qf->metadata->noccupied_slots, 8, 100); 347 | 348 | return sizeof(qfmetadata) + qf->metadata->total_size_in_bytes; 349 | } 350 | 351 | #define MADVISE_GRANULARITY (32) 352 | #define ROUND_TO_PAGE_GROUP(p) ((char *)(((intptr_t)(p)) - (((intptr_t)(p)) % (page_size * MADVISE_GRANULARITY)))) 353 | 354 | static void make_madvise_calls(const QF *qf, uint64_t oldrun, uint64_t newrun) 355 | { 356 | int page_size = sysconf(_SC_PAGESIZE); 357 | 358 | char * oldblock = (char *)get_block(qf, oldrun / QF_SLOTS_PER_BLOCK); 359 | char * newblock = (char *)get_block(qf, newrun / QF_SLOTS_PER_BLOCK); 360 | 361 | oldblock = ROUND_TO_PAGE_GROUP(oldblock); 362 | newblock = ROUND_TO_PAGE_GROUP(newblock); 363 | 364 | if (oldblock < (char *)qf->blocks) 365 | return; 366 | 367 | while (oldblock < newblock) { 368 | madvise(oldblock, page_size * MADVISE_GRANULARITY, MADV_DONTNEED); 369 | oldblock += page_size * MADVISE_GRANULARITY; 370 | } 371 | } 372 | 373 | /* This wraps qfi_next, using madvise(DONTNEED) to reduce our RSS. 374 | Only valid on mmapped QFs, i.e. cqfs from qf_initfile and 375 | qf_usefile. 
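
   A minimal usage sketch, added for illustration (identifiers are
   hypothetical): to stream through a large on-disk CQF while keeping
   resident memory low, pair these helpers with the normal iterator loop:

       QFi it;
       qf_iterator_from_position(&qf, &it, 0);
       qfi_initial_madvise(&it);            // drop pages before the first entry
       while (!qfi_end(&it)) {
           uint64_t hash, value, count;
           qfi_get_hash(&it, &hash, &value, &count);
           // ... process ...
           qfi_next_madvise(&it);           // advance, releasing pages behind us
       }

   As noted above, the QF must come from qf_initfile or qf_usefile.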
*/ 376 | int qfi_next_madvise(QFi *qfi) 377 | { 378 | uint64_t oldrun = qfi->run; 379 | int result = qfi_next(qfi); 380 | uint64_t newrun = qfi->run; 381 | 382 | make_madvise_calls(qfi->qf, oldrun, newrun); 383 | 384 | return result; 385 | } 386 | 387 | /* Furthermore, you can call this immediately after constructing the 388 | qfi to call madvise(DONTNEED) on the portion of the cqf up to the 389 | first element visited by the qfi. */ 390 | int qfi_initial_madvise(QFi *qfi) 391 | { 392 | make_madvise_calls(qfi->qf, 0, qfi->run); 393 | return 0; 394 | } 395 | -------------------------------------------------------------------------------- /src/bm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================ 3 | * 4 | * Authors: Prashant Pandey 5 | * Rob Johnson 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "include/zipf.h" 22 | #include "include/gqf_wrapper.h" 23 | 24 | #ifndef USE_MYRANDOM 25 | #define RFUN random 26 | #define RSEED srandom 27 | #else 28 | #define RFUN myrandom 29 | #define RSEED mysrandom 30 | 31 | static unsigned int m_z = 1; 32 | static unsigned int m_w = 1; 33 | static void mysrandom (unsigned int seed) { 34 | m_z = seed; 35 | m_w = (seed<<16) + (seed >> 16); 36 | } 37 | 38 | static long myrandom() 39 | { 40 | m_z = 36969 * (m_z & 65535) + (m_z >> 16); 41 | m_w = 18000 * (m_w & 65535) + (m_w >> 16); 42 | return ((m_z << 16) + m_w) % 0x7FFFFFFF; 43 | } 44 | #endif 45 | 46 | static float tdiff (struct timeval *start, struct timeval *end) { 47 | return (end->tv_sec-start->tv_sec) +1e-6*(end->tv_usec - start->tv_usec); 48 | } 49 | 50 | uint64_t aes_hash2(uint64_t x) 51 | { 52 | const uint64_t round_keys[32] = 53 | { // These were generated by hashing some randomly chosen files on my laptop 54 | 0x795e15dc8136095f, 0x562371660e56b023, 55 | 0x086bb301d2fb5e87, 0x1fe74f801c68d829, 56 | 0x38a19379fd013357, 0x4a7ef2fca0f840f5, 57 | 0x7d2a08bc58553aef, 0x092cfe1997ab8b53, 58 | 0xd18a0c07dac143d4, 0x64e345ef125a576c, 59 | 0x82807902d8211a1f, 0x6985dc4ddcdaf85d, 60 | 0x2214ff750cf750af, 0xb574b4138eb8a37e, 61 | 0x83e11205e8050dd5, 0x2d62b24118df61eb, 62 | 0x8a16453f8f6b6fa1, 0x260c9e8491474d4f, 63 | 0x06eb44d6042ca8ae, 0x43efbd457306b135, 64 | 0xbfcb7ac89f346686, 0xd00362f30651d0d0, 65 | 0x016d3080768968d5, 0x74b4c2e46ef801de, 66 | 0xf623864a4396fe74, 0x9fc26ea69dad6067, 67 | 0xd0eb2f4e08564d99, 0x408b357725ae0297, 68 | 0xd19efb8e82d22151, 0x58c5ead61b7ecc15, 69 | 0x14e904bc8de1c705, 0x1ef79cd4f487912d 70 | }; 71 | __uint128_t *rks = (__uint128_t *)round_keys; 72 | uint64_t output; 73 | 74 | asm("movq %[input], %%xmm15;" 75 | "pxor %[round_keys0], %%xmm15;" 76 | "aesenc %[round_keys1], %%xmm15;" 77 | "aesenc %[round_keys2], %%xmm15;" 78 | "aesenc %[round_keys3], %%xmm15;" 79 | "aesenc %[round_keys4], %%xmm15;" 80 | "aesenc %[round_keys5], %%xmm15;" 81 | "aesenc %[round_keys6], %%xmm15;" 82 | "aesenc %[round_keys7], %%xmm15;" 83 | "aesenc %[round_keys8], %%xmm15;" 84 | "aesenc %[round_keys9], %%xmm15;" 85 | "aesenclast %[round_keysa], %%xmm15;" 86 | "vmovq %%xmm15, %[output]" 87 | : [output] "=irm" (output) 88 | : [input] "irm" (x), 89 | [round_keys0] "m" (rks[0]), 90 | [round_keys1] "m" (rks[1]), 91 | [round_keys2] "m" (rks[2]), 92 | [round_keys3] "m" (rks[3]), 
93 | [round_keys4] "m" (rks[4]), 94 | [round_keys5] "m" (rks[5]), 95 | [round_keys6] "m" (rks[6]), 96 | [round_keys7] "m" (rks[7]), 97 | [round_keys8] "m" (rks[8]), 98 | [round_keys9] "m" (rks[9]), 99 | [round_keysa] "m" (rks[10]) 100 | : "xmm15" 101 | ); 102 | 103 | return output; 104 | } 105 | 106 | static __uint128_t* zipf_gen(long N, long gencount, double s) { 107 | int i; 108 | uint32_t *counts; 109 | __uint128_t *elems; 110 | struct timeval a,b,c; 111 | printf("Generating %ld elements in universe of %ld items with characteristic exponent %f\n", 112 | gencount, N, s); 113 | gettimeofday(&a, NULL); 114 | ZIPFIAN z = create_zipfian(1, N, RFUN); 115 | counts = (uint32_t *)calloc(N, sizeof(counts)); 116 | elems = (__uint128_t *)calloc(gencount, sizeof(elems)); 117 | 118 | gettimeofday(&b, NULL); 119 | printf("Setup time = %0.6fs\n", tdiff(&a, &b)); 120 | for (i=0; imaxoutputs = maxoutputs; 241 | state->nextoutput = 0; 242 | state->params = (app_params *)params; 243 | { 244 | struct timeval tv; 245 | gettimeofday(&tv, NULL); 246 | RSEED(tv.tv_sec + tv.tv_usec); 247 | } 248 | state->outputs = app_file_read(state->params->ip_file, state->params->num); 249 | assert(state->outputs != NULL); 250 | for (i = 0; i < state->params->num; i++) 251 | state->outputs[i] = (1 * state->outputs[i]) % maxvalue; 252 | 253 | return (void *)state; 254 | } 255 | 256 | int app_pregen_gen_rand(void *_state, uint64_t noutputs, __uint128_t *outputs) 257 | { 258 | app_pregen_state *state = (app_pregen_state *)_state; 259 | assert(state->nextoutput + noutputs <= state->maxoutputs); 260 | memcpy(outputs, state->outputs+state->nextoutput, noutputs * sizeof(*state->outputs)); 261 | state->nextoutput += noutputs; 262 | return noutputs; 263 | } 264 | 265 | void *app_pregen_duplicate(void *state) 266 | { 267 | app_pregen_state *newstate = (app_pregen_state *)malloc(sizeof(*newstate)); 268 | assert(newstate); 269 | memcpy(newstate, state, sizeof(*newstate)); 270 | return newstate; 271 | } 272 | 273 | void *zipfian_pregen_init(uint64_t maxoutputs, __uint128_t maxvalue, void *params) 274 | { 275 | uint32_t i; 276 | zipfian_pregen_state *state = (zipfian_pregen_state *)malloc(sizeof(zipfian_pregen_state)); 277 | assert(state != NULL); 278 | 279 | state->maxoutputs = maxoutputs; 280 | state->nextoutput = 0; 281 | state->params = (zipf_params*)params; 282 | { 283 | struct timeval tv; 284 | gettimeofday(&tv, NULL); 285 | RSEED(tv.tv_sec + tv.tv_usec); 286 | } 287 | state->outputs = zipf_gen(state->params->universe, state->params->sample, state->params->exp); 288 | assert(state->outputs != NULL); 289 | for (i = 0; i < state->maxoutputs; i++) 290 | state->outputs[i] = (1 * state->outputs[i]) % maxvalue; 291 | 292 | return (void *)state; 293 | } 294 | 295 | int zipfian_pregen_gen_rand(void *_state, uint64_t noutputs, __uint128_t *outputs) 296 | { 297 | zipfian_pregen_state *state = (zipfian_pregen_state *)_state; 298 | assert(state->nextoutput + noutputs <= state->maxoutputs); 299 | memcpy(outputs, state->outputs+state->nextoutput, noutputs * sizeof(*state->outputs)); 300 | state->nextoutput += noutputs; 301 | return noutputs; 302 | } 303 | 304 | void *zipfian_pregen_duplicate(void *state) 305 | { 306 | zipfian_pregen_state *newstate = (zipfian_pregen_state *)malloc(sizeof(*newstate)); 307 | assert(newstate); 308 | memcpy(newstate, state, sizeof(*newstate)); 309 | return newstate; 310 | } 311 | 312 | void *uniform_pregen_init(uint64_t maxoutputs, __uint128_t maxvalue, void *params) 313 | { 314 | uint32_t i; 315 | 
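	/* NOTE (added for clarity): pre-generate all maxoutputs 128-bit values up
	 * front with RAND_bytes() and reduce them modulo maxvalue;
	 * uniform_pregen_gen_rand() then hands out consecutive slices of this
	 * buffer. */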
uniform_pregen_state *state = (uniform_pregen_state *)malloc(sizeof(uniform_pregen_state)); 316 | assert(state != NULL); 317 | 318 | state->nextoutput = 0; 319 | 320 | state->maxoutputs = maxoutputs; 321 | state->outputs = (__uint128_t *)malloc(state->maxoutputs * sizeof(state->outputs[0])); 322 | assert(state->outputs != NULL); 323 | RAND_bytes((unsigned char *)state->outputs, sizeof(*state->outputs) * state->maxoutputs); 324 | for (i = 0; i < state->maxoutputs; i++) 325 | state->outputs[i] = (1 * state->outputs[i]) % maxvalue; 326 | 327 | return (void *)state; 328 | } 329 | 330 | int uniform_pregen_gen_rand(void *_state, uint64_t noutputs, __uint128_t *outputs) 331 | { 332 | uniform_pregen_state *state = (uniform_pregen_state *)_state; 333 | assert(state->nextoutput + noutputs <= state->maxoutputs); 334 | memcpy(outputs, state->outputs+state->nextoutput, noutputs * sizeof(*state->outputs)); 335 | state->nextoutput += noutputs; 336 | return noutputs; 337 | } 338 | 339 | void *uniform_pregen_duplicate(void *state) 340 | { 341 | uniform_pregen_state *newstate = (uniform_pregen_state *)malloc(sizeof(*newstate)); 342 | assert(newstate); 343 | memcpy(newstate, state, sizeof(*newstate)); 344 | return newstate; 345 | } 346 | 347 | void *uniform_online_init(uint64_t maxoutputs, __uint128_t maxvalue, void *params) 348 | { 349 | uniform_online_state *state = (uniform_online_state *)malloc(sizeof(uniform_online_state)); 350 | assert(state != NULL); 351 | 352 | state->maxoutputs = maxoutputs; 353 | state->maxvalue = maxvalue; 354 | state->seed = time(NULL); 355 | state->STATELEN = 256; 356 | state->buf = (char *)calloc(256, sizeof(char)); 357 | state->rand_state = (struct random_data *)calloc(1, sizeof(struct random_data)); 358 | 359 | initstate_r(state->seed, state->buf, state->STATELEN, state->rand_state); 360 | return (void *)state; 361 | } 362 | 363 | int uniform_online_gen_rand(void *_state, uint64_t noutputs, __uint128_t *outputs) 364 | { 365 | uint32_t i, j; 366 | uniform_online_state *state = (uniform_online_state *)_state; 367 | assert(state->rand_state != NULL); 368 | memset(outputs, 0, noutputs*sizeof(__uint128_t)); 369 | for (i = 0; i < noutputs; i++) { 370 | int32_t result; 371 | for (j = 0; j < 4; j++) { 372 | random_r(state->rand_state, &result); 373 | outputs[i] = (outputs[i] * RAND_MAX) + result; 374 | } 375 | outputs[i] = (1 * outputs[i]) % state->maxvalue; 376 | } 377 | return noutputs; 378 | } 379 | 380 | void *uniform_online_duplicate(void *_state) 381 | { 382 | uniform_online_state *newstate = (uniform_online_state *)malloc(sizeof(uniform_online_state)); 383 | assert(newstate != NULL); 384 | uniform_online_state *oldstate = (uniform_online_state *)_state; 385 | 386 | newstate->maxvalue = oldstate->maxvalue; 387 | newstate->seed = oldstate->seed; 388 | newstate->STATELEN = oldstate->STATELEN; 389 | 390 | newstate->buf = (char *)calloc(256, sizeof(char)); 391 | memcpy(newstate->buf, oldstate->buf, newstate->STATELEN); 392 | newstate->rand_state = (struct random_data *)calloc(1, sizeof(struct random_data)); 393 | 394 | initstate_r(newstate->seed, newstate->buf, newstate->STATELEN, newstate->rand_state); 395 | return newstate; 396 | } 397 | 398 | rand_generator uniform_pregen = { 399 | uniform_pregen_init, 400 | uniform_pregen_gen_rand, 401 | uniform_pregen_duplicate 402 | }; 403 | 404 | rand_generator uniform_online = { 405 | uniform_online_init, 406 | uniform_online_gen_rand, 407 | uniform_online_duplicate 408 | }; 409 | 410 | rand_generator zipfian_pregen = { 411 | 
zipfian_pregen_init, 412 | zipfian_pregen_gen_rand, 413 | zipfian_pregen_duplicate 414 | }; 415 | 416 | rand_generator app_pregen = { 417 | app_pregen_init, 418 | app_pregen_gen_rand, 419 | app_pregen_duplicate 420 | }; 421 | 422 | filter gqf = { 423 | gqf_init, 424 | gqf_insert, 425 | gqf_lookup, 426 | gqf_range, 427 | gqf_destroy, 428 | gqf_iterator, 429 | gqf_get, 430 | gqf_next, 431 | gqf_end 432 | }; 433 | 434 | void filter_multi_merge(filter qf_arr[], int nqf, filter qfr) 435 | { 436 | int i; 437 | int flag = 0; 438 | int smallest_i = 0; 439 | uint64_t smallest_key = UINT64_MAX; 440 | for (i=0; iexp = s; 638 | ((zipf_params *)param)->universe = universe; 639 | ((zipf_params *)param)->sample = nvals; 640 | } else if (strcmp(randmode, "app_pregen") == 0) { 641 | vals_gen = &app_pregen; 642 | othervals_gen = &app_pregen; 643 | param = (app_params *)malloc(sizeof(app_params)); 644 | ((app_params *)param)->ip_file = inputfile; 645 | ((app_params *)param)->num = numvals; 646 | nvals = numvals; 647 | } else { 648 | fprintf(stderr, "Unknown randmode.\n"); 649 | usage(argv[0]); 650 | exit(1); 651 | } 652 | 653 | if (strcmp(datastruct, "gqf") == 0) { 654 | filter_ds = gqf; 655 | // } else if (strcmp(datastruct, "qf") == 0) { 656 | // filter_ds = qf; 657 | // } else if (strcmp(datastruct, "cf") == 0) { 658 | // filter_ds = cf; 659 | // } else if (strcmp(datastruct, "bf") == 0) { 660 | // filter_ds = bf; 661 | } else { 662 | fprintf(stderr, "Unknown randmode.\n"); 663 | usage(argv[0]); 664 | exit(1); 665 | } 666 | 667 | snprintf(filename_insert, strlen(dir) + strlen(outputfile) + strlen(insert_op) + 1, "%s%s%s", dir, outputfile, insert_op); 668 | snprintf(filename_exit_lookup, strlen(dir) + strlen(outputfile) + strlen(exit_lookup_op) + 1, "%s%s%s", dir, outputfile, exit_lookup_op); 669 | 670 | snprintf(filename_false_lookup, strlen(dir) + strlen(outputfile) + strlen(false_lookup_op) + 1, "%s%s%s", dir, outputfile, false_lookup_op); 671 | 672 | FILE *fp_insert = fopen(filename_insert, "w"); 673 | FILE *fp_exit_lookup = fopen(filename_exit_lookup, "w"); 674 | FILE *fp_false_lookup = fopen(filename_false_lookup, "w"); 675 | 676 | if (fp_insert == NULL || fp_exit_lookup == NULL || fp_false_lookup == NULL) { 677 | printf("Can't open the data file"); 678 | exit(1); 679 | } 680 | 681 | if (numfilters > 0) { 682 | uint64_t num_hash_bits = nbits+ ceil(numfilters/2) + 8; 683 | filter filters[numfilters]; 684 | filter final_filter; 685 | rand_generator *generator[numfilters]; 686 | void *generator_state[numfilters]; 687 | 688 | // initialize all the filters and generators 689 | for (int i = 0; i < numfilters; i++) { 690 | filters[i] = gqf; 691 | filters[i].init(nbits, num_hash_bits); 692 | generator[i] = &uniform_online; 693 | generator_state[i] = generator[i]->init(nvals, filters[i].range(), param); 694 | } 695 | final_filter = gqf; 696 | final_filter.init(nbits+ceil(numfilters/2), num_hash_bits); 697 | 698 | // insert items in the filters 699 | __uint128_t *vals = (__uint128_t *)malloc((nvals/32)*sizeof(__uint128_t)); 700 | for (int i = 0; i < numfilters; i++) { 701 | for (int k = 0; k < 32; k++) { 702 | memset(vals, 0, (nvals/32)*sizeof(__uint128_t)); 703 | assert(generator[i]->gen(generator_state[i], nvals/32, vals) == nvals/32); 704 | for (uint32_t j = 0; j < nvals/32; j++) { 705 | filters[i].insert(vals[j], 1); 706 | } 707 | } 708 | } 709 | free(vals); 710 | 711 | gettimeofday(&tv_insert[0][0], NULL); 712 | filter_multi_merge(filters, numfilters, final_filter); 713 | gettimeofday(&tv_insert[1][0], 
NULL); 714 | 715 | printf("Insert Performance:\n"); 716 | printf(" %f", 717 | 0.001 * (nvals*numfilters)/(tv2msec(tv_insert[1][0]) - tv2msec(tv_insert[0][0]))); 718 | printf(" Million inserts per second\n"); 719 | } else { 720 | 721 | for (run = 0; run < nruns; run++) { 722 | fps = 0; 723 | filter_ds.init(nbits, nbits+8); 724 | 725 | vals_gen_state = vals_gen->init(nvals, filter_ds.range(), param); 726 | old_vals_gen_state = vals_gen->dup(vals_gen_state); 727 | sleep(5); 728 | othervals_gen_state = othervals_gen->init(nvals, filter_ds.range(), param); 729 | 730 | for (exp = 0; exp < 2*npoints; exp += 2) { 731 | i = (exp/2)*(nvals/npoints); 732 | j = ((exp/2) + 1)*(nvals/npoints); 733 | printf("Round: %d\n", exp/2); 734 | 735 | gettimeofday(&tv_insert[exp][run], NULL); 736 | for (;i < j; i += 1<<16) { 737 | int nitems = j - i < 1<<16 ? j - i : 1<<16; 738 | __uint128_t vals[1<<16]; 739 | int m; 740 | assert(vals_gen->gen(vals_gen_state, nitems, vals) == nitems); 741 | 742 | for (m = 0; m < nitems; m++) { 743 | filter_ds.insert(vals[m], 1); 744 | } 745 | } 746 | gettimeofday(&tv_insert[exp+1][run], NULL); 747 | 748 | i = (exp/2)*(nvals/npoints); 749 | gettimeofday(&tv_exit_lookup[exp][run], NULL); 750 | for (;i < j; i += 1<<16) { 751 | int nitems = j - i < 1<<16 ? j - i : 1<<16; 752 | __uint128_t vals[1<<16]; 753 | int m; 754 | assert(vals_gen->gen(old_vals_gen_state, nitems, vals) == nitems); 755 | for (m = 0; m < nitems; m++) { 756 | if (!filter_ds.lookup(vals[m])) { 757 | fprintf(stderr, 758 | "Failed lookup for 0x%lx%016lx\n", 759 | (uint64_t)(vals[m]>>64), 760 | (uint64_t)(vals[m] & 0xffffffffffffffff)); 761 | abort(); 762 | } 763 | } 764 | } 765 | gettimeofday(&tv_exit_lookup[exp+1][run], NULL); 766 | 767 | i = (exp/2)*(nvals/npoints); 768 | gettimeofday(&tv_false_lookup[exp][run], NULL); 769 | for (;i < j; i += 1<<16) { 770 | int nitems = j - i < 1<<16 ? 
j - i : 1<<16; 771 | __uint128_t othervals[1<<16]; 772 | int m; 773 | assert(othervals_gen->gen(othervals_gen_state, nitems, othervals) == nitems); 774 | for (m = 0; m < nitems; m++) { 775 | fps += filter_ds.lookup(othervals[m]); 776 | } 777 | } 778 | gettimeofday(&tv_false_lookup[exp+1][run], NULL); 779 | } 780 | filter_ds.destroy(); 781 | } 782 | 783 | printf("Wiring results to file: %s\n", filename_insert); 784 | fprintf(fp_insert, "x_0"); 785 | for (run = 0; run < nruns; run++) { 786 | fprintf(fp_insert, " y_%d", run); 787 | } 788 | fprintf(fp_insert, "\n"); 789 | for (exp = 0; exp < 2*npoints; exp += 2) { 790 | fprintf(fp_insert, "%d", ((exp/2)*(100/npoints))); 791 | for (run = 0; run < nruns; run++) { 792 | fprintf(fp_insert, " %f", 793 | 0.001 * (nvals/npoints)/(tv2msec(tv_insert[exp+1][run]) - tv2msec(tv_insert[exp][run]))); 794 | } 795 | fprintf(fp_insert, "\n"); 796 | } 797 | printf("Insert Performance written\n"); 798 | 799 | printf("Wiring results to file: %s\n", filename_exit_lookup); 800 | fprintf(fp_exit_lookup, "x_0"); 801 | for (run = 0; run < nruns; run++) { 802 | fprintf(fp_exit_lookup, " y_%d", run); 803 | } 804 | fprintf(fp_exit_lookup, "\n"); 805 | for (exp = 0; exp < 2*npoints; exp += 2) { 806 | fprintf(fp_exit_lookup, "%d", ((exp/2)*(100/npoints))); 807 | for (run = 0; run < nruns; run++) { 808 | fprintf(fp_exit_lookup, " %f", 809 | 0.001 * (nvals/npoints)/(tv2msec(tv_exit_lookup[exp+1][run]) - tv2msec(tv_exit_lookup[exp][run]))); 810 | } 811 | fprintf(fp_exit_lookup, "\n"); 812 | } 813 | printf("Existing Lookup Performance written\n"); 814 | 815 | printf("Wiring results to file: %s\n", filename_false_lookup); 816 | fprintf(fp_false_lookup, "x_0"); 817 | for (run = 0; run < nruns; run++) { 818 | fprintf(fp_false_lookup, " y_%d", run); 819 | } 820 | fprintf(fp_false_lookup, "\n"); 821 | for (exp = 0; exp < 2*npoints; exp += 2) { 822 | fprintf(fp_false_lookup, "%d", ((exp/2)*(100/npoints))); 823 | for (run = 0; run < nruns; run++) { 824 | fprintf(fp_false_lookup, " %f", 825 | 0.001 * (nvals/npoints)/(tv2msec(tv_false_lookup[exp+1][run]) - tv2msec(tv_false_lookup[exp][run]))); 826 | } 827 | fprintf(fp_false_lookup, "\n"); 828 | } 829 | printf("False Lookup Performance written\n"); 830 | 831 | printf("FP rate: %f (%lu/%lu)\n", 1.0 * fps / nvals, fps, nvals); 832 | } 833 | fclose(fp_insert); 834 | fclose(fp_exit_lookup); 835 | fclose(fp_false_lookup); 836 | 837 | return 0; 838 | } 839 | -------------------------------------------------------------------------------- /src/gqf.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ============================================================================ 3 | * 4 | * Authors: Prashant Pandey 5 | * Rob Johnson 6 | * 7 | * ============================================================================ 8 | */ 9 | 10 | #include 11 | # include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | 22 | #include "hashutil.h" 23 | #include "gqf.h" 24 | #include "gqf_int.h" 25 | 26 | /****************************************************************** 27 | * Code for managing the metadata bits and slots w/o interpreting * 28 | * the content of the slots. 29 | ******************************************************************/ 30 | 31 | #define MAX_VALUE(nbits) ((1ULL << (nbits)) - 1) 32 | #define BITMASK(nbits) \ 33 | ((nbits) == 64 ? 
0xffffffffffffffff : MAX_VALUE(nbits)) 34 | #define NUM_SLOTS_TO_LOCK (1ULL<<16) 35 | #define CLUSTER_SIZE (1ULL<<14) 36 | #define METADATA_WORD(qf,field,slot_index) \ 37 | (get_block((qf), (slot_index) / \ 38 | QF_SLOTS_PER_BLOCK)->field[((slot_index) % QF_SLOTS_PER_BLOCK) / 64]) 39 | 40 | #define GET_NO_LOCK(flag) (flag & QF_NO_LOCK) 41 | #define GET_TRY_ONCE_LOCK(flag) (flag & QF_TRY_ONCE_LOCK) 42 | #define GET_WAIT_FOR_LOCK(flag) (flag & QF_WAIT_FOR_LOCK) 43 | #define GET_KEY_HASH(flag) (flag & QF_KEY_IS_HASH) 44 | 45 | #define DISTANCE_FROM_HOME_SLOT_CUTOFF 1000 46 | #define BILLION 1000000000L 47 | 48 | #ifdef DEBUG 49 | #define PRINT_DEBUG 1 50 | #else 51 | #define PRINT_DEBUG 0 52 | #endif 53 | 54 | #define DEBUG_CQF(fmt, ...) \ 55 | do { if (PRINT_DEBUG) fprintf(stderr, fmt, __VA_ARGS__); } while (0) 56 | 57 | #define DEBUG_DUMP(qf) \ 58 | do { if (PRINT_DEBUG) qf_dump_metadata(qf); } while (0) 59 | 60 | static __inline__ unsigned long long rdtsc(void) 61 | { 62 | unsigned hi, lo; 63 | __asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi)); 64 | return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 ); 65 | } 66 | 67 | #ifdef LOG_WAIT_TIME 68 | static inline bool qf_spin_lock(QF *qf, volatile int *lock, uint64_t idx, 69 | uint8_t flag) 70 | { 71 | struct timespec start, end; 72 | bool ret; 73 | 74 | clock_gettime(CLOCK_THREAD_CPUTIME_ID, &start); 75 | if (GET_WAIT_FOR_LOCK(flag) != QF_WAIT_FOR_LOCK) { 76 | ret = !__sync_lock_test_and_set(lock, 1); 77 | clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); 78 | qf->runtimedata->wait_times[idx].locks_acquired_single_attempt++; 79 | qf->runtimedata->wait_times[idx].total_time_single += BILLION * (end.tv_sec - 80 | start.tv_sec) + 81 | end.tv_nsec - start.tv_nsec; 82 | } else { 83 | if (!__sync_lock_test_and_set(lock, 1)) { 84 | clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); 85 | qf->runtimedata->wait_times[idx].locks_acquired_single_attempt++; 86 | qf->runtimedata->wait_times[idx].total_time_single += BILLION * (end.tv_sec - 87 | start.tv_sec) + 88 | end.tv_nsec - start.tv_nsec; 89 | } else { 90 | while (__sync_lock_test_and_set(lock, 1)) 91 | while (*lock); 92 | clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end); 93 | qf->runtimedata->wait_times[idx].total_time_spinning += BILLION * (end.tv_sec - 94 | start.tv_sec) + 95 | end.tv_nsec - start.tv_nsec; 96 | } 97 | ret = true; 98 | } 99 | qf->runtimedata->wait_times[idx].locks_taken++; 100 | 101 | return ret; 102 | 103 | /*start = rdtsc();*/ 104 | /*if (!__sync_lock_test_and_set(lock, 1)) {*/ 105 | /*clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);*/ 106 | /*qf->runtimedata->wait_times[idx].locks_acquired_single_attempt++;*/ 107 | /*qf->runtimedata->wait_times[idx].total_time_single += BILLION * (end.tv_sec - 108 | * start.tv_sec) + end.tv_nsec - start.tv_nsec;*/ 109 | /*} else {*/ 110 | /*while (__sync_lock_test_and_set(lock, 1))*/ 111 | /*while (*lock);*/ 112 | /*clock_gettime(CLOCK_THREAD_CPUTIME_ID, &end);*/ 113 | /*qf->runtimedata->wait_times[idx].total_time_spinning += BILLION * (end.tv_sec - 114 | * start.tv_sec) + end.tv_nsec - start.tv_nsec;*/ 115 | /*}*/ 116 | 117 | /*end = rdtsc();*/ 118 | /*qf->runtimedata->wait_times[idx].locks_taken++;*/ 119 | /*return;*/ 120 | } 121 | #else 122 | /** 123 | * Try to acquire a lock once and return even if the lock is busy. 124 | * If spin flag is set, then spin until the lock is available. 
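 * NOTE (added for clarity, restating the flag handling in the code below):
 * if QF_WAIT_FOR_LOCK is not set in the runtime flag word, acquisition is a
 * single __sync_lock_test_and_set() attempt that may fail; otherwise the
 * thread spins on the lock word until it is released.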
125 | */ 126 | static inline bool qf_spin_lock(volatile int *lock, uint8_t flag) 127 | { 128 | if (GET_WAIT_FOR_LOCK(flag) != QF_WAIT_FOR_LOCK) { 129 | return !__sync_lock_test_and_set(lock, 1); 130 | } else { 131 | while (__sync_lock_test_and_set(lock, 1)) 132 | while (*lock); 133 | return true; 134 | } 135 | 136 | return false; 137 | } 138 | #endif 139 | 140 | static inline void qf_spin_unlock(volatile int *lock) 141 | { 142 | __sync_lock_release(lock); 143 | return; 144 | } 145 | 146 | static bool qf_lock(QF *qf, uint64_t hash_bucket_index, bool small, uint8_t 147 | runtime_lock) 148 | { 149 | uint64_t hash_bucket_lock_offset = hash_bucket_index % NUM_SLOTS_TO_LOCK; 150 | if (small) { 151 | #ifdef LOG_WAIT_TIME 152 | if (!qf_spin_lock(qf, &qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK], 153 | hash_bucket_index/NUM_SLOTS_TO_LOCK, 154 | runtime_lock)) 155 | return false; 156 | if (NUM_SLOTS_TO_LOCK - hash_bucket_lock_offset <= CLUSTER_SIZE) { 157 | if (!qf_spin_lock(qf, &qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1], 158 | hash_bucket_index/NUM_SLOTS_TO_LOCK+1, 159 | runtime_lock)) { 160 | qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); 161 | return false; 162 | } 163 | } 164 | #else 165 | if (!qf_spin_lock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK], 166 | runtime_lock)) 167 | return false; 168 | if (NUM_SLOTS_TO_LOCK - hash_bucket_lock_offset <= CLUSTER_SIZE) { 169 | if (!qf_spin_lock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1], 170 | runtime_lock)) { 171 | qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); 172 | return false; 173 | } 174 | } 175 | #endif 176 | } else { 177 | #ifdef LOG_WAIT_TIME 178 | if (hash_bucket_index >= NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= 179 | CLUSTER_SIZE) { 180 | if (!qf_spin_lock(qf, 181 | &qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1], 182 | runtime_lock)) 183 | return false; 184 | } 185 | if (!qf_spin_lock(qf, 186 | &qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK], 187 | runtime_lock)) { 188 | if (hash_bucket_index >= NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= 189 | CLUSTER_SIZE) 190 | qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); 191 | return false; 192 | } 193 | if (!qf_spin_lock(qf, &qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1], 194 | runtime_lock)) { 195 | qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); 196 | if (hash_bucket_index >= NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= 197 | CLUSTER_SIZE) 198 | qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); 199 | return false; 200 | } 201 | #else 202 | if (hash_bucket_index >= NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= 203 | CLUSTER_SIZE) { 204 | if 205 | (!qf_spin_lock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1], 206 | runtime_lock)) 207 | return false; 208 | } 209 | if (!qf_spin_lock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK], 210 | runtime_lock)) { 211 | if (hash_bucket_index >= NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= 212 | CLUSTER_SIZE) 213 | qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); 214 | return false; 215 | } 216 | if (!qf_spin_lock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1], 217 | runtime_lock)) { 218 | qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); 219 | if (hash_bucket_index >= 
NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= 220 | CLUSTER_SIZE) 221 | qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); 222 | return false; 223 | } 224 | #endif 225 | } 226 | return true; 227 | } 228 | 229 | static void qf_unlock(QF *qf, uint64_t hash_bucket_index, bool small) 230 | { 231 | uint64_t hash_bucket_lock_offset = hash_bucket_index % NUM_SLOTS_TO_LOCK; 232 | if (small) { 233 | if (NUM_SLOTS_TO_LOCK - hash_bucket_lock_offset <= CLUSTER_SIZE) { 234 | qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1]); 235 | } 236 | qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); 237 | } else { 238 | qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK+1]); 239 | qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK]); 240 | if (hash_bucket_index >= NUM_SLOTS_TO_LOCK && hash_bucket_lock_offset <= 241 | CLUSTER_SIZE) 242 | qf_spin_unlock(&qf->runtimedata->locks[hash_bucket_index/NUM_SLOTS_TO_LOCK-1]); 243 | } 244 | } 245 | 246 | /*static void modify_metadata(QF *qf, uint64_t *metadata, int cnt)*/ 247 | /*{*/ 248 | /*#ifdef LOG_WAIT_TIME*/ 249 | /*qf_spin_lock(qf, &qf->runtimedata->metadata_lock,*/ 250 | /*qf->runtimedata->num_locks, QF_WAIT_FOR_LOCK);*/ 251 | /*#else*/ 252 | /*qf_spin_lock(&qf->runtimedata->metadata_lock, QF_WAIT_FOR_LOCK);*/ 253 | /*#endif*/ 254 | /**metadata = *metadata + cnt;*/ 255 | /*qf_spin_unlock(&qf->runtimedata->metadata_lock);*/ 256 | /*return;*/ 257 | /*}*/ 258 | 259 | static void modify_metadata(pc_t *metadata, int cnt) 260 | { 261 | pc_add(metadata, cnt); 262 | return; 263 | } 264 | 265 | static inline int popcnt(uint64_t val) 266 | { 267 | asm("popcnt %[val], %[val]" 268 | : [val] "+r" (val) 269 | : 270 | : "cc"); 271 | return val; 272 | } 273 | 274 | static inline int64_t bitscanreverse(uint64_t val) 275 | { 276 | if (val == 0) { 277 | return -1; 278 | } else { 279 | asm("bsr %[val], %[val]" 280 | : [val] "+r" (val) 281 | : 282 | : "cc"); 283 | return val; 284 | } 285 | } 286 | 287 | static inline int popcntv(const uint64_t val, int ignore) 288 | { 289 | if (ignore % 64) 290 | return popcnt (val & ~BITMASK(ignore % 64)); 291 | else 292 | return popcnt(val); 293 | } 294 | 295 | // Returns the number of 1s up to (and including) the pos'th bit 296 | // Bits are numbered from 0 297 | static inline int bitrank(uint64_t val, int pos) { 298 | val = val & ((2ULL << pos) - 1); 299 | asm("popcnt %[val], %[val]" 300 | : [val] "+r" (val) 301 | : 302 | : "cc"); 303 | return val; 304 | } 305 | 306 | /** 307 | * Returns the position of the k-th 1 in the 64-bit word x. 308 | * k is 0-based, so k=0 returns the position of the first 1. 309 | * 310 | * Uses the broadword selection algorithm by Vigna [1], improved by Gog 311 | * and Petri [2] and Vigna [3]. 312 | * 313 | * [1] Sebastiano Vigna. Broadword Implementation of Rank/Select 314 | * Queries. WEA, 2008 315 | * 316 | * [2] Simon Gog, Matthias Petri. Optimized succinct data 317 | * structures for massive data. Softw. Pract. Exper., 2014 318 | * 319 | * [3] Sebastiano Vigna. MG4J 5.2.1. 
http://mg4j.di.unimi.it/ 320 | * The following code is taken from 321 | * https://github.com/facebook/folly/blob/b28186247104f8b90cfbe094d289c91f9e413317/folly/experimental/Select64.h 322 | */ 323 | const uint8_t kSelectInByte[2048] = { 324 | 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 325 | 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 326 | 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 327 | 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 328 | 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 7, 0, 329 | 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 330 | 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 331 | 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 332 | 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 333 | 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 8, 8, 8, 1, 334 | 8, 2, 2, 1, 8, 3, 3, 1, 3, 2, 2, 1, 8, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 335 | 2, 1, 8, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 336 | 4, 3, 3, 1, 3, 2, 2, 1, 8, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 6, 4, 337 | 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 338 | 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 8, 7, 7, 1, 7, 2, 339 | 2, 1, 7, 3, 3, 1, 3, 2, 2, 1, 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 340 | 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 341 | 3, 1, 3, 2, 2, 1, 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1, 6, 4, 4, 1, 342 | 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 343 | 2, 1, 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1, 8, 8, 8, 8, 8, 8, 8, 2, 344 | 8, 8, 8, 3, 8, 3, 3, 2, 8, 8, 8, 4, 8, 4, 4, 2, 8, 4, 4, 3, 4, 3, 3, 2, 8, 8, 345 | 8, 5, 8, 5, 5, 2, 8, 5, 5, 3, 5, 3, 3, 2, 8, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 346 | 4, 3, 3, 2, 8, 8, 8, 6, 8, 6, 6, 2, 8, 6, 6, 3, 6, 3, 3, 2, 8, 6, 6, 4, 6, 4, 347 | 4, 2, 6, 4, 4, 3, 4, 3, 3, 2, 8, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 348 | 6, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 8, 8, 8, 7, 8, 7, 7, 2, 8, 7, 349 | 7, 3, 7, 3, 3, 2, 8, 7, 7, 4, 7, 4, 4, 2, 7, 4, 4, 3, 4, 3, 3, 2, 8, 7, 7, 5, 350 | 7, 5, 5, 2, 7, 5, 5, 3, 5, 3, 3, 2, 7, 5, 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 351 | 3, 2, 8, 7, 7, 6, 7, 6, 6, 2, 7, 6, 6, 3, 6, 3, 3, 2, 7, 6, 6, 4, 6, 4, 4, 2, 352 | 6, 4, 4, 3, 4, 3, 3, 2, 7, 6, 6, 5, 6, 5, 5, 2, 6, 5, 5, 3, 5, 3, 3, 2, 6, 5, 353 | 5, 4, 5, 4, 4, 2, 5, 4, 4, 3, 4, 3, 3, 2, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 354 | 8, 8, 8, 3, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 4, 8, 4, 4, 3, 8, 8, 8, 8, 8, 8, 355 | 8, 5, 8, 8, 8, 5, 8, 5, 5, 3, 8, 8, 8, 5, 8, 5, 5, 4, 8, 5, 5, 4, 5, 4, 4, 3, 356 | 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 3, 8, 8, 8, 6, 8, 6, 6, 4, 8, 6, 357 | 6, 4, 6, 4, 4, 3, 8, 8, 8, 6, 8, 6, 6, 5, 8, 6, 6, 5, 6, 5, 5, 3, 8, 6, 6, 5, 358 | 6, 5, 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 359 | 7, 3, 8, 8, 8, 7, 8, 7, 7, 4, 8, 7, 7, 4, 7, 4, 4, 3, 8, 8, 8, 7, 8, 7, 7, 5, 360 | 8, 7, 7, 5, 7, 5, 5, 3, 8, 7, 7, 5, 7, 5, 5, 4, 7, 5, 5, 4, 5, 4, 4, 3, 8, 8, 361 | 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 3, 8, 7, 7, 6, 7, 6, 6, 4, 7, 6, 6, 4, 362 | 6, 4, 4, 3, 8, 7, 7, 6, 7, 6, 6, 5, 7, 6, 6, 5, 6, 5, 5, 3, 7, 6, 6, 5, 6, 5, 363 | 5, 4, 6, 5, 5, 4, 5, 4, 4, 3, 8, 8, 
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 364 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 365 | 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 5, 8, 5, 5, 4, 8, 8, 8, 8, 366 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 367 | 6, 4, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 8, 6, 6, 5, 8, 8, 8, 6, 8, 6, 6, 5, 368 | 8, 6, 6, 5, 6, 5, 5, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 369 | 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 4, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 370 | 8, 7, 7, 5, 8, 8, 8, 7, 8, 7, 7, 5, 8, 7, 7, 5, 7, 5, 5, 4, 8, 8, 8, 8, 8, 8, 371 | 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 4, 372 | 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 6, 5, 8, 7, 7, 6, 7, 6, 6, 5, 7, 6, 373 | 6, 5, 6, 5, 5, 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 374 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 375 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 5, 8, 8, 8, 8, 8, 8, 8, 8, 376 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 377 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 8, 8, 8, 8, 6, 8, 8, 8, 6, 378 | 8, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 379 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 380 | 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 381 | 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 8, 382 | 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 8, 7, 8, 7, 7, 6, 8, 7, 7, 6, 7, 6, 383 | 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 384 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 385 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 386 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 387 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 388 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 389 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 390 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 391 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 8, 392 | 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 8, 8, 8, 8, 7, 8, 8, 8, 7, 8, 7, 7, 6, 8, 8, 393 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 394 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 395 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 396 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 397 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 398 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 399 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 400 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 401 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 402 | 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 7 403 | }; 404 | 405 | static inline uint64_t _select64(uint64_t x, int k) 406 | { 407 | if (k >= popcnt(x)) { return 64; } 408 | 409 | const uint64_t kOnesStep4 = 0x1111111111111111ULL; 410 | const uint64_t kOnesStep8 = 0x0101010101010101ULL; 411 | const 
uint64_t kMSBsStep8 = 0x80ULL * kOnesStep8; 412 | 413 | uint64_t s = x; 414 | s = s - ((s & 0xA * kOnesStep4) >> 1); 415 | s = (s & 0x3 * kOnesStep4) + ((s >> 2) & 0x3 * kOnesStep4); 416 | s = (s + (s >> 4)) & 0xF * kOnesStep8; 417 | uint64_t byteSums = s * kOnesStep8; 418 | 419 | uint64_t kStep8 = k * kOnesStep8; 420 | uint64_t geqKStep8 = (((kStep8 | kMSBsStep8) - byteSums) & kMSBsStep8); 421 | uint64_t place = popcnt(geqKStep8) * 8; 422 | uint64_t byteRank = k - (((byteSums << 8) >> place) & (uint64_t)(0xFF)); 423 | return place + kSelectInByte[((x >> place) & 0xFF) | (byteRank << 8)]; 424 | } 425 | 426 | // Returns the position of the rank'th 1. (rank = 0 returns the 1st 1) 427 | // Returns 64 if there are fewer than rank+1 1s. 428 | static inline uint64_t bitselect(uint64_t val, int rank) { 429 | #ifdef __SSE4_2_ 430 | uint64_t i = 1ULL << rank; 431 | asm("pdep %[val], %[mask], %[val]" 432 | : [val] "+r" (val) 433 | : [mask] "r" (i)); 434 | asm("tzcnt %[bit], %[index]" 435 | : [index] "=r" (i) 436 | : [bit] "g" (val) 437 | : "cc"); 438 | return i; 439 | #endif 440 | return _select64(val, rank); 441 | } 442 | 443 | static inline uint64_t bitselectv(const uint64_t val, int ignore, int rank) 444 | { 445 | return bitselect(val & ~BITMASK(ignore % 64), rank); 446 | } 447 | 448 | static inline int is_runend(const QF *qf, uint64_t index) 449 | { 450 | return (METADATA_WORD(qf, runends, index) >> ((index % QF_SLOTS_PER_BLOCK) % 451 | 64)) & 1ULL; 452 | } 453 | 454 | static inline int is_occupied(const QF *qf, uint64_t index) 455 | { 456 | return (METADATA_WORD(qf, occupieds, index) >> ((index % QF_SLOTS_PER_BLOCK) % 457 | 64)) & 1ULL; 458 | } 459 | 460 | #if QF_BITS_PER_SLOT == 8 || QF_BITS_PER_SLOT == 16 || QF_BITS_PER_SLOT == 32 || QF_BITS_PER_SLOT == 64 461 | 462 | static inline uint64_t get_slot(const QF *qf, uint64_t index) 463 | { 464 | assert(index < qf->metadata->xnslots); 465 | return get_block(qf, index / QF_SLOTS_PER_BLOCK)->slots[index % QF_SLOTS_PER_BLOCK]; 466 | } 467 | 468 | static inline void set_slot(const QF *qf, uint64_t index, uint64_t value) 469 | { 470 | assert(index < qf->metadata->xnslots); 471 | get_block(qf, index / QF_SLOTS_PER_BLOCK)->slots[index % QF_SLOTS_PER_BLOCK] = 472 | value & BITMASK(qf->metadata->bits_per_slot); 473 | } 474 | 475 | #elif QF_BITS_PER_SLOT > 0 476 | 477 | /* Little-endian code .... Big-endian is TODO */ 478 | 479 | static inline uint64_t get_slot(const QF *qf, uint64_t index) 480 | { 481 | /* Should use __uint128_t to support up to 64-bit remainders, but gcc seems 482 | * to generate buggy code. :/ */ 483 | assert(index < qf->metadata->xnslots); 484 | uint64_t *p = (uint64_t *)&get_block(qf, index / 485 | QF_SLOTS_PER_BLOCK)->slots[(index % 486 | QF_SLOTS_PER_BLOCK) 487 | * QF_BITS_PER_SLOT / 8]; 488 | return (uint64_t)(((*p) >> (((index % QF_SLOTS_PER_BLOCK) * QF_BITS_PER_SLOT) % 489 | 8)) & BITMASK(QF_BITS_PER_SLOT)); 490 | } 491 | 492 | static inline void set_slot(const QF *qf, uint64_t index, uint64_t value) 493 | { 494 | /* Should use __uint128_t to support up to 64-bit remainders, but gcc seems 495 | * to generate buggy code. 
:/ */ 496 | assert(index < qf->metadata->xnslots); 497 | uint64_t *p = (uint64_t *)&get_block(qf, index / 498 | QF_SLOTS_PER_BLOCK)->slots[(index % 499 | QF_SLOTS_PER_BLOCK) 500 | * QF_BITS_PER_SLOT / 8]; 501 | uint64_t t = *p; 502 | uint64_t mask = BITMASK(QF_BITS_PER_SLOT); 503 | uint64_t v = value; 504 | int shift = ((index % QF_SLOTS_PER_BLOCK) * QF_BITS_PER_SLOT) % 8; 505 | mask <<= shift; 506 | v <<= shift; 507 | t &= ~mask; 508 | t |= v; 509 | *p = t; 510 | } 511 | 512 | #else 513 | 514 | /* Little-endian code .... Big-endian is TODO */ 515 | 516 | static inline uint64_t get_slot(const QF *qf, uint64_t index) 517 | { 518 | assert(index < qf->metadata->xnslots); 519 | /* Should use __uint128_t to support up to 64-bit remainders, but gcc seems 520 | * to generate buggy code. :/ */ 521 | uint64_t *p = (uint64_t *)&get_block(qf, index / 522 | QF_SLOTS_PER_BLOCK)->slots[(index % 523 | QF_SLOTS_PER_BLOCK) 524 | * qf->metadata->bits_per_slot / 8]; 525 | return (uint64_t)(((*p) >> (((index % QF_SLOTS_PER_BLOCK) * 526 | qf->metadata->bits_per_slot) % 8)) & 527 | BITMASK(qf->metadata->bits_per_slot)); 528 | } 529 | 530 | static inline void set_slot(const QF *qf, uint64_t index, uint64_t value) 531 | { 532 | assert(index < qf->metadata->xnslots); 533 | /* Should use __uint128_t to support up to 64-bit remainders, but gcc seems 534 | * to generate buggy code. :/ */ 535 | uint64_t *p = (uint64_t *)&get_block(qf, index / 536 | QF_SLOTS_PER_BLOCK)->slots[(index % 537 | QF_SLOTS_PER_BLOCK) 538 | * qf->metadata->bits_per_slot / 8]; 539 | uint64_t t = *p; 540 | uint64_t mask = BITMASK(qf->metadata->bits_per_slot); 541 | uint64_t v = value; 542 | int shift = ((index % QF_SLOTS_PER_BLOCK) * qf->metadata->bits_per_slot) % 8; 543 | mask <<= shift; 544 | v <<= shift; 545 | t &= ~mask; 546 | t |= v; 547 | *p = t; 548 | } 549 | 550 | #endif 551 | 552 | static inline uint64_t run_end(const QF *qf, uint64_t hash_bucket_index); 553 | 554 | static inline uint64_t block_offset(const QF *qf, uint64_t blockidx) 555 | { 556 | /* If we have extended counters and a 16-bit (or larger) offset 557 | field, then we can safely ignore the possibility of overflowing 558 | that field. 
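   NOTE (added for clarity, spelling out the check below): with a 1-byte
   offset field, a stored value of BITMASK(8) == 255 acts as a saturation
   marker, so the true offset is recomputed from run_end() of the last slot
   of the previous block; a 16-bit or wider field is trusted unconditionally.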
*/ 559 | if (sizeof(qf->blocks[0].offset) > 1 || 560 | get_block(qf, blockidx)->offset < BITMASK(8*sizeof(qf->blocks[0].offset))) 561 | return get_block(qf, blockidx)->offset; 562 | 563 | return run_end(qf, QF_SLOTS_PER_BLOCK * blockidx - 1) - QF_SLOTS_PER_BLOCK * 564 | blockidx + 1; 565 | } 566 | 567 | static inline uint64_t run_end(const QF *qf, uint64_t hash_bucket_index) 568 | { 569 | uint64_t bucket_block_index = hash_bucket_index / QF_SLOTS_PER_BLOCK; 570 | uint64_t bucket_intrablock_offset = hash_bucket_index % QF_SLOTS_PER_BLOCK; 571 | uint64_t bucket_blocks_offset = block_offset(qf, bucket_block_index); 572 | 573 | uint64_t bucket_intrablock_rank = bitrank(get_block(qf, 574 | bucket_block_index)->occupieds[0], 575 | bucket_intrablock_offset); 576 | 577 | if (bucket_intrablock_rank == 0) { 578 | if (bucket_blocks_offset <= bucket_intrablock_offset) 579 | return hash_bucket_index; 580 | else 581 | return QF_SLOTS_PER_BLOCK * bucket_block_index + bucket_blocks_offset - 1; 582 | } 583 | 584 | uint64_t runend_block_index = bucket_block_index + bucket_blocks_offset / 585 | QF_SLOTS_PER_BLOCK; 586 | uint64_t runend_ignore_bits = bucket_blocks_offset % QF_SLOTS_PER_BLOCK; 587 | uint64_t runend_rank = bucket_intrablock_rank - 1; 588 | uint64_t runend_block_offset = bitselectv(get_block(qf, 589 | runend_block_index)->runends[0], 590 | runend_ignore_bits, runend_rank); 591 | if (runend_block_offset == QF_SLOTS_PER_BLOCK) { 592 | if (bucket_blocks_offset == 0 && bucket_intrablock_rank == 0) { 593 | /* The block begins in empty space, and this bucket is in that region of 594 | * empty space */ 595 | return hash_bucket_index; 596 | } else { 597 | do { 598 | runend_rank -= popcntv(get_block(qf, 599 | runend_block_index)->runends[0], 600 | runend_ignore_bits); 601 | runend_block_index++; 602 | runend_ignore_bits = 0; 603 | runend_block_offset = bitselectv(get_block(qf, 604 | runend_block_index)->runends[0], 605 | runend_ignore_bits, runend_rank); 606 | } while (runend_block_offset == QF_SLOTS_PER_BLOCK); 607 | } 608 | } 609 | 610 | uint64_t runend_index = QF_SLOTS_PER_BLOCK * runend_block_index + 611 | runend_block_offset; 612 | if (runend_index < hash_bucket_index) 613 | return hash_bucket_index; 614 | else 615 | return runend_index; 616 | } 617 | 618 | static inline int offset_lower_bound(const QF *qf, uint64_t slot_index) 619 | { 620 | const qfblock * b = get_block(qf, slot_index / QF_SLOTS_PER_BLOCK); 621 | const uint64_t slot_offset = slot_index % QF_SLOTS_PER_BLOCK; 622 | const uint64_t boffset = b->offset; 623 | const uint64_t occupieds = b->occupieds[0] & BITMASK(slot_offset+1); 624 | assert(QF_SLOTS_PER_BLOCK == 64); 625 | if (boffset <= slot_offset) { 626 | const uint64_t runends = (b->runends[0] & BITMASK(slot_offset)) >> boffset; 627 | return popcnt(occupieds) - popcnt(runends); 628 | } 629 | return boffset - slot_offset + popcnt(occupieds); 630 | } 631 | 632 | static inline int is_empty(const QF *qf, uint64_t slot_index) 633 | { 634 | return offset_lower_bound(qf, slot_index) == 0; 635 | } 636 | 637 | static inline int might_be_empty(const QF *qf, uint64_t slot_index) 638 | { 639 | return !is_occupied(qf, slot_index) 640 | && !is_runend(qf, slot_index); 641 | } 642 | 643 | static inline int probably_is_empty(const QF *qf, uint64_t slot_index) 644 | { 645 | return get_slot(qf, slot_index) == 0 646 | && !is_occupied(qf, slot_index) 647 | && !is_runend(qf, slot_index); 648 | } 649 | 650 | static inline uint64_t find_first_empty_slot(QF *qf, uint64_t from) 651 | { 652 | do { 653 | int t 
= offset_lower_bound(qf, from); 654 | assert(t>=0); 655 | if (t == 0) 656 | break; 657 | from = from + t; 658 | } while(1); 659 | return from; 660 | } 661 | 662 | static inline uint64_t shift_into_b(const uint64_t a, const uint64_t b, 663 | const int bstart, const int bend, 664 | const int amount) 665 | { 666 | const uint64_t a_component = bstart == 0 ? (a >> (64 - amount)) : 0; 667 | const uint64_t b_shifted_mask = BITMASK(bend - bstart) << bstart; 668 | const uint64_t b_shifted = ((b_shifted_mask & b) << amount) & b_shifted_mask; 669 | const uint64_t b_mask = ~b_shifted_mask; 670 | return a_component | b_shifted | (b & b_mask); 671 | } 672 | 673 | #if QF_BITS_PER_SLOT == 8 || QF_BITS_PER_SLOT == 16 || QF_BITS_PER_SLOT == 32 || QF_BITS_PER_SLOT == 64 674 | 675 | static inline void shift_remainders(QF *qf, uint64_t start_index, uint64_t 676 | empty_index) 677 | { 678 | uint64_t start_block = start_index / QF_SLOTS_PER_BLOCK; 679 | uint64_t start_offset = start_index % QF_SLOTS_PER_BLOCK; 680 | uint64_t empty_block = empty_index / QF_SLOTS_PER_BLOCK; 681 | uint64_t empty_offset = empty_index % QF_SLOTS_PER_BLOCK; 682 | 683 | assert (start_index <= empty_index && empty_index < qf->metadata->xnslots); 684 | 685 | while (start_block < empty_block) { 686 | memmove(&get_block(qf, empty_block)->slots[1], 687 | &get_block(qf, empty_block)->slots[0], 688 | empty_offset * sizeof(qf->blocks[0].slots[0])); 689 | get_block(qf, empty_block)->slots[0] = get_block(qf, 690 | empty_block-1)->slots[QF_SLOTS_PER_BLOCK-1]; 691 | empty_block--; 692 | empty_offset = QF_SLOTS_PER_BLOCK-1; 693 | } 694 | 695 | memmove(&get_block(qf, empty_block)->slots[start_offset+1], 696 | &get_block(qf, empty_block)->slots[start_offset], 697 | (empty_offset - start_offset) * sizeof(qf->blocks[0].slots[0])); 698 | } 699 | 700 | #else 701 | 702 | #define REMAINDER_WORD(qf, i) ((uint64_t *)&(get_block(qf, (i)/qf->metadata->bits_per_slot)->slots[8 * ((i) % qf->metadata->bits_per_slot)])) 703 | 704 | static inline void shift_remainders(QF *qf, const uint64_t start_index, const 705 | uint64_t empty_index) 706 | { 707 | uint64_t last_word = (empty_index + 1) * qf->metadata->bits_per_slot / 64; 708 | const uint64_t first_word = start_index * qf->metadata->bits_per_slot / 64; 709 | int bend = ((empty_index + 1) * qf->metadata->bits_per_slot) % 64; 710 | const int bstart = (start_index * qf->metadata->bits_per_slot) % 64; 711 | 712 | while (last_word != first_word) { 713 | *REMAINDER_WORD(qf, last_word) = shift_into_b(*REMAINDER_WORD(qf, last_word-1), 714 | *REMAINDER_WORD(qf, last_word), 715 | 0, bend, qf->metadata->bits_per_slot); 716 | last_word--; 717 | bend = 64; 718 | } 719 | *REMAINDER_WORD(qf, last_word) = shift_into_b(0, *REMAINDER_WORD(qf, 720 | last_word), 721 | bstart, bend, 722 | qf->metadata->bits_per_slot); 723 | } 724 | 725 | #endif 726 | 727 | static inline void qf_dump_block(const QF *qf, uint64_t i) 728 | { 729 | uint64_t j; 730 | 731 | printf("%-192d", get_block(qf, i)->offset); 732 | printf("\n"); 733 | 734 | for (j = 0; j < QF_SLOTS_PER_BLOCK; j++) 735 | printf("%02lx ", j); 736 | printf("\n"); 737 | 738 | for (j = 0; j < QF_SLOTS_PER_BLOCK; j++) 739 | printf(" %d ", (get_block(qf, i)->occupieds[j/64] & (1ULL << (j%64))) ? 1 : 0); 740 | printf("\n"); 741 | 742 | for (j = 0; j < QF_SLOTS_PER_BLOCK; j++) 743 | printf(" %d ", (get_block(qf, i)->runends[j/64] & (1ULL << (j%64))) ? 
1 : 0); 744 | printf("\n"); 745 | 746 | #if QF_BITS_PER_SLOT == 8 || QF_BITS_PER_SLOT == 16 || QF_BITS_PER_SLOT == 32 747 | for (j = 0; j < QF_SLOTS_PER_BLOCK; j++) 748 | printf("%02x ", get_block(qf, i)->slots[j]); 749 | #elif QF_BITS_PER_SLOT == 64 750 | for (j = 0; j < QF_SLOTS_PER_BLOCK; j++) 751 | printf("%02lx ", get_block(qf, i)->slots[j]); 752 | #else 753 | for (j = 0; j < QF_SLOTS_PER_BLOCK * qf->metadata->bits_per_slot / 8; j++) 754 | printf("%02x ", get_block(qf, i)->slots[j]); 755 | #endif 756 | 757 | printf("\n"); 758 | 759 | printf("\n"); 760 | } 761 | 762 | void qf_dump_metadata(const QF *qf) { 763 | printf("Slots: %lu Occupied: %lu Elements: %lu Distinct: %lu\n", 764 | qf->metadata->nslots, 765 | qf->metadata->noccupied_slots, 766 | qf->metadata->nelts, 767 | qf->metadata->ndistinct_elts); 768 | printf("Key_bits: %lu Value_bits: %lu Remainder_bits: %lu Bits_per_slot: %lu\n", 769 | qf->metadata->key_bits, 770 | qf->metadata->value_bits, 771 | qf->metadata->key_remainder_bits, 772 | qf->metadata->bits_per_slot); 773 | } 774 | 775 | void qf_dump(const QF *qf) 776 | { 777 | uint64_t i; 778 | 779 | printf("%lu %lu %lu\n", 780 | qf->metadata->nblocks, 781 | qf->metadata->ndistinct_elts, 782 | qf->metadata->nelts); 783 | 784 | for (i = 0; i < qf->metadata->nblocks; i++) { 785 | qf_dump_block(qf, i); 786 | } 787 | 788 | } 789 | 790 | static inline void find_next_n_empty_slots(QF *qf, uint64_t from, uint64_t n, 791 | uint64_t *indices) 792 | { 793 | while (n) { 794 | indices[--n] = find_first_empty_slot(qf, from); 795 | from = indices[n] + 1; 796 | } 797 | } 798 | 799 | static inline void shift_slots(QF *qf, int64_t first, uint64_t last, uint64_t 800 | distance) 801 | { 802 | int64_t i; 803 | if (distance == 1) 804 | shift_remainders(qf, first, last+1); 805 | else 806 | for (i = last; i >= first; i--) 807 | set_slot(qf, i + distance, get_slot(qf, i)); 808 | } 809 | 810 | static inline void shift_runends(QF *qf, int64_t first, uint64_t last, 811 | uint64_t distance) 812 | { 813 | assert(last < qf->metadata->xnslots && distance < 64); 814 | uint64_t first_word = first / 64; 815 | uint64_t bstart = first % 64; 816 | uint64_t last_word = (last + distance + 1) / 64; 817 | uint64_t bend = (last + distance + 1) % 64; 818 | 819 | if (last_word != first_word) { 820 | METADATA_WORD(qf, runends, 64*last_word) = shift_into_b(METADATA_WORD(qf, runends, 64*(last_word-1)), 821 | METADATA_WORD(qf, runends, 64*last_word), 822 | 0, bend, distance); 823 | bend = 64; 824 | last_word--; 825 | while (last_word != first_word) { 826 | METADATA_WORD(qf, runends, 64*last_word) = shift_into_b(METADATA_WORD(qf, runends, 64*(last_word-1)), 827 | METADATA_WORD(qf, runends, 64*last_word), 828 | 0, bend, distance); 829 | last_word--; 830 | } 831 | } 832 | METADATA_WORD(qf, runends, 64*last_word) = shift_into_b(0, METADATA_WORD(qf, 833 | runends, 834 | 64*last_word), 835 | bstart, bend, distance); 836 | 837 | } 838 | 839 | static inline bool insert_replace_slots_and_shift_remainders_and_runends_and_offsets(QF *qf, 840 | int operation, 841 | uint64_t bucket_index, 842 | uint64_t overwrite_index, 843 | const uint64_t *remainders, 844 | uint64_t total_remainders, 845 | uint64_t noverwrites) 846 | { 847 | uint64_t empties[67]; 848 | uint64_t i; 849 | int64_t j; 850 | int64_t ninserts = total_remainders - noverwrites; 851 | uint64_t insert_index = overwrite_index + noverwrites; 852 | 853 | if (ninserts > 0) { 854 | /* First, shift things to create n empty spaces where we need them. 
*/ 855 | find_next_n_empty_slots(qf, insert_index, ninserts, empties); 856 | if (empties[0] >= qf->metadata->xnslots) { 857 | return false; 858 | } 859 | for (j = 0; j < ninserts - 1; j++) 860 | shift_slots(qf, empties[j+1] + 1, empties[j] - 1, j + 1); 861 | shift_slots(qf, insert_index, empties[ninserts - 1] - 1, ninserts); 862 | 863 | for (j = 0; j < ninserts - 1; j++) 864 | shift_runends(qf, empties[j+1] + 1, empties[j] - 1, j + 1); 865 | shift_runends(qf, insert_index, empties[ninserts - 1] - 1, ninserts); 866 | 867 | for (i = noverwrites; i < total_remainders - 1; i++) 868 | METADATA_WORD(qf, runends, overwrite_index + i) &= ~(1ULL << 869 | (((overwrite_index 870 | + i) % 871 | QF_SLOTS_PER_BLOCK) 872 | % 64)); 873 | 874 | switch (operation) { 875 | case 0: /* insert into empty bucket */ 876 | assert (noverwrites == 0); 877 | METADATA_WORD(qf, runends, overwrite_index + total_remainders - 1) |= 878 | 1ULL << (((overwrite_index + total_remainders - 1) % 879 | QF_SLOTS_PER_BLOCK) % 64); 880 | break; 881 | case 1: /* append to bucket */ 882 | METADATA_WORD(qf, runends, overwrite_index + noverwrites - 1) &= 883 | ~(1ULL << (((overwrite_index + noverwrites - 1) % QF_SLOTS_PER_BLOCK) % 884 | 64)); 885 | METADATA_WORD(qf, runends, overwrite_index + total_remainders - 1) |= 886 | 1ULL << (((overwrite_index + total_remainders - 1) % 887 | QF_SLOTS_PER_BLOCK) % 64); 888 | break; 889 | case 2: /* insert into bucket */ 890 | METADATA_WORD(qf, runends, overwrite_index + total_remainders - 1) &= 891 | ~(1ULL << (((overwrite_index + total_remainders - 1) % 892 | QF_SLOTS_PER_BLOCK) % 64)); 893 | break; 894 | default: 895 | fprintf(stderr, "Invalid operation %d\n", operation); 896 | abort(); 897 | } 898 | 899 | uint64_t npreceding_empties = 0; 900 | for (i = bucket_index / QF_SLOTS_PER_BLOCK + 1; i <= empties[0]/QF_SLOTS_PER_BLOCK; i++) { 901 | while ((int64_t)npreceding_empties < ninserts && 902 | empties[ninserts - 1 - npreceding_empties] / QF_SLOTS_PER_BLOCK < i) 903 | npreceding_empties++; 904 | 905 | if (get_block(qf, i)->offset + ninserts - npreceding_empties < BITMASK(8*sizeof(qf->blocks[0].offset))) 906 | get_block(qf, i)->offset += ninserts - npreceding_empties; 907 | else 908 | get_block(qf, i)->offset = (uint8_t) BITMASK(8*sizeof(qf->blocks[0].offset)); 909 | } 910 | } 911 | 912 | for (i = 0; i < total_remainders; i++) 913 | set_slot(qf, overwrite_index + i, remainders[i]); 914 | 915 | modify_metadata(&qf->runtimedata->pc_noccupied_slots, ninserts); 916 | 917 | return true; 918 | } 919 | 920 | static inline int remove_replace_slots_and_shift_remainders_and_runends_and_offsets(QF *qf, 921 | int operation, 922 | uint64_t bucket_index, 923 | uint64_t overwrite_index, 924 | const uint64_t *remainders, 925 | uint64_t total_remainders, 926 | uint64_t old_length) 927 | { 928 | uint64_t i; 929 | 930 | // Update the slots 931 | for (i = 0; i < total_remainders; i++) 932 | set_slot(qf, overwrite_index + i, remainders[i]); 933 | 934 | // If this is the last thing in its run, then we may need to set a new runend bit 935 | if (is_runend(qf, overwrite_index + old_length - 1)) { 936 | if (total_remainders > 0) { 937 | // If we're not deleting this entry entirely, then it will still the last entry in this run 938 | METADATA_WORD(qf, runends, overwrite_index + total_remainders - 1) |= 1ULL << ((overwrite_index + total_remainders - 1) % 64); 939 | } else if (overwrite_index > bucket_index && 940 | !is_runend(qf, overwrite_index - 1)) { 941 | // If we're deleting this entry entirely, but it is not the first 
entry in this run, 942 | // then set the preceding entry to be the runend 943 | METADATA_WORD(qf, runends, overwrite_index - 1) |= 1ULL << ((overwrite_index - 1) % 64); 944 | } 945 | } 946 | 947 | // shift slots back one run at a time 948 | uint64_t original_bucket = bucket_index; 949 | uint64_t current_bucket = bucket_index; 950 | uint64_t current_slot = overwrite_index + total_remainders; 951 | uint64_t current_distance = old_length - total_remainders; 952 | int ret_current_distance = current_distance; 953 | 954 | while (current_distance > 0) { 955 | if (is_runend(qf, current_slot + current_distance - 1)) { 956 | do { 957 | current_bucket++; 958 | } while (current_bucket < current_slot + current_distance && 959 | !is_occupied(qf, current_bucket)); 960 | } 961 | 962 | if (current_bucket <= current_slot) { 963 | set_slot(qf, current_slot, get_slot(qf, current_slot + current_distance)); 964 | if (is_runend(qf, current_slot) != 965 | is_runend(qf, current_slot + current_distance)) 966 | METADATA_WORD(qf, runends, current_slot) ^= 1ULL << (current_slot % 64); 967 | current_slot++; 968 | 969 | } else if (current_bucket <= current_slot + current_distance) { 970 | uint64_t i; 971 | for (i = current_slot; i < current_slot + current_distance; i++) { 972 | set_slot(qf, i, 0); 973 | METADATA_WORD(qf, runends, i) &= ~(1ULL << (i % 64)); 974 | } 975 | 976 | current_distance = current_slot + current_distance - current_bucket; 977 | current_slot = current_bucket; 978 | } else { 979 | current_distance = 0; 980 | } 981 | } 982 | 983 | // reset the occupied bit of the hash bucket index if the hash is the 984 | // only item in the run and is removed completely. 985 | if (operation && !total_remainders) 986 | METADATA_WORD(qf, occupieds, bucket_index) &= ~(1ULL << (bucket_index % 64)); 987 | 988 | // update the offset bits. 989 | // find the number of occupied slots in the original_bucket block. 990 | // Then find the runend slot corresponding to the last run in the 991 | // original_bucket block. 992 | // Update the offset of the block to which it belongs. 
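  // Editorial illustration (not in the original source; assumes
  // QF_SLOTS_PER_BLOCK == 64): the offset field of block b caches
  // run_end(qf, 64*b - 1) - (64*b - 1), i.e. how far runs belonging to
  // earlier buckets spill into block b. For example, if the run covering
  // slot 64*b - 1 ends at global slot 64*b + 2, block b stores offset 3;
  // if a deletion pulls that runend back before slot 64*b, the loop below
  // must reset the offset of block b to 0.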
993 | uint64_t original_block = original_bucket / QF_SLOTS_PER_BLOCK; 994 | if (old_length > total_remainders) { // we only update offsets if we shift/delete anything 995 | while (1) { 996 | uint64_t last_occupieds_hash_index = QF_SLOTS_PER_BLOCK * original_block + (QF_SLOTS_PER_BLOCK - 1); 997 | uint64_t runend_index = run_end(qf, last_occupieds_hash_index); 998 | // runend spans across the block 999 | // update the offset of the next block 1000 | if (runend_index / QF_SLOTS_PER_BLOCK == original_block) { // if the run ends in the same block 1001 | if (get_block(qf, original_block + 1)->offset == 0) 1002 | break; 1003 | get_block(qf, original_block + 1)->offset = 0; 1004 | } else { // if the last run spans across the block 1005 | if (get_block(qf, original_block + 1)->offset == (runend_index - last_occupieds_hash_index)) 1006 | break; 1007 | get_block(qf, original_block + 1)->offset = (runend_index - last_occupieds_hash_index); 1008 | } 1009 | original_block++; 1010 | } 1011 | } 1012 | 1013 | int num_slots_freed = old_length - total_remainders; 1014 | modify_metadata(&qf->runtimedata->pc_noccupied_slots, -num_slots_freed); 1015 | /*qf->metadata->noccupied_slots -= (old_length - total_remainders);*/ 1016 | if (!total_remainders) { 1017 | modify_metadata(&qf->runtimedata->pc_ndistinct_elts, -1); 1018 | /*qf->metadata->ndistinct_elts--;*/ 1019 | } 1020 | 1021 | return ret_current_distance; 1022 | } 1023 | 1024 | /***************************************************************************** 1025 | * Code that uses the above to implement a QF with keys and inline counters. * 1026 | *****************************************************************************/ 1027 | 1028 | /* 1029 | Counter format: 1030 | 0 xs: 1031 | 1 x: x 1032 | 2 xs: xx 1033 | 3 0s: 000 1034 | >2 xs: xbc...cx for x != 0, b < x, c != 0, x 1035 | >3 0s: 0c...c00 for c != 0 1036 | */ 1037 | static inline uint64_t *encode_counter(QF *qf, uint64_t remainder, uint64_t 1038 | counter, uint64_t *slots) 1039 | { 1040 | uint64_t digit = remainder; 1041 | uint64_t base = (1ULL << qf->metadata->bits_per_slot) - 1; 1042 | uint64_t *p = slots; 1043 | 1044 | if (counter == 0) 1045 | return p; 1046 | 1047 | *--p = remainder; 1048 | 1049 | if (counter == 1) 1050 | return p; 1051 | 1052 | if (counter == 2) { 1053 | *--p = remainder; 1054 | return p; 1055 | } 1056 | 1057 | if (counter == 3 && remainder == 0) { 1058 | *--p = remainder; 1059 | *--p = remainder; 1060 | return p; 1061 | } 1062 | 1063 | if (counter == 3 && remainder > 0) { 1064 | *--p = 0; 1065 | *--p = remainder; 1066 | return p; 1067 | } 1068 | 1069 | if (remainder == 0) 1070 | *--p = remainder; 1071 | else 1072 | base--; 1073 | 1074 | if (remainder) 1075 | counter -= 3; 1076 | else 1077 | counter -= 4; 1078 | do { 1079 | digit = counter % base; 1080 | digit++; /* Zero not allowed */ 1081 | if (remainder && digit >= remainder) 1082 | digit++; /* Cannot overflow since digit is mod 2^r-2 */ 1083 | *--p = digit; 1084 | counter /= base; 1085 | } while (counter); 1086 | 1087 | if (remainder && digit >= remainder) 1088 | *--p = 0; 1089 | 1090 | *--p = remainder; 1091 | 1092 | return p; 1093 | } 1094 | 1095 | /* Returns the length of the encoding. 1096 | REQUIRES: index points to first slot of a counter. 
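   Editorial worked example (illustrative, assuming 8-bit slots and a nonzero
   remainder): for remainder x = 5 and count 7, encode_counter() above emits
   the four slots 5, 0, 6, 5. The trailing and leading 5s delimit the counter,
   the digit 6 encodes the value 7 - 3 = 4 in base 2^8 - 2 = 254 (digits skip 0
   and the remainder value 5), and the leading 0 is required because that first
   digit is >= x. Running decode_counter() below over those slots returns
   remainder 5 and count 4 + 3 = 7 again.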
*/ 1097 | static inline uint64_t decode_counter(const QF *qf, uint64_t index, uint64_t 1098 | *remainder, uint64_t *count) 1099 | { 1100 | uint64_t base; 1101 | uint64_t rem; 1102 | uint64_t cnt; 1103 | uint64_t digit; 1104 | uint64_t end; 1105 | 1106 | *remainder = rem = get_slot(qf, index); 1107 | 1108 | if (is_runend(qf, index)) { /* Entire run is "0" */ 1109 | *count = 1; 1110 | return index; 1111 | } 1112 | 1113 | digit = get_slot(qf, index + 1); 1114 | 1115 | if (is_runend(qf, index + 1)) { 1116 | *count = digit == rem ? 2 : 1; 1117 | return index + (digit == rem ? 1 : 0); 1118 | } 1119 | 1120 | if (rem > 0 && digit >= rem) { 1121 | *count = digit == rem ? 2 : 1; 1122 | return index + (digit == rem ? 1 : 0); 1123 | } 1124 | 1125 | if (rem > 0 && digit == 0 && get_slot(qf, index + 2) == rem) { 1126 | *count = 3; 1127 | return index + 2; 1128 | } 1129 | 1130 | if (rem == 0 && digit == 0) { 1131 | if (get_slot(qf, index + 2) == 0) { 1132 | *count = 3; 1133 | return index + 2; 1134 | } else { 1135 | *count = 2; 1136 | return index + 1; 1137 | } 1138 | } 1139 | 1140 | cnt = 0; 1141 | base = (1ULL << qf->metadata->bits_per_slot) - (rem ? 2 : 1); 1142 | 1143 | end = index + 1; 1144 | while (digit != rem && !is_runend(qf, end)) { 1145 | if (digit > rem) 1146 | digit--; 1147 | if (digit && rem) 1148 | digit--; 1149 | cnt = cnt * base + digit; 1150 | 1151 | end++; 1152 | digit = get_slot(qf, end); 1153 | } 1154 | 1155 | if (rem) { 1156 | *count = cnt + 3; 1157 | return end; 1158 | } 1159 | 1160 | if (is_runend(qf, end) || get_slot(qf, end + 1) != 0) { 1161 | *count = 1; 1162 | return index; 1163 | } 1164 | 1165 | *count = cnt + 4; 1166 | return end + 1; 1167 | } 1168 | 1169 | /* return the next slot which corresponds to a 1170 | * different element 1171 | * */ 1172 | static inline uint64_t next_slot(QF *qf, uint64_t current) 1173 | { 1174 | uint64_t rem = get_slot(qf, current); 1175 | current++; 1176 | 1177 | while (get_slot(qf, current) == rem && current <= qf->metadata->nslots) { 1178 | current++; 1179 | } 1180 | return current; 1181 | } 1182 | 1183 | static inline int insert1(QF *qf, __uint128_t hash, uint8_t runtime_lock) 1184 | { 1185 | int ret_distance = 0; 1186 | uint64_t hash_remainder = hash & BITMASK(qf->metadata->bits_per_slot); 1187 | uint64_t hash_bucket_index = hash >> qf->metadata->bits_per_slot; 1188 | uint64_t hash_bucket_block_offset = hash_bucket_index % QF_SLOTS_PER_BLOCK; 1189 | 1190 | if (GET_NO_LOCK(runtime_lock) != QF_NO_LOCK) { 1191 | if (!qf_lock(qf, hash_bucket_index, /*small*/ true, runtime_lock)) 1192 | return QF_COULDNT_LOCK; 1193 | } 1194 | if (is_empty(qf, hash_bucket_index) /* might_be_empty(qf, hash_bucket_index) && runend_index == hash_bucket_index */) { 1195 | METADATA_WORD(qf, runends, hash_bucket_index) |= 1ULL << 1196 | (hash_bucket_block_offset % 64); 1197 | set_slot(qf, hash_bucket_index, hash_remainder); 1198 | METADATA_WORD(qf, occupieds, hash_bucket_index) |= 1ULL << 1199 | (hash_bucket_block_offset % 64); 1200 | 1201 | ret_distance = 0; 1202 | modify_metadata(&qf->runtimedata->pc_ndistinct_elts, 1); 1203 | modify_metadata(&qf->runtimedata->pc_noccupied_slots, 1); 1204 | modify_metadata(&qf->runtimedata->pc_nelts, 1); 1205 | } else { 1206 | uint64_t runend_index = run_end(qf, hash_bucket_index); 1207 | int operation = 0; /* Insert into empty bucket */ 1208 | uint64_t insert_index = runend_index + 1; 1209 | uint64_t new_value = hash_remainder; 1210 | 1211 | /* printf("RUNSTART: %02lx RUNEND: %02lx\n", runstart_index, runend_index); */ 1212 | 1213 | 
uint64_t runstart_index = hash_bucket_index == 0 ? 0 : run_end(qf, 1214 | hash_bucket_index 1215 | - 1) + 1; 1216 | 1217 | if (is_occupied(qf, hash_bucket_index)) { 1218 | 1219 | /* Find the counter for this remainder if it exists. */ 1220 | uint64_t current_remainder = get_slot(qf, runstart_index); 1221 | uint64_t zero_terminator = runstart_index; 1222 | 1223 | /* The counter for 0 is special. */ 1224 | if (current_remainder == 0) { 1225 | uint64_t t = runstart_index + 1; 1226 | while (t < runend_index && get_slot(qf, t) != 0) 1227 | t++; 1228 | if (t < runend_index && get_slot(qf, t+1) == 0) 1229 | zero_terminator = t+1; /* Three or more 0s */ 1230 | else if (runstart_index < runend_index && get_slot(qf, runstart_index 1231 | + 1) == 0) 1232 | zero_terminator = runstart_index + 1; /* Exactly two 0s */ 1233 | /* Otherwise, exactly one 0 (i.e. zero_terminator == runstart_index) */ 1234 | 1235 | /* May read past end of run, but that's OK because loop below 1236 | can handle that */ 1237 | if (hash_remainder != 0) { 1238 | runstart_index = zero_terminator + 1; 1239 | current_remainder = get_slot(qf, runstart_index); 1240 | } 1241 | } 1242 | 1243 | /* Skip over counters for other remainders. */ 1244 | while (current_remainder < hash_remainder && runstart_index <= 1245 | runend_index) { 1246 | /* If this remainder has an extended counter, skip over it. */ 1247 | if (runstart_index < runend_index && 1248 | get_slot(qf, runstart_index + 1) < current_remainder) { 1249 | runstart_index = runstart_index + 2; 1250 | while (runstart_index < runend_index && 1251 | get_slot(qf, runstart_index) != current_remainder) 1252 | runstart_index++; 1253 | runstart_index++; 1254 | 1255 | /* This remainder has a simple counter. */ 1256 | } else { 1257 | runstart_index++; 1258 | } 1259 | 1260 | /* This may read past the end of the run, but the while loop 1261 | condition will prevent us from using the invalid result in 1262 | that case. */ 1263 | current_remainder = get_slot(qf, runstart_index); 1264 | } 1265 | 1266 | /* If this is the first time we've inserted the new remainder, 1267 | and it is larger than any remainder in the run. */ 1268 | if (runstart_index > runend_index) { 1269 | operation = 1; 1270 | insert_index = runstart_index; 1271 | new_value = hash_remainder; 1272 | modify_metadata(&qf->runtimedata->pc_ndistinct_elts, 1); 1273 | 1274 | /* This is the first time we're inserting this remainder, but 1275 | there are larger remainders already in the run. */ 1276 | } else if (current_remainder != hash_remainder) { 1277 | operation = 2; /* Inserting */ 1278 | insert_index = runstart_index; 1279 | new_value = hash_remainder; 1280 | modify_metadata(&qf->runtimedata->pc_ndistinct_elts, 1); 1281 | 1282 | /* Cases below here: we're incrementing the (simple or 1283 | extended) counter for this remainder. */ 1284 | 1285 | /* If there's exactly one instance of this remainder. */ 1286 | } else if (runstart_index == runend_index || 1287 | (hash_remainder > 0 && get_slot(qf, runstart_index + 1) > 1288 | hash_remainder) || 1289 | (hash_remainder == 0 && zero_terminator == runstart_index)) { 1290 | operation = 2; /* Insert */ 1291 | insert_index = runstart_index; 1292 | new_value = hash_remainder; 1293 | 1294 | /* If there are exactly two instances of this remainder. 
*/ 1295 | } else if ((hash_remainder > 0 && get_slot(qf, runstart_index + 1) == 1296 | hash_remainder) || 1297 | (hash_remainder == 0 && zero_terminator == runstart_index + 1)) { 1298 | operation = 2; /* Insert */ 1299 | insert_index = runstart_index + 1; 1300 | new_value = 0; 1301 | 1302 | /* Special case for three 0s */ 1303 | } else if (hash_remainder == 0 && zero_terminator == runstart_index + 2) { 1304 | operation = 2; /* Insert */ 1305 | insert_index = runstart_index + 1; 1306 | new_value = 1; 1307 | 1308 | /* There is an extended counter for this remainder. */ 1309 | } else { 1310 | 1311 | /* Move to the LSD of the counter. */ 1312 | insert_index = runstart_index + 1; 1313 | while (get_slot(qf, insert_index+1) != hash_remainder) 1314 | insert_index++; 1315 | 1316 | /* Increment the counter. */ 1317 | uint64_t digit, carry; 1318 | do { 1319 | carry = 0; 1320 | digit = get_slot(qf, insert_index); 1321 | // Convert a leading 0 (which is special) to a normal encoded digit 1322 | if (digit == 0) { 1323 | digit++; 1324 | if (digit == current_remainder) 1325 | digit++; 1326 | } 1327 | 1328 | // Increment the digit 1329 | digit = (digit + 1) & BITMASK(qf->metadata->bits_per_slot); 1330 | 1331 | // Ensure digit meets our encoding requirements 1332 | if (digit == 0) { 1333 | digit++; 1334 | carry = 1; 1335 | } 1336 | if (digit == current_remainder) 1337 | digit = (digit + 1) & BITMASK(qf->metadata->bits_per_slot); 1338 | if (digit == 0) { 1339 | digit++; 1340 | carry = 1; 1341 | } 1342 | 1343 | set_slot(qf, insert_index, digit); 1344 | insert_index--; 1345 | } while(insert_index > runstart_index && carry); 1346 | 1347 | /* If the counter needs to be expanded. */ 1348 | if (insert_index == runstart_index && (carry > 0 || (current_remainder 1349 | != 0 && digit >= 1350 | current_remainder))) 1351 | { 1352 | operation = 2; /* insert */ 1353 | insert_index = runstart_index + 1; 1354 | if (!carry) /* To prepend a 0 before the counter if the MSD is greater than the rem */ 1355 | new_value = 0; 1356 | else if (carry) { /* Increment the new value because we don't use 0 to encode counters */ 1357 | new_value = 2; 1358 | /* If the rem is greater than or equal to the new_value then fail*/ 1359 | if (current_remainder > 0) 1360 | assert(new_value < current_remainder); 1361 | } 1362 | } else { 1363 | operation = -1; 1364 | } 1365 | } 1366 | } else { 1367 | modify_metadata(&qf->runtimedata->pc_ndistinct_elts, 1); 1368 | } 1369 | 1370 | if (operation >= 0) { 1371 | uint64_t empty_slot_index = find_first_empty_slot(qf, runend_index+1); 1372 | if (empty_slot_index >= qf->metadata->xnslots) { 1373 | return QF_NO_SPACE; 1374 | } 1375 | shift_remainders(qf, insert_index, empty_slot_index); 1376 | 1377 | set_slot(qf, insert_index, new_value); 1378 | ret_distance = insert_index - hash_bucket_index; 1379 | 1380 | shift_runends(qf, insert_index, empty_slot_index-1, 1); 1381 | switch (operation) { 1382 | case 0: 1383 | METADATA_WORD(qf, runends, insert_index) |= 1ULL << ((insert_index 1384 | % 1385 | QF_SLOTS_PER_BLOCK) 1386 | % 64); 1387 | break; 1388 | case 1: 1389 | METADATA_WORD(qf, runends, insert_index-1) &= ~(1ULL << 1390 | (((insert_index-1) % 1391 | QF_SLOTS_PER_BLOCK) % 1392 | 64)); 1393 | METADATA_WORD(qf, runends, insert_index) |= 1ULL << ((insert_index 1394 | % 1395 | QF_SLOTS_PER_BLOCK) 1396 | % 64); 1397 | break; 1398 | case 2: 1399 | METADATA_WORD(qf, runends, insert_index) &= ~(1ULL << 1400 | ((insert_index % 1401 | QF_SLOTS_PER_BLOCK) % 1402 | 64)); 1403 | break; 1404 | default: 1405 | 
fprintf(stderr, "Invalid operation %d\n", operation); 1406 | abort(); 1407 | } 1408 | /* 1409 | * Increment the offset for each block between the hash bucket index 1410 | * and block of the empty slot 1411 | * */ 1412 | uint64_t i; 1413 | for (i = hash_bucket_index / QF_SLOTS_PER_BLOCK + 1; i <= 1414 | empty_slot_index/QF_SLOTS_PER_BLOCK; i++) { 1415 | if (get_block(qf, i)->offset < BITMASK(8*sizeof(qf->blocks[0].offset))) 1416 | get_block(qf, i)->offset++; 1417 | assert(get_block(qf, i)->offset != 0); 1418 | } 1419 | modify_metadata(&qf->runtimedata->pc_noccupied_slots, 1); 1420 | } 1421 | modify_metadata(&qf->runtimedata->pc_nelts, 1); 1422 | METADATA_WORD(qf, occupieds, hash_bucket_index) |= 1ULL << 1423 | (hash_bucket_block_offset % 64); 1424 | } 1425 | 1426 | if (GET_NO_LOCK(runtime_lock) != QF_NO_LOCK) { 1427 | qf_unlock(qf, hash_bucket_index, /*small*/ true); 1428 | } 1429 | 1430 | return ret_distance; 1431 | } 1432 | 1433 | static inline int insert(QF *qf, __uint128_t hash, uint64_t count, uint8_t 1434 | runtime_lock) 1435 | { 1436 | int ret_distance = 0; 1437 | uint64_t hash_remainder = hash & BITMASK(qf->metadata->bits_per_slot); 1438 | uint64_t hash_bucket_index = hash >> qf->metadata->bits_per_slot; 1439 | uint64_t hash_bucket_block_offset = hash_bucket_index % QF_SLOTS_PER_BLOCK; 1440 | /*uint64_t hash_bucket_lock_offset = hash_bucket_index % NUM_SLOTS_TO_LOCK;*/ 1441 | 1442 | if (GET_NO_LOCK(runtime_lock) != QF_NO_LOCK) { 1443 | if (!qf_lock(qf, hash_bucket_index, /*small*/ false, runtime_lock)) 1444 | return QF_COULDNT_LOCK; 1445 | } 1446 | 1447 | uint64_t runend_index = run_end(qf, hash_bucket_index); 1448 | 1449 | /* Empty slot */ 1450 | if (might_be_empty(qf, hash_bucket_index) && runend_index == 1451 | hash_bucket_index) { 1452 | METADATA_WORD(qf, runends, hash_bucket_index) |= 1ULL << 1453 | (hash_bucket_block_offset % 64); 1454 | set_slot(qf, hash_bucket_index, hash_remainder); 1455 | METADATA_WORD(qf, occupieds, hash_bucket_index) |= 1ULL << 1456 | (hash_bucket_block_offset % 64); 1457 | 1458 | modify_metadata(&qf->runtimedata->pc_ndistinct_elts, 1); 1459 | modify_metadata(&qf->runtimedata->pc_noccupied_slots, 1); 1460 | modify_metadata(&qf->runtimedata->pc_nelts, 1); 1461 | /* This trick will, I hope, keep the fast case fast. */ 1462 | if (count > 1) { 1463 | insert(qf, hash, count - 1, QF_NO_LOCK); 1464 | } 1465 | } else { /* Non-empty slot */ 1466 | uint64_t new_values[67]; 1467 | int64_t runstart_index = hash_bucket_index == 0 ? 0 : run_end(qf, 1468 | hash_bucket_index 1469 | - 1) + 1; 1470 | 1471 | bool ret; 1472 | if (!is_occupied(qf, hash_bucket_index)) { /* Empty bucket, but its slot is occupied. */ 1473 | uint64_t *p = encode_counter(qf, hash_remainder, count, &new_values[67]); 1474 | ret = insert_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, 1475 | 0, 1476 | hash_bucket_index, 1477 | runstart_index, 1478 | p, 1479 | &new_values[67] - p, 1480 | 0); 1481 | if (!ret) 1482 | return QF_NO_SPACE; 1483 | modify_metadata(&qf->runtimedata->pc_ndistinct_elts, 1); 1484 | ret_distance = runstart_index - hash_bucket_index; 1485 | } else { /* Non-empty bucket */ 1486 | 1487 | uint64_t current_remainder, current_count, current_end; 1488 | 1489 | /* Find the counter for this remainder, if one exists. 
*/ 1490 | current_end = decode_counter(qf, runstart_index, &current_remainder, 1491 | &current_count); 1492 | while (current_remainder < hash_remainder && !is_runend(qf, current_end)) { 1493 | runstart_index = current_end + 1; 1494 | current_end = decode_counter(qf, runstart_index, &current_remainder, 1495 | &current_count); 1496 | } 1497 | 1498 | /* If we reached the end of the run w/o finding a counter for this remainder, 1499 | then append a counter for this remainder to the run. */ 1500 | if (current_remainder < hash_remainder) { 1501 | uint64_t *p = encode_counter(qf, hash_remainder, count, &new_values[67]); 1502 | ret = insert_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, 1503 | 1, /* Append to bucket */ 1504 | hash_bucket_index, 1505 | current_end + 1, 1506 | p, 1507 | &new_values[67] - p, 1508 | 0); 1509 | if (!ret) 1510 | return QF_NO_SPACE; 1511 | modify_metadata(&qf->runtimedata->pc_ndistinct_elts, 1); 1512 | ret_distance = (current_end + 1) - hash_bucket_index; 1513 | /* Found a counter for this remainder. Add in the new count. */ 1514 | } else if (current_remainder == hash_remainder) { 1515 | uint64_t *p = encode_counter(qf, hash_remainder, current_count + count, &new_values[67]); 1516 | ret = insert_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, 1517 | is_runend(qf, current_end) ? 1 : 2, 1518 | hash_bucket_index, 1519 | runstart_index, 1520 | p, 1521 | &new_values[67] - p, 1522 | current_end - runstart_index + 1); 1523 | if (!ret) 1524 | return QF_NO_SPACE; 1525 | ret_distance = runstart_index - hash_bucket_index; 1526 | /* No counter for this remainder, but there are larger 1527 | remainders, so we're not appending to the bucket. */ 1528 | } else { 1529 | uint64_t *p = encode_counter(qf, hash_remainder, count, &new_values[67]); 1530 | ret = insert_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, 1531 | 2, /* Insert to bucket */ 1532 | hash_bucket_index, 1533 | runstart_index, 1534 | p, 1535 | &new_values[67] - p, 1536 | 0); 1537 | if (!ret) 1538 | return QF_NO_SPACE; 1539 | modify_metadata(&qf->runtimedata->pc_ndistinct_elts, 1); 1540 | ret_distance = runstart_index - hash_bucket_index; 1541 | } 1542 | } 1543 | METADATA_WORD(qf, occupieds, hash_bucket_index) |= 1ULL << (hash_bucket_block_offset % 64); 1544 | 1545 | modify_metadata(&qf->runtimedata->pc_nelts, count); 1546 | } 1547 | 1548 | if (GET_NO_LOCK(runtime_lock) != QF_NO_LOCK) { 1549 | qf_unlock(qf, hash_bucket_index, /*small*/ false); 1550 | } 1551 | 1552 | return ret_distance; 1553 | } 1554 | 1555 | inline static int _remove(QF *qf, __uint128_t hash, uint64_t count, uint8_t 1556 | runtime_lock) 1557 | { 1558 | int ret_numfreedslots = 0; 1559 | uint64_t hash_remainder = hash & BITMASK(qf->metadata->bits_per_slot); 1560 | uint64_t hash_bucket_index = hash >> qf->metadata->bits_per_slot; 1561 | uint64_t current_remainder, current_count, current_end; 1562 | uint64_t new_values[67]; 1563 | 1564 | if (GET_NO_LOCK(runtime_lock) != QF_NO_LOCK) { 1565 | if (!qf_lock(qf, hash_bucket_index, /*small*/ false, runtime_lock)) 1566 | return -2; 1567 | } 1568 | 1569 | /* Empty bucket */ 1570 | if (!is_occupied(qf, hash_bucket_index)) 1571 | return -1; 1572 | 1573 | uint64_t runstart_index = hash_bucket_index == 0 ? 
0 : run_end(qf, hash_bucket_index - 1) + 1; 1574 | uint64_t original_runstart_index = runstart_index; 1575 | int only_item_in_the_run = 0; 1576 | 1577 | /*Find the counter for this remainder, if one exists.*/ 1578 | current_end = decode_counter(qf, runstart_index, &current_remainder, &current_count); 1579 | while (current_remainder < hash_remainder && !is_runend(qf, current_end)) { 1580 | runstart_index = current_end + 1; 1581 | current_end = decode_counter(qf, runstart_index, &current_remainder, &current_count); 1582 | } 1583 | /* remainder not found in the given run */ 1584 | if (current_remainder != hash_remainder) 1585 | return -1; 1586 | 1587 | if (original_runstart_index == runstart_index && is_runend(qf, current_end)) 1588 | only_item_in_the_run = 1; 1589 | 1590 | /* encode the new counter */ 1591 | uint64_t *p = encode_counter(qf, hash_remainder, 1592 | count > current_count ? 0 : current_count - count, 1593 | &new_values[67]); 1594 | ret_numfreedslots = remove_replace_slots_and_shift_remainders_and_runends_and_offsets(qf, 1595 | only_item_in_the_run, 1596 | hash_bucket_index, 1597 | runstart_index, 1598 | p, 1599 | &new_values[67] - p, 1600 | current_end - runstart_index + 1); 1601 | 1602 | // update the nelements. 1603 | modify_metadata(&qf->runtimedata->pc_nelts, -count); 1604 | /*qf->metadata->nelts -= count;*/ 1605 | 1606 | if (GET_NO_LOCK(runtime_lock) != QF_NO_LOCK) { 1607 | qf_unlock(qf, hash_bucket_index, /*small*/ false); 1608 | } 1609 | 1610 | return ret_numfreedslots; 1611 | } 1612 | 1613 | /*********************************************************************** 1614 | * Code that uses the above to implement key-value-counter operations. * 1615 | ***********************************************************************/ 1616 | 1617 | uint64_t qf_init(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t value_bits, 1618 | enum qf_hashmode hash, uint32_t seed, void* buffer, uint64_t 1619 | buffer_len) 1620 | { 1621 | uint64_t num_slots, xnslots, nblocks; 1622 | uint64_t key_remainder_bits, bits_per_slot; 1623 | uint64_t size; 1624 | uint64_t total_num_bytes; 1625 | 1626 | assert(popcnt(nslots) == 1); /* nslots must be a power of 2 */ 1627 | num_slots = nslots; 1628 | xnslots = nslots + 10*sqrt((double)nslots); 1629 | nblocks = (xnslots + QF_SLOTS_PER_BLOCK - 1) / QF_SLOTS_PER_BLOCK; 1630 | key_remainder_bits = key_bits; 1631 | while (nslots > 1 && key_remainder_bits > 0) { 1632 | key_remainder_bits--; 1633 | nslots >>= 1; 1634 | } 1635 | assert(key_remainder_bits >= 2); 1636 | 1637 | bits_per_slot = key_remainder_bits + value_bits; 1638 | assert (QF_BITS_PER_SLOT == 0 || QF_BITS_PER_SLOT == qf->metadata->bits_per_slot); 1639 | assert(bits_per_slot > 1); 1640 | #if QF_BITS_PER_SLOT == 8 || QF_BITS_PER_SLOT == 16 || QF_BITS_PER_SLOT == 32 || QF_BITS_PER_SLOT == 64 1641 | size = nblocks * sizeof(qfblock); 1642 | #else 1643 | size = nblocks * (sizeof(qfblock) + QF_SLOTS_PER_BLOCK * bits_per_slot / 8); 1644 | #endif 1645 | 1646 | total_num_bytes = sizeof(qfmetadata) + size; 1647 | if (buffer == NULL || total_num_bytes > buffer_len) 1648 | return total_num_bytes; 1649 | 1650 | // memset(buffer, 0, total_num_bytes); 1651 | qf->metadata = (qfmetadata *)(buffer); 1652 | qf->blocks = (qfblock *)(qf->metadata + 1); 1653 | 1654 | qf->metadata->magic_endian_number = MAGIC_NUMBER; 1655 | qf->metadata->reserved = 0; 1656 | qf->metadata->hash_mode = hash; 1657 | qf->metadata->total_size_in_bytes = size; 1658 | qf->metadata->seed = seed; 1659 | qf->metadata->nslots = num_slots; 1660 | qf->metadata->xnslots = 
xnslots; 1661 | qf->metadata->key_bits = key_bits; 1662 | qf->metadata->value_bits = value_bits; 1663 | qf->metadata->key_remainder_bits = key_remainder_bits; 1664 | qf->metadata->bits_per_slot = bits_per_slot; 1665 | 1666 | qf->metadata->range = qf->metadata->nslots; 1667 | qf->metadata->range <<= qf->metadata->key_remainder_bits; 1668 | qf->metadata->nblocks = (qf->metadata->xnslots + QF_SLOTS_PER_BLOCK - 1) / 1669 | QF_SLOTS_PER_BLOCK; 1670 | qf->metadata->nelts = 0; 1671 | qf->metadata->ndistinct_elts = 0; 1672 | qf->metadata->noccupied_slots = 0; 1673 | 1674 | qf->runtimedata->num_locks = (qf->metadata->xnslots/NUM_SLOTS_TO_LOCK)+2; 1675 | 1676 | pc_init(&qf->runtimedata->pc_nelts, (int64_t*)&qf->metadata->nelts, 8, 100); 1677 | pc_init(&qf->runtimedata->pc_ndistinct_elts, (int64_t*)&qf->metadata->ndistinct_elts, 8, 100); 1678 | pc_init(&qf->runtimedata->pc_noccupied_slots, (int64_t*)&qf->metadata->noccupied_slots, 8, 100); 1679 | /* initialize container resize */ 1680 | qf->runtimedata->auto_resize = 0; 1681 | qf->runtimedata->container_resize = qf_resize_malloc; 1682 | /* initialize all the locks to 0 */ 1683 | qf->runtimedata->metadata_lock = 0; 1684 | qf->runtimedata->locks = (volatile int *)calloc(qf->runtimedata->num_locks, 1685 | sizeof(volatile int)); 1686 | if (qf->runtimedata->locks == NULL) { 1687 | perror("Couldn't allocate memory for runtime locks."); 1688 | exit(EXIT_FAILURE); 1689 | } 1690 | #ifdef LOG_WAIT_TIME 1691 | qf->runtimedata->wait_times = (wait_time_data* 1692 | )calloc(qf->runtimedata->num_locks+1, 1693 | sizeof(wait_time_data)); 1694 | if (qf->runtimedata->wait_times == NULL) { 1695 | perror("Couldn't allocate memory for runtime wait_times."); 1696 | exit(EXIT_FAILURE); 1697 | } 1698 | #endif 1699 | 1700 | return total_num_bytes; 1701 | } 1702 | 1703 | uint64_t qf_use(QF* qf, void* buffer, uint64_t buffer_len) 1704 | { 1705 | qf->metadata = (qfmetadata *)(buffer); 1706 | if (qf->metadata->total_size_in_bytes + sizeof(qfmetadata) > buffer_len) { 1707 | return qf->metadata->total_size_in_bytes + sizeof(qfmetadata); 1708 | } 1709 | qf->blocks = (qfblock *)(qf->metadata + 1); 1710 | 1711 | qf->runtimedata = (qfruntime *)calloc(sizeof(qfruntime), 1); 1712 | if (qf->runtimedata == NULL) { 1713 | perror("Couldn't allocate memory for runtime data."); 1714 | exit(EXIT_FAILURE); 1715 | } 1716 | /* initialize all the locks to 0 */ 1717 | qf->runtimedata->metadata_lock = 0; 1718 | qf->runtimedata->locks = (volatile int *)calloc(qf->runtimedata->num_locks, 1719 | sizeof(volatile int)); 1720 | if (qf->runtimedata->locks == NULL) { 1721 | perror("Couldn't allocate memory for runtime locks."); 1722 | exit(EXIT_FAILURE); 1723 | } 1724 | #ifdef LOG_WAIT_TIME 1725 | qf->runtimedata->wait_times = (wait_time_data* 1726 | )calloc(qf->runtimedata->num_locks+1, 1727 | sizeof(wait_time_data)); 1728 | if (qf->runtimedata->wait_times == NULL) { 1729 | perror("Couldn't allocate memory for runtime wait_times."); 1730 | exit(EXIT_FAILURE); 1731 | } 1732 | #endif 1733 | 1734 | return sizeof(qfmetadata) + qf->metadata->total_size_in_bytes; 1735 | } 1736 | 1737 | void *qf_destroy(QF *qf) 1738 | { 1739 | assert(qf->runtimedata != NULL); 1740 | if (qf->runtimedata->locks != NULL) 1741 | free((void*)qf->runtimedata->locks); 1742 | if (qf->runtimedata->wait_times != NULL) 1743 | free(qf->runtimedata->wait_times); 1744 | if (qf->runtimedata->f_info.filepath != NULL) 1745 | free(qf->runtimedata->f_info.filepath); 1746 | free(qf->runtimedata); 1747 | 1748 | return (void*)qf->metadata; 1749 | } 
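/* ---------------------------------------------------------------------------
 * Editorial note (illustrative sketch, not part of the original gqf.c): the
 * qf_malloc() routine below wraps the two-phase qf_init() protocol used in
 * this file -- a first call with buffer == NULL returns the number of bytes
 * required, and a second call with a large-enough buffer performs the actual
 * initialization.  A minimal caller of the public API implemented here might
 * look like the following; the parameter values (2^20 slots, 28 key bits,
 * 0 value bits, key 42) are arbitrary examples.
 *
 *   #include <stdio.h>
 *   #include "gqf.h"
 *
 *   int cqf_example(void)
 *   {
 *       QF qf;
 *       // 2^20 slots => 20 quotient bits; 28 key bits leave an 8-bit remainder.
 *       if (!qf_malloc(&qf, 1ULL << 20, 28, 0, QF_HASH_DEFAULT, 0))
 *           return -1;
 *
 *       // Insert key 42 with count 5, then query the count back.
 *       if (qf_insert(&qf, 42, 0, 5, QF_NO_LOCK) < 0)
 *           return -1;
 *       printf("count(42) = %lu\n",
 *              (unsigned long)qf_count_key_value(&qf, 42, 0, QF_NO_LOCK));
 *
 *       qf_free(&qf);
 *       return 0;
 *   }
 * ------------------------------------------------------------------------- */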
1750 | 1751 | bool qf_malloc(QF *qf, uint64_t nslots, uint64_t key_bits, uint64_t 1752 | value_bits, enum qf_hashmode hash, uint32_t seed) 1753 | { 1754 | uint64_t total_num_bytes = qf_init(qf, nslots, key_bits, value_bits, 1755 | hash, seed, NULL, 0); 1756 | 1757 | void *buffer = malloc(total_num_bytes); 1758 | if (buffer == NULL) { 1759 | perror("Couldn't allocate memory for the CQF."); 1760 | exit(EXIT_FAILURE); 1761 | } 1762 | 1763 | qf->runtimedata = (qfruntime *)calloc(sizeof(qfruntime), 1); 1764 | if (qf->runtimedata == NULL) { 1765 | perror("Couldn't allocate memory for runtime data."); 1766 | exit(EXIT_FAILURE); 1767 | } 1768 | 1769 | uint64_t init_size = qf_init(qf, nslots, key_bits, value_bits, hash, seed, 1770 | buffer, total_num_bytes); 1771 | 1772 | if (init_size == total_num_bytes) 1773 | return true; 1774 | else 1775 | return false; 1776 | } 1777 | 1778 | bool qf_free(QF *qf) 1779 | { 1780 | assert(qf->metadata != NULL); 1781 | void *buffer = qf_destroy(qf); 1782 | if (buffer != NULL) { 1783 | free(buffer); 1784 | return true; 1785 | } 1786 | 1787 | return false; 1788 | } 1789 | 1790 | void qf_copy(QF *dest, const QF *src) 1791 | { 1792 | DEBUG_CQF("%s\n","Source CQF"); 1793 | DEBUG_DUMP(src); 1794 | memcpy(dest->runtimedata, src->runtimedata, sizeof(qfruntime)); 1795 | memcpy(dest->metadata, src->metadata, sizeof(qfmetadata)); 1796 | memcpy(dest->blocks, src->blocks, src->metadata->total_size_in_bytes); 1797 | DEBUG_CQF("%s\n","Destination CQF after copy."); 1798 | DEBUG_DUMP(dest); 1799 | } 1800 | 1801 | void qf_reset(QF *qf) 1802 | { 1803 | qf->metadata->nelts = 0; 1804 | qf->metadata->ndistinct_elts = 0; 1805 | qf->metadata->noccupied_slots = 0; 1806 | 1807 | #ifdef LOG_WAIT_TIME 1808 | memset(qf->wait_times, 0, 1809 | (qf->runtimedata->num_locks+1)*sizeof(wait_time_data)); 1810 | #endif 1811 | #if QF_BITS_PER_SLOT == 8 || QF_BITS_PER_SLOT == 16 || QF_BITS_PER_SLOT == 32 || QF_BITS_PER_SLOT == 64 1812 | memset(qf->blocks, 0, qf->metadata->nblocks* sizeof(qfblock)); 1813 | #else 1814 | memset(qf->blocks, 0, qf->metadata->nblocks*(sizeof(qfblock) + QF_SLOTS_PER_BLOCK * 1815 | qf->metadata->bits_per_slot / 8)); 1816 | #endif 1817 | } 1818 | 1819 | int64_t qf_resize_malloc(QF *qf, uint64_t nslots) 1820 | { 1821 | QF new_qf; 1822 | if (!qf_malloc(&new_qf, nslots, qf->metadata->key_bits, 1823 | qf->metadata->value_bits, qf->metadata->hash_mode, 1824 | qf->metadata->seed)) 1825 | return -1; 1826 | if (qf->runtimedata->auto_resize) 1827 | qf_set_auto_resize(&new_qf, true); 1828 | 1829 | // copy keys from qf into new_qf 1830 | QFi qfi; 1831 | qf_iterator_from_position(qf, &qfi, 0); 1832 | int64_t ret_numkeys = 0; 1833 | do { 1834 | uint64_t key, value, count; 1835 | qfi_get_hash(&qfi, &key, &value, &count); 1836 | qfi_next(&qfi); 1837 | int ret = qf_insert(&new_qf, key, value, count, QF_NO_LOCK | QF_KEY_IS_HASH); 1838 | if (ret < 0) { 1839 | fprintf(stderr, "Failed to insert key: %ld into the new CQF.\n", key); 1840 | return ret; 1841 | } 1842 | ret_numkeys++; 1843 | } while(!qfi_end(&qfi)); 1844 | 1845 | qf_free(qf); 1846 | memcpy(qf, &new_qf, sizeof(QF)); 1847 | 1848 | return ret_numkeys; 1849 | } 1850 | 1851 | uint64_t qf_resize(QF* qf, uint64_t nslots, void* buffer, uint64_t buffer_len) 1852 | { 1853 | QF new_qf; 1854 | new_qf.runtimedata = (qfruntime *)calloc(sizeof(qfruntime), 1); 1855 | if (new_qf.runtimedata == NULL) { 1856 | perror("Couldn't allocate memory for runtime data.\n"); 1857 | exit(EXIT_FAILURE); 1858 | } 1859 | 1860 | uint64_t init_size = qf_init(&new_qf, 
nslots, qf->metadata->key_bits, 1861 | qf->metadata->value_bits, 1862 | qf->metadata->hash_mode, qf->metadata->seed, 1863 | buffer, buffer_len); 1864 | 1865 | if (init_size > buffer_len) 1866 | return init_size; 1867 | 1868 | if (qf->runtimedata->auto_resize) 1869 | qf_set_auto_resize(&new_qf, true); 1870 | 1871 | // copy keys from qf into new_qf 1872 | QFi qfi; 1873 | qf_iterator_from_position(qf, &qfi, 0); 1874 | do { 1875 | uint64_t key, value, count; 1876 | qfi_get_hash(&qfi, &key, &value, &count); 1877 | qfi_next(&qfi); 1878 | int ret = qf_insert(&new_qf, key, value, count, QF_NO_LOCK | QF_KEY_IS_HASH); 1879 | if (ret < 0) { 1880 | fprintf(stderr, "Failed to insert key: %ld into the new CQF.\n", key); 1881 | abort(); 1882 | } 1883 | } while(!qfi_end(&qfi)); 1884 | 1885 | qf_free(qf); 1886 | memcpy(qf, &new_qf, sizeof(QF)); 1887 | 1888 | return init_size; 1889 | } 1890 | 1891 | void qf_set_auto_resize(QF* qf, bool enabled) 1892 | { 1893 | if (enabled) 1894 | qf->runtimedata->auto_resize = 1; 1895 | else 1896 | qf->runtimedata->auto_resize = 0; 1897 | } 1898 | 1899 | int qf_insert(QF *qf, uint64_t key, uint64_t value, uint64_t count, uint8_t 1900 | flags) 1901 | { 1902 | // We fill up the CQF up to 95% load factor. 1903 | // This is a very conservative check. 1904 | if (qf_get_num_occupied_slots(qf) >= qf->metadata->nslots * 0.95) { 1905 | if (qf->runtimedata->auto_resize) { 1906 | /*fprintf(stdout, "Resizing the CQF.\n");*/ 1907 | if (qf->runtimedata->container_resize(qf, qf->metadata->nslots * 2) < 0) 1908 | { 1909 | fprintf(stderr, "Resizing the failed.\n"); 1910 | return QF_NO_SPACE; 1911 | } 1912 | } else 1913 | return QF_NO_SPACE; 1914 | } 1915 | if (count == 0) 1916 | return 0; 1917 | 1918 | if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) { 1919 | if (qf->metadata->hash_mode == QF_HASH_DEFAULT) 1920 | key = MurmurHash64A(((void *)&key), sizeof(key), 1921 | qf->metadata->seed) % qf->metadata->range; 1922 | else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE) 1923 | key = hash_64(key, BITMASK(qf->metadata->key_bits)); 1924 | } 1925 | uint64_t hash = (key << qf->metadata->value_bits) | (value & 1926 | BITMASK(qf->metadata->value_bits)); 1927 | int ret; 1928 | if (count == 1) 1929 | ret = insert1(qf, hash, flags); 1930 | else 1931 | ret = insert(qf, hash, count, flags); 1932 | 1933 | // check for fullness based on the distance from the home slot to the slot 1934 | // in which the key is inserted 1935 | if (ret == QF_NO_SPACE || ret > DISTANCE_FROM_HOME_SLOT_CUTOFF) { 1936 | float load_factor = qf_get_num_occupied_slots(qf) / 1937 | (float)qf->metadata->nslots; 1938 | fprintf(stdout, "Load factor: %lf\n", load_factor); 1939 | if (qf->runtimedata->auto_resize) { 1940 | fprintf(stdout, "Resizing the CQF.\n"); 1941 | if (qf->runtimedata->container_resize(qf, qf->metadata->nslots * 2) > 0) 1942 | { 1943 | if (ret == QF_NO_SPACE) { 1944 | if (count == 1) 1945 | ret = insert1(qf, hash, flags); 1946 | else 1947 | ret = insert(qf, hash, count, flags); 1948 | } 1949 | fprintf(stderr, "Resize finished.\n"); 1950 | } else { 1951 | fprintf(stderr, "Resize failed\n"); 1952 | ret = QF_NO_SPACE; 1953 | } 1954 | } else { 1955 | fprintf(stderr, "The CQF is filling up.\n"); 1956 | ret = QF_NO_SPACE; 1957 | } 1958 | } 1959 | return ret; 1960 | } 1961 | 1962 | int qf_set_count(QF *qf, uint64_t key, uint64_t value, uint64_t count, uint8_t 1963 | flags) 1964 | { 1965 | if (count == 0) 1966 | return 0; 1967 | 1968 | uint64_t cur_count = qf_count_key_value(qf, key, value, flags); 1969 | int64_t delta = count - 
cur_count; 1970 | 1971 | int ret; 1972 | if (delta == 0) 1973 | ret = 0; 1974 | else if (delta > 0) 1975 | ret = qf_insert(qf, key, value, delta, flags); 1976 | else 1977 | ret = qf_remove(qf, key, value, labs(delta), flags); 1978 | 1979 | return ret; 1980 | } 1981 | 1982 | int qf_remove(QF *qf, uint64_t key, uint64_t value, uint64_t count, uint8_t 1983 | flags) 1984 | { 1985 | if (count == 0) 1986 | return true; 1987 | 1988 | if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) { 1989 | if (qf->metadata->hash_mode == QF_HASH_DEFAULT) 1990 | key = MurmurHash64A(((void *)&key), sizeof(key), 1991 | qf->metadata->seed) % qf->metadata->range; 1992 | else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE) 1993 | key = hash_64(key, BITMASK(qf->metadata->key_bits)); 1994 | } 1995 | uint64_t hash = (key << qf->metadata->value_bits) | (value & 1996 | BITMASK(qf->metadata->value_bits)); 1997 | return _remove(qf, hash, count, flags); 1998 | } 1999 | 2000 | int qf_delete_key_value(QF *qf, uint64_t key, uint64_t value, uint8_t flags) 2001 | { 2002 | uint64_t count = qf_count_key_value(qf, key, value, flags); 2003 | if (count == 0) 2004 | return true; 2005 | 2006 | if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) { 2007 | if (qf->metadata->hash_mode == QF_HASH_DEFAULT) 2008 | key = MurmurHash64A(((void *)&key), sizeof(key), 2009 | qf->metadata->seed) % qf->metadata->range; 2010 | else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE) 2011 | key = hash_64(key, BITMASK(qf->metadata->key_bits)); 2012 | } 2013 | uint64_t hash = (key << qf->metadata->value_bits) | (value & 2014 | BITMASK(qf->metadata->value_bits)); 2015 | return _remove(qf, hash, count, flags); 2016 | } 2017 | 2018 | uint64_t qf_count_key_value(const QF *qf, uint64_t key, uint64_t value, 2019 | uint8_t flags) 2020 | { 2021 | if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) { 2022 | if (qf->metadata->hash_mode == QF_HASH_DEFAULT) 2023 | key = MurmurHash64A(((void *)&key), sizeof(key), 2024 | qf->metadata->seed) % qf->metadata->range; 2025 | else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE) 2026 | key = hash_64(key, BITMASK(qf->metadata->key_bits)); 2027 | } 2028 | uint64_t hash = (key << qf->metadata->value_bits) | (value & 2029 | BITMASK(qf->metadata->value_bits)); 2030 | uint64_t hash_remainder = hash & BITMASK(qf->metadata->bits_per_slot); 2031 | int64_t hash_bucket_index = hash >> qf->metadata->bits_per_slot; 2032 | 2033 | if (!is_occupied(qf, hash_bucket_index)) 2034 | return 0; 2035 | 2036 | int64_t runstart_index = hash_bucket_index == 0 ? 
0 : run_end(qf, 2037 | hash_bucket_index-1) 2038 | + 1; 2039 | if (runstart_index < hash_bucket_index) 2040 | runstart_index = hash_bucket_index; 2041 | 2042 | /* printf("MC RUNSTART: %02lx RUNEND: %02lx\n", runstart_index, runend_index); */ 2043 | 2044 | uint64_t current_remainder, current_count, current_end; 2045 | do { 2046 | current_end = decode_counter(qf, runstart_index, &current_remainder, 2047 | &current_count); 2048 | if (current_remainder == hash_remainder) 2049 | return current_count; 2050 | runstart_index = current_end + 1; 2051 | } while (!is_runend(qf, current_end)); 2052 | 2053 | return 0; 2054 | } 2055 | 2056 | uint64_t qf_query(const QF *qf, uint64_t key, uint64_t *value, uint8_t flags) 2057 | { 2058 | if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) { 2059 | if (qf->metadata->hash_mode == QF_HASH_DEFAULT) 2060 | key = MurmurHash64A(((void *)&key), sizeof(key), 2061 | qf->metadata->seed) % qf->metadata->range; 2062 | else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE) 2063 | key = hash_64(key, BITMASK(qf->metadata->key_bits)); 2064 | } 2065 | uint64_t hash = key; 2066 | uint64_t hash_remainder = hash & BITMASK(qf->metadata->key_remainder_bits); 2067 | int64_t hash_bucket_index = hash >> qf->metadata->key_remainder_bits; 2068 | 2069 | if (!is_occupied(qf, hash_bucket_index)) 2070 | return 0; 2071 | 2072 | int64_t runstart_index = hash_bucket_index == 0 ? 0 : run_end(qf, 2073 | hash_bucket_index-1) 2074 | + 1; 2075 | if (runstart_index < hash_bucket_index) 2076 | runstart_index = hash_bucket_index; 2077 | 2078 | /* printf("MC RUNSTART: %02lx RUNEND: %02lx\n", runstart_index, runend_index); */ 2079 | 2080 | uint64_t current_remainder, current_count, current_end; 2081 | do { 2082 | current_end = decode_counter(qf, runstart_index, &current_remainder, 2083 | &current_count); 2084 | *value = current_remainder & BITMASK(qf->metadata->value_bits); 2085 | current_remainder = current_remainder >> qf->metadata->value_bits; 2086 | if (current_remainder == hash_remainder) { 2087 | return current_count; 2088 | } 2089 | runstart_index = current_end + 1; 2090 | } while (!is_runend(qf, current_end)); 2091 | 2092 | return 0; 2093 | } 2094 | 2095 | int64_t qf_get_unique_index(const QF *qf, uint64_t key, uint64_t value, 2096 | uint8_t flags) 2097 | { 2098 | if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) { 2099 | if (qf->metadata->hash_mode == QF_HASH_DEFAULT) 2100 | key = MurmurHash64A(((void *)&key), sizeof(key), 2101 | qf->metadata->seed) % qf->metadata->range; 2102 | else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE) 2103 | key = hash_64(key, BITMASK(qf->metadata->key_bits)); 2104 | } 2105 | uint64_t hash = (key << qf->metadata->value_bits) | (value & 2106 | BITMASK(qf->metadata->value_bits)); 2107 | uint64_t hash_remainder = hash & BITMASK(qf->metadata->bits_per_slot); 2108 | int64_t hash_bucket_index = hash >> qf->metadata->bits_per_slot; 2109 | 2110 | if (!is_occupied(qf, hash_bucket_index)) 2111 | return QF_DOESNT_EXIST; 2112 | 2113 | int64_t runstart_index = hash_bucket_index == 0 ? 
0 : run_end(qf, 2114 | hash_bucket_index-1) 2115 | + 1; 2116 | if (runstart_index < hash_bucket_index) 2117 | runstart_index = hash_bucket_index; 2118 | 2119 | /* printf("MC RUNSTART: %02lx RUNEND: %02lx\n", runstart_index, runend_index); */ 2120 | 2121 | uint64_t current_remainder, current_count, current_end; 2122 | do { 2123 | current_end = decode_counter(qf, runstart_index, &current_remainder, 2124 | &current_count); 2125 | if (current_remainder == hash_remainder) 2126 | return runstart_index; 2127 | 2128 | runstart_index = current_end + 1; 2129 | } while (!is_runend(qf, current_end)); 2130 | 2131 | return QF_DOESNT_EXIST; 2132 | } 2133 | 2134 | enum qf_hashmode qf_get_hashmode(const QF *qf) { 2135 | return qf->metadata->hash_mode; 2136 | } 2137 | uint64_t qf_get_hash_seed(const QF *qf) { 2138 | return qf->metadata->seed; 2139 | } 2140 | __uint128_t qf_get_hash_range(const QF *qf) { 2141 | return qf->metadata->range; 2142 | } 2143 | 2144 | bool qf_is_auto_resize_enabled(const QF *qf) { 2145 | if (qf->runtimedata->auto_resize == 1) 2146 | return true; 2147 | return false; 2148 | } 2149 | uint64_t qf_get_total_size_in_bytes(const QF *qf) { 2150 | return qf->metadata->total_size_in_bytes; 2151 | } 2152 | uint64_t qf_get_nslots(const QF *qf) { 2153 | return qf->metadata->nslots; 2154 | } 2155 | uint64_t qf_get_num_occupied_slots(const QF *qf) { 2156 | pc_sync(&qf->runtimedata->pc_noccupied_slots); 2157 | return qf->metadata->noccupied_slots; 2158 | } 2159 | 2160 | uint64_t qf_get_num_key_bits(const QF *qf) { 2161 | return qf->metadata->key_bits; 2162 | } 2163 | uint64_t qf_get_num_value_bits(const QF *qf) { 2164 | return qf->metadata->value_bits; 2165 | } 2166 | uint64_t qf_get_num_key_remainder_bits(const QF *qf) { 2167 | return qf->metadata->key_remainder_bits; 2168 | } 2169 | uint64_t qf_get_bits_per_slot(const QF *qf) { 2170 | return qf->metadata->bits_per_slot; 2171 | } 2172 | 2173 | uint64_t qf_get_sum_of_counts(const QF *qf) { 2174 | pc_sync(&qf->runtimedata->pc_nelts); 2175 | return qf->metadata->nelts; 2176 | } 2177 | uint64_t qf_get_num_distinct_key_value_pairs(const QF *qf) { 2178 | pc_sync(&qf->runtimedata->pc_ndistinct_elts); 2179 | return qf->metadata->ndistinct_elts; 2180 | } 2181 | 2182 | void qf_sync_counters(const QF *qf) { 2183 | pc_sync(&qf->runtimedata->pc_ndistinct_elts); 2184 | pc_sync(&qf->runtimedata->pc_nelts); 2185 | pc_sync(&qf->runtimedata->pc_noccupied_slots); 2186 | } 2187 | 2188 | /* initialize the iterator at the run corresponding 2189 | * to the position index 2190 | */ 2191 | int64_t qf_iterator_from_position(const QF *qf, QFi *qfi, uint64_t position) 2192 | { 2193 | if (position == 0xffffffffffffffff) { 2194 | qfi->current = 0xffffffffffffffff; 2195 | qfi->qf = qf; 2196 | return QFI_INVALID; 2197 | } 2198 | assert(position < qf->metadata->nslots); 2199 | if (!is_occupied(qf, position)) { 2200 | uint64_t block_index = position; 2201 | uint64_t idx = bitselect(get_block(qf, block_index)->occupieds[0], 0); 2202 | if (idx == 64) { 2203 | while(idx == 64 && block_index < qf->metadata->nblocks) { 2204 | block_index++; 2205 | idx = bitselect(get_block(qf, block_index)->occupieds[0], 0); 2206 | } 2207 | } 2208 | position = block_index * QF_SLOTS_PER_BLOCK + idx; 2209 | } 2210 | 2211 | qfi->qf = qf; 2212 | qfi->num_clusters = 0; 2213 | qfi->run = position; 2214 | qfi->current = position == 0 ? 
2215 |   if (qfi->current < position)
2216 |     qfi->current = position;
2217 | 
2218 | #ifdef LOG_CLUSTER_LENGTH
2219 |   qfi->c_info = (cluster_data* )calloc(qf->metadata->nslots/32,
2220 |                                        sizeof(cluster_data));
2221 |   if (qfi->c_info == NULL) {
2222 |     perror("Couldn't allocate memory for c_info.");
2223 |     exit(EXIT_FAILURE);
2224 |   }
2225 |   qfi->cur_start_index = position;
2226 |   qfi->cur_length = 1;
2227 | #endif
2228 | 
2229 |   if (qfi->current >= qf->metadata->nslots)
2230 |     return QFI_INVALID;
2231 |   return qfi->current;
2232 | }
2233 | 
2234 | int64_t qf_iterator_from_key_value(const QF *qf, QFi *qfi, uint64_t key,
2235 |                                    uint64_t value, uint8_t flags)
2236 | {
2237 |   if (key >= qf->metadata->range) {
2238 |     qfi->current = 0xffffffffffffffff;
2239 |     qfi->qf = qf;
2240 |     return QFI_INVALID;
2241 |   }
2242 | 
2243 |   qfi->qf = qf;
2244 |   qfi->num_clusters = 0;
2245 | 
2246 |   if (GET_KEY_HASH(flags) != QF_KEY_IS_HASH) {
2247 |     if (qf->metadata->hash_mode == QF_HASH_DEFAULT)
2248 |       key = MurmurHash64A(((void *)&key), sizeof(key),
2249 |                           qf->metadata->seed) % qf->metadata->range;
2250 |     else if (qf->metadata->hash_mode == QF_HASH_INVERTIBLE)
2251 |       key = hash_64(key, BITMASK(qf->metadata->key_bits));
2252 |   }
2253 |   uint64_t hash = (key << qf->metadata->value_bits) | (value &
2254 |                                                        BITMASK(qf->metadata->value_bits));
2255 | 
2256 |   uint64_t hash_remainder = hash & BITMASK(qf->metadata->bits_per_slot);
2257 |   uint64_t hash_bucket_index = hash >> qf->metadata->bits_per_slot;
2258 |   bool flag = false;
2259 | 
2260 |   // If a run starts at "position" move the iterator to point it to the
2261 |   // smallest key greater than or equal to "hash".
2262 |   if (is_occupied(qf, hash_bucket_index)) {
2263 |     uint64_t runstart_index = hash_bucket_index == 0 ? 0 : run_end(qf,
2264 |                                                                    hash_bucket_index-1)
2265 |       + 1;
2266 |     if (runstart_index < hash_bucket_index)
2267 |       runstart_index = hash_bucket_index;
2268 |     uint64_t current_remainder, current_count, current_end;
2269 |     do {
2270 |       current_end = decode_counter(qf, runstart_index, &current_remainder,
2271 |                                    &current_count);
2272 |       if (current_remainder >= hash_remainder) {
2273 |         flag = true;
2274 |         break;
2275 |       }
2276 |       runstart_index = current_end + 1;
2277 |     } while (!is_runend(qf, current_end));
2278 |     // found "hash" or smallest key greater than "hash" in this run.
2279 |     if (flag) {
2280 |       qfi->run = hash_bucket_index;
2281 |       qfi->current = runstart_index;
2282 |     }
2283 |   }
2284 |   // If a run doesn't start at "position" or the largest key in the run
2285 |   // starting at "position" is smaller than "hash" then find the start of the
2286 |   // next run.
2287 |   if (!is_occupied(qf, hash_bucket_index) || !flag) {
2288 |     uint64_t position = hash_bucket_index;
2289 |     assert(position < qf->metadata->nslots);
2290 |     uint64_t block_index = position / QF_SLOTS_PER_BLOCK;
2291 |     uint64_t idx = bitselect(get_block(qf, block_index)->occupieds[0], 0);
2292 |     if (idx == 64) {
2293 |       while(idx == 64 && block_index < qf->metadata->nblocks) {
2294 |         block_index++;
2295 |         idx = bitselect(get_block(qf, block_index)->occupieds[0], 0);
2296 |       }
2297 |     }
2298 |     position = block_index * QF_SLOTS_PER_BLOCK + idx;
2299 |     qfi->run = position;
2300 |     qfi->current = position == 0 ? 0 : run_end(qfi->qf, position-1) + 1;
2301 |     if (qfi->current < position)
2302 |       qfi->current = position;
2303 |   }
2304 | 
2305 |   if (qfi->current >= qf->metadata->nslots)
2306 |     return QFI_INVALID;
2307 |   return qfi->current;
2308 | }
2309 | 
2310 | static int qfi_get(const QFi *qfi, uint64_t *key, uint64_t *value, uint64_t
2311 |                    *count)
2312 | {
2313 |   if (qfi_end(qfi))
2314 |     return QFI_INVALID;
2315 | 
2316 |   uint64_t current_remainder, current_count;
2317 |   decode_counter(qfi->qf, qfi->current, &current_remainder, &current_count);
2318 | 
2319 |   *value = current_remainder & BITMASK(qfi->qf->metadata->value_bits);
2320 |   current_remainder = current_remainder >> qfi->qf->metadata->value_bits;
2321 |   *key = (qfi->run << qfi->qf->metadata->key_remainder_bits) | current_remainder;
2322 |   *count = current_count;
2323 | 
2324 |   return 0;
2325 | }
2326 | 
2327 | int qfi_get_key(const QFi *qfi, uint64_t *key, uint64_t *value, uint64_t
2328 |                 *count)
2329 | {
2330 |   *key = *value = *count = 0;
2331 |   int ret = qfi_get(qfi, key, value, count);
2332 |   if (ret == 0) {
2333 |     if (qfi->qf->metadata->hash_mode == QF_HASH_DEFAULT) {
2334 |       *key = 0; *value = 0; *count = 0;
2335 |       return QF_INVALID;
2336 |     } else if (qfi->qf->metadata->hash_mode == QF_HASH_INVERTIBLE)
2337 |       *key = hash_64i(*key, BITMASK(qfi->qf->metadata->key_bits));
2338 |   }
2339 | 
2340 |   return ret;
2341 | }
2342 | 
2343 | int qfi_get_hash(const QFi *qfi, uint64_t *key, uint64_t *value, uint64_t
2344 |                  *count)
2345 | {
2346 |   *key = *value = *count = 0;
2347 |   return qfi_get(qfi, key, value, count);
2348 | }
2349 | 
2350 | int qfi_next(QFi *qfi)
2351 | {
2352 |   if (qfi_end(qfi))
2353 |     return QFI_INVALID;
2354 |   else {
2355 |     /* move to the end of the current counter*/
2356 |     uint64_t current_remainder, current_count;
2357 |     qfi->current = decode_counter(qfi->qf, qfi->current, &current_remainder,
2358 |                                   &current_count);
2359 | 
2360 |     if (!is_runend(qfi->qf, qfi->current)) {
2361 |       qfi->current++;
2362 | #ifdef LOG_CLUSTER_LENGTH
2363 |       qfi->cur_length++;
2364 | #endif
2365 |       if (qfi_end(qfi))
2366 |         return QFI_INVALID;
2367 |       return 0;
2368 |     } else {
2369 | #ifdef LOG_CLUSTER_LENGTH
2370 |       /* save to check if the new current is the new cluster. */
2371 |       uint64_t old_current = qfi->current;
2372 | #endif
2373 |       uint64_t block_index = qfi->run / QF_SLOTS_PER_BLOCK;
2374 |       uint64_t rank = bitrank(get_block(qfi->qf, block_index)->occupieds[0],
2375 |                               qfi->run % QF_SLOTS_PER_BLOCK);
2376 |       uint64_t next_run = bitselect(get_block(qfi->qf,
2377 |                                               block_index)->occupieds[0],
2378 |                                     rank);
2379 |       if (next_run == 64) {
2380 |         rank = 0;
2381 |         while (next_run == 64 && block_index < qfi->qf->metadata->nblocks) {
2382 |           block_index++;
2383 |           next_run = bitselect(get_block(qfi->qf, block_index)->occupieds[0],
2384 |                                rank);
2385 |         }
2386 |       }
2387 |       if (block_index == qfi->qf->metadata->nblocks) {
2388 |         /* set the index values to max. */
2389 |         qfi->run = qfi->current = qfi->qf->metadata->xnslots;
2390 |         return QFI_INVALID;
2391 |       }
2392 |       qfi->run = block_index * QF_SLOTS_PER_BLOCK + next_run;
2393 |       qfi->current++;
2394 |       if (qfi->current < qfi->run)
2395 |         qfi->current = qfi->run;
2396 | #ifdef LOG_CLUSTER_LENGTH
2397 |       if (qfi->current > old_current + 1) { /* new cluster. */
2398 |         if (qfi->cur_length > 10) {
2399 |           qfi->c_info[qfi->num_clusters].start_index = qfi->cur_start_index;
2400 |           qfi->c_info[qfi->num_clusters].length = qfi->cur_length;
2401 |           qfi->num_clusters++;
2402 |         }
2403 |         qfi->cur_start_index = qfi->run;
2404 |         qfi->cur_length = 1;
2405 |       } else {
2406 |         qfi->cur_length++;
2407 |       }
2408 | #endif
2409 |       return 0;
2410 |     }
2411 |   }
2412 | }
2413 | 
2414 | bool qfi_end(const QFi *qfi)
2415 | {
2416 |   if (qfi->current >= qfi->qf->metadata->xnslots /*&& is_runend(qfi->qf, qfi->current)*/)
2417 |     return true;
2418 |   return false;
2419 | }
2420 | 
2421 | /*
2422 |  * Merge qfa and qfb into qfc
2423 |  */
2424 | /*
2425 |  * iterate over both qf (qfa and qfb)
2426 |  * simultaneously
2427 |  * for each index i
2428 |  * min(get_value(qfa, ia) < get_value(qfb, ib))
2429 |  * insert(min, ic)
2430 |  * increment either ia or ib, whichever is minimum.
2431 |  */
2432 | void qf_merge(const QF *qfa, const QF *qfb, QF *qfc)
2433 | {
2434 |   QFi qfia, qfib;
2435 |   qf_iterator_from_position(qfa, &qfia, 0);
2436 |   qf_iterator_from_position(qfb, &qfib, 0);
2437 | 
2438 |   if (qfa->metadata->hash_mode != qfc->metadata->hash_mode &&
2439 |       qfa->metadata->seed != qfc->metadata->seed &&
2440 |       qfb->metadata->hash_mode != qfc->metadata->hash_mode &&
2441 |       qfb->metadata->seed != qfc->metadata->seed) {
2442 |     fprintf(stderr, "Output QF and input QFs do not have the same hash mode or seed.\n");
2443 |     exit(1);
2444 |   }
2445 | 
2446 |   uint64_t keya, valuea, counta, keyb, valueb, countb;
2447 |   qfi_get_hash(&qfia, &keya, &valuea, &counta);
2448 |   qfi_get_hash(&qfib, &keyb, &valueb, &countb);
2449 |   do {
2450 |     if (keya < keyb) {
2451 |       qf_insert(qfc, keya, valuea, counta, QF_NO_LOCK | QF_KEY_IS_HASH);
2452 |       qfi_next(&qfia);
2453 |       qfi_get_hash(&qfia, &keya, &valuea, &counta);
2454 |     }
2455 |     else {
2456 |       qf_insert(qfc, keyb, valueb, countb, QF_NO_LOCK | QF_KEY_IS_HASH);
2457 |       qfi_next(&qfib);
2458 |       qfi_get_hash(&qfib, &keyb, &valueb, &countb);
2459 |     }
2460 |   } while(!qfi_end(&qfia) && !qfi_end(&qfib));
2461 | 
2462 |   if (!qfi_end(&qfia)) {
2463 |     do {
2464 |       qfi_get_hash(&qfia, &keya, &valuea, &counta);
2465 |       qf_insert(qfc, keya, valuea, counta, QF_NO_LOCK | QF_KEY_IS_HASH);
2466 |     } while(!qfi_next(&qfia));
2467 |   }
2468 |   if (!qfi_end(&qfib)) {
2469 |     do {
2470 |       qfi_get_hash(&qfib, &keyb, &valueb, &countb);
2471 |       qf_insert(qfc, keyb, valueb, countb, QF_NO_LOCK | QF_KEY_IS_HASH);
2472 |     } while(!qfi_next(&qfib));
2473 |   }
2474 | }
2475 | 
2476 | /*
2477 |  * Merge an array of qfs into the resultant QF
2478 |  */
2479 | void qf_multi_merge(const QF *qf_arr[], int nqf, QF *qfr)
2480 | {
2481 |   int i;
2482 |   QFi qfi_arr[nqf];
2483 |   int smallest_idx = 0;
2484 |   uint64_t smallest_key = UINT64_MAX;
2485 |   for (i=0; i<nqf; i++) {
2486 |     if (qf_arr[i]->metadata->hash_mode != qfr->metadata->hash_mode &&
2487 |         qf_arr[i]->metadata->seed != qfr->metadata->seed) {
2488 |       fprintf(stderr, "Output QF and input QFs do not have the same hash mode or seed.\n");
2489 |       exit(1);
2490 |     }
2491 |     qf_iterator_from_position(qf_arr[i], &qfi_arr[i], 0);
2492 |   }
2493 | 
2494 |   DEBUG_CQF("Merging %d CQFs\n", nqf);
2495 |   for (i=0; i 1) {
2501 |     uint64_t keys[nqf];
2502 |     uint64_t values[nqf];
2503 |     uint64_t counts[nqf];
2504 |     for (i=0; imetadata->hash_mode != qfb->metadata->hash_mode &&
2554 |       qfa->metadata->seed != qfb->metadata->seed) {
2555 |     fprintf(stderr, "Input QFs do not have the same hash mode or seed.\n");
2556 |     exit(1);
2557 |   }
2558 | 
2559 |   // create the iterator on the larger QF.
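  // The sequential scan below walks qf_disk, the larger filter, while the
  // smaller filter, qf_mem, answers the repeated random-access count lookups.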
2560 |   if (qfa->metadata->total_size_in_bytes > qfb->metadata->total_size_in_bytes)
2561 |   {
2562 |     qf_mem = qfb;
2563 |     qf_disk = qfa;
2564 |   } else {
2565 |     qf_mem = qfa;
2566 |     qf_disk = qfb;
2567 |   }
2568 | 
2569 |   qf_iterator_from_position(qf_disk, &qfi, 0);
2570 |   do {
2571 |     uint64_t key = 0, value = 0, count = 0;
2572 |     uint64_t count_mem;
2573 |     qfi_get_hash(&qfi, &key, &value, &count);
2574 |     if ((count_mem = qf_count_key_value(qf_mem, key, 0, QF_KEY_IS_HASH)) > 0) {
2575 |       acc += count*count_mem;
2576 |     }
2577 |   } while (!qfi_next(&qfi));
2578 | 
2579 |   return acc;
2580 | }
2581 | 
2582 | /* compute the intersection of two QFs. */
2583 | void qf_intersect(const QF *qfa, const QF *qfb, QF *qfr)
2584 | {
2585 |   QFi qfi;
2586 |   const QF *qf_mem, *qf_disk;
2587 | 
2588 |   if (qfa->metadata->hash_mode != qfr->metadata->hash_mode &&
2589 |       qfa->metadata->seed != qfr->metadata->seed &&
2590 |       qfb->metadata->hash_mode != qfr->metadata->hash_mode &&
2591 |       qfb->metadata->seed != qfr->metadata->seed) {
2592 |     fprintf(stderr, "Output QF and input QFs do not have the same hash mode or seed.\n");
2593 |     exit(1);
2594 |   }
2595 | 
2596 |   // create the iterator on the larger QF.
2597 |   if (qfa->metadata->total_size_in_bytes > qfb->metadata->total_size_in_bytes)
2598 |   {
2599 |     qf_mem = qfb;
2600 |     qf_disk = qfa;
2601 |   } else {
2602 |     qf_mem = qfa;
2603 |     qf_disk = qfb;
2604 |   }
2605 | 
2606 |   qf_iterator_from_position(qf_disk, &qfi, 0);
2607 |   do {
2608 |     uint64_t key = 0, value = 0, count = 0;
2609 |     qfi_get_hash(&qfi, &key, &value, &count);
2610 |     if (qf_count_key_value(qf_mem, key, 0, QF_KEY_IS_HASH) > 0)
2611 |       qf_insert(qfr, key, value, count, QF_NO_LOCK | QF_KEY_IS_HASH);
2612 |   } while (!qfi_next(&qfi));
2613 | }
2614 | 
2615 | /* magnitude of a QF. */
2616 | uint64_t qf_magnitude(const QF *qf)
2617 | {
2618 |   return sqrt(qf_inner_product(qf, qf));
2619 | }
2620 | 
2621 | 
--------------------------------------------------------------------------------
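A minimal usage sketch of the query and iterator API defined at the end of gqf.c above: qf_insert stores a key/value pair with a count, qf_query returns the stored count (and fills in the value bits), and the QFi iterator visits every (key, value, count) triple. The sketch assumes the allocation and teardown entry points qf_malloc(qf, nslots, key_bits, value_bits, hash_mode, seed) and qf_free(qf) declared in include/gqf.h, and a build that links against src/gqf.c and its dependencies; it is an illustration, not part of the repository.

/* cqf_usage_sketch.c -- illustrative example, not part of the repository.
 * Assumed build line (adjust to the Makefile):
 *   cc cqf_usage_sketch.c src/gqf.c src/hashutil.c src/partitioned_counter.c -Iinclude -lpthread -lm */
#include <stdio.h>
#include <inttypes.h>
#include "gqf.h"

int main(void)
{
	QF qf;
	/* qf_malloc/qf_free are assumed from gqf.h: 2^20 slots, 28 hash bits,
	 * no value bits, and an invertible hash so keys can be recovered later. */
	if (!qf_malloc(&qf, 1ULL << 20, 20 + 8, 0, QF_HASH_INVERTIBLE, 0)) {
		fprintf(stderr, "Can't allocate the CQF.\n");
		return 1;
	}

	/* Insert three keys; key 7 gets a count of 2. */
	qf_insert(&qf, 5, 0, 1, QF_NO_LOCK);
	qf_insert(&qf, 7, 0, 2, QF_NO_LOCK);
	qf_insert(&qf, 9, 0, 1, QF_NO_LOCK);

	/* qf_query returns the count stored for the key and fills in its value bits. */
	uint64_t value;
	printf("count(7) = %" PRIu64 "\n", qf_query(&qf, 7, &value, 0));

	/* Walk all (key, value, count) triples in slot order. */
	QFi it;
	qf_iterator_from_position(&qf, &it, 0);
	while (!qfi_end(&it)) {
		uint64_t key, val, count;
		qfi_get_key(&it, &key, &val, &count);
		printf("key=%" PRIu64 " count=%" PRIu64 "\n", key, count);
		qfi_next(&it);
	}

	qf_free(&qf);
	return 0;
}

As qfi_get_key above shows, original keys are only recoverable when the filter uses QF_HASH_INVERTIBLE; with QF_HASH_DEFAULT the iterator can still report hashes via qfi_get_hash, but qfi_get_key returns QF_INVALID.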