├── LICENSE ├── Makefile ├── README.md ├── bitmap.cc ├── bitmap.h ├── bitmap_bench.cc ├── popcount.h └── shared.h /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2013, Carnegie Mellon University 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | 15 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | CPP = gcc -E 3 | CXX = g++ 4 | CXXCPP = g++ -E 5 | 6 | # Flags passed to the C++ compiler. 7 | CXXFLAGS += -g -Wall -Wextra 8 | CXXFLAGS += -O9 -mpopcnt 9 | 10 | %.o: %.cc 11 | $(CXX) $(CPPFLAGS) $(CXXFLAGS) -c $< -o $@ 12 | 13 | .PHONY: all clean 14 | 15 | all: bitmap_bench 16 | 17 | clean: 18 | rm -f bitmap_bench *.o 19 | 20 | bitmap_bench: bitmap.o bitmap_bench.o 21 | $(CXX) $(CXXFLAGS) $(CXXFLAGS) $^ -o $@ 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | rankselect 2 | ========== 3 | 4 | Space-Efficient, High-Performance Rank & Select Structures on Uncompressed Bit Sequences -------------------------------------------------------------------------------- /bitmap.cc: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright 2013 Carnegie Mellon University 3 | 4 | Authors: Dong Zhou, David G. Andersen and Michale Kaminsky 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | #include 20 | #include 21 | #include 22 | 23 | #include "bitmap.h" 24 | #include "popcount.h" 25 | #include "shared.h" 26 | 27 | BitmapPoppy::BitmapPoppy(uint64 *bits, uint64 nbits) 28 | { 29 | bits_ = bits; 30 | nbits_ = nbits; 31 | 32 | l1EntryCount_ = std::max(nbits_ >> 32, (uint64) 1); 33 | l2EntryCount_ = nbits_ >> 11; 34 | basicBlockCount_ = nbits_ / kBasicBlockSize; 35 | 36 | assert(posix_memalign((void **) &l1Entries_, kCacheLineSize, l1EntryCount_ * sizeof(uint64)) >= 0); 37 | assert(posix_memalign((void **) &l2Entries_, kCacheLineSize, l2EntryCount_ * sizeof(uint64)) >= 0); 38 | 39 | uint64 l2Id = 0; 40 | uint64 basicBlockId = 0; 41 | 42 | pCount_ = 0; 43 | memset(locCount_, 0, sizeof(locCount_)); 44 | 45 | for (uint64 i = 0; i < l1EntryCount_; i++) { 46 | l1Entries_[i] = pCount_; 47 | 48 | uint32 cum = 0; 49 | for (int k = 0; k < kL2EntryCountPerL1Entry; k++) { 50 | l2Entries_[l2Id] = cum; 51 | 52 | for (int offset = 0; offset < 30; offset += 10) { 53 | int c = popcountLinear(bits_, 54 | basicBlockId * kWordCountPerBasicBlock, 55 | kBasicBlockSize); 56 | cum += c; 57 | basicBlockId++; 58 | l2Entries_[l2Id] |= (uint64) c << (32 + offset); 59 | } 60 | cum += popcountLinear(bits_, basicBlockId * kWordCountPerBasicBlock, kBasicBlockSize); 61 | basicBlockId++; 62 | 63 | if (++l2Id >= l2EntryCount_) break; 64 | } 65 | 66 | locCount_[i] = (cum + kLocFreq - 1) / kLocFreq; 67 | pCount_ += cum; 68 | } 69 | 70 | basicBlockId = 0; 71 | 72 | for (uint64 i = 0; i < l1EntryCount_; i++) { 73 | loc_[i] = new uint32[locCount_[i]]; 74 | locCount_[i] = 0; 75 | 76 | uint32 oneCount = 0; 77 | 78 | for (uint32 k = 0; k < kBasicBlockCountPerL1Entry; k++) { 79 | uint64 woff = basicBlockId * kWordCountPerBasicBlock; 80 | for (int widx = 0; widx < kWordCountPerBasicBlock; widx++) 81 | for (int bit = 0; bit < kWordSize; bit++) 82 | if (bits_[woff + widx] & (1ULL << (63 - bit))) { 83 | oneCount++; 84 | if ((oneCount & kLocFreqMask) == 1) { 85 | loc_[i][locCount_[i]] = k * kBasicBlockSize + widx * kWordSize + bit; 86 | locCount_[i]++; 87 | } 88 | } 89 | 90 | basicBlockId++; 91 | if (basicBlockId >= basicBlockCount_) break; 92 | } 93 | } 94 | } 95 | 96 | uint64 BitmapPoppy::rank(uint64 pos) 97 | { 98 | assert(pos <= nbits_); 99 | //--pos; 100 | 101 | uint64 l1Id = pos >> 32; 102 | uint64 l2Id = pos >> 11; 103 | uint64 x = l2Entries_[l2Id]; 104 | 105 | uint64 res = l1Entries_[l1Id] + (x & 0xFFFFFFFFULL); 106 | x >>= 32; 107 | 108 | int groupId = (pos & 2047) / 512; 109 | for (int i = 0; i < groupId; i++) { 110 | res += x & 1023; 111 | x >>= 10; 112 | } 113 | res += popcountLinear(bits_, (l2Id * 4 + groupId) * kWordCountPerBasicBlock, (pos & 511)); 114 | 115 | return res; 116 | } 117 | 118 | uint64 BitmapPoppy::select(uint64 rank) 119 | { 120 | assert(rank <= pCount_); 121 | 122 | uint64 l1Id; 123 | for (l1Id = l1EntryCount_ - 1; l1Id >= 0; l1Id--) { 124 | if (l1Entries_[l1Id] < rank) { 125 | rank -= l1Entries_[l1Id]; 126 | break; 127 | } 128 | } 129 | 130 | uint32 offset = l1Id * kL2EntryCountPerL1Entry; 131 | uint32 maxL2Id = kL2EntryCountPerL1Entry; 132 | if (l1Id == l1EntryCount_ - 1) 133 | maxL2Id = l2EntryCount_ - offset; 134 | 135 | uint32 pos = loc_[l1Id][(rank - 1) / kLocFreq]; 136 | uint32 l2Id = pos >> 11; 137 | 138 | while (l2Id + 1 < maxL2Id && (l2Entries_[l2Id + 1] & 0xFFFFFFFFULL) < rank) 139 | l2Id++; 140 | rank -= l2Entries_[l2Id] & 0xFFFFFFFFULL; 141 | 142 | uint32 x = l2Entries_[l2Id] >> 32; 143 | int groupId; 144 | 145 | for (groupId = 0; groupId < 3; groupId++) { 146 | int k = x & 1023; 147 | if (rank > k) 148 | rank -= k; 149 | else 150 | break; 151 | x >>= 10; 152 | } 153 | 154 | return (l1Id << 32) + (l2Id << 11) + (groupId << 9) + select512(bits_, ((offset + l2Id) * 4 + groupId) * kWordCountPerBasicBlock, rank); 155 | } 156 | -------------------------------------------------------------------------------- /bitmap.h: -------------------------------------------------------------------------------- 1 | /** 2 | Copyright 2013 Carnegie Mellon University 3 | 4 | Authors: Dong Zhou, David G. Andersen and Michale Kaminsky 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); 7 | you may not use this file except in compliance with the License. 8 | You may obtain a copy of the License at 9 | 10 | http://www.apache.org/licenses/LICENSE-2.0 11 | 12 | Unless required by applicable law or agreed to in writing, software 13 | distributed under the License is distributed on an "AS IS" BASIS, 14 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | See the License for the specific language governing permissions and 16 | limitations under the License. 17 | */ 18 | 19 | /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -* -*/ 20 | 21 | #ifndef _BITMAP_H_ 22 | #define _BITMAP_H_ 23 | 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #define __STDC_FORMAT_MACROS 31 | #include 32 | 33 | #include "shared.h" 34 | 35 | const int kWordSize = 64; 36 | const int kBasicBlockSize = 512; 37 | const int kBasicBlockBits = 9; 38 | const int kBasicBlockMask = kBasicBlockSize - 1; 39 | const int kWordCountPerBasicBlock = kBasicBlockSize / kWordSize; 40 | 41 | class Bitmap { 42 | public: 43 | Bitmap() { pCount_ = 0; } 44 | 45 | virtual uint64 rank(uint64 pos) = 0; 46 | virtual uint64 select(uint64 rank) = 0; 47 | 48 | uint64 pCount() { return pCount_; } 49 | 50 | protected: 51 | uint64 pCount_; 52 | }; 53 | 54 | class BitmapPoppy: public Bitmap { 55 | public: 56 | BitmapPoppy(uint64* bits, uint64 nbits); 57 | ~BitmapPoppy() {} 58 | 59 | uint64 rank(uint64 pos); 60 | uint64 select(uint64 rank); 61 | 62 | private: 63 | uint64* bits_; 64 | uint64 nbits_; 65 | 66 | uint64* l2Entries_; 67 | uint64 l2EntryCount_; 68 | uint64* l1Entries_; 69 | uint64 l1EntryCount_; 70 | uint64 basicBlockCount_; 71 | 72 | uint32* loc_[1 << 16]; 73 | uint32 locCount_[1 << 16]; 74 | 75 | static const int kLocFreq = 8192; 76 | static const int kLocFreqMask = 8191; 77 | static const int kL2EntryCountPerL1Entry = 1 << 21; 78 | static const int kBasicBlockCountPerL1Entry = 1 << 23; 79 | }; 80 | 81 | #endif /* _BITMAP_H_ */ 82 | -------------------------------------------------------------------------------- /bitmap_bench.cc: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 | 3 | #include 4 | #include 5 | #include 6 | #define __STDC_LIMIT_MACROS 7 | #include 8 | #include 9 | #include 10 | 11 | #include "bitmap.h" 12 | #include "shared.h" 13 | 14 | double densityL = 0.1; 15 | double densityR = 0.1; 16 | 17 | uint64 numOnesL = 0; 18 | uint64 numOnesR = 0; 19 | 20 | const int numIters = 10; 21 | const int numQueries = 1000000; 22 | uint64 queries[numQueries]; 23 | 24 | uint32 seed = 1; 25 | 26 | inline uint32 xRand() 27 | { 28 | return seed = (279470273ULL * seed) % 4294967291ULL; 29 | } 30 | 31 | inline uint64 xRand64() 32 | { 33 | return (uint64) xRand() << 32 | xRand(); 34 | } 35 | 36 | uint64* createRandomBits(uint64 nbits, uint32 thresholdL, uint32 thresholdR) 37 | { 38 | fprintf(stderr, "nbits to create: %" PRIu64 "\n", nbits); 39 | fprintf(stderr, "allocated bits: %" PRIu64 " bytes\n", nbits/8); 40 | 41 | uint64* bits = NULL; 42 | assert(posix_memalign((void **) &bits, 4096, nbits / 8) == 0); 43 | 44 | for (uint64 i = 0; i < nbits / 2; i++) { 45 | if (xRand() < thresholdL) { 46 | bits[i / 64] |= 1UL << (i % 64); 47 | ++numOnesL; 48 | } else { 49 | bits[i / 64] &= ~(1ULL << (i % 64)); 50 | } 51 | } 52 | for (uint64 i = nbits / 2; i < nbits; i++) { 53 | if (xRand() < thresholdR) { 54 | bits[i / 64] |= 1ULL << (i % 64); 55 | ++numOnesR; 56 | } else { 57 | bits[i / 64] &= ~(1ULL << (i % 64)); 58 | } 59 | } 60 | 61 | return bits; 62 | } 63 | 64 | enum benchmode { 65 | BENCH_RANK, 66 | BENCH_SELECT, 67 | }; 68 | 69 | int main(int argc, char **argv) 70 | { 71 | extern int optind; 72 | int ch; 73 | 74 | uint64 nbits; 75 | benchmode mode = BENCH_RANK; 76 | 77 | while ((ch = getopt(argc, argv, "sn:d:")) != -1) { 78 | switch (ch) { 79 | case 's': 80 | mode = BENCH_SELECT; 81 | break; 82 | case 'n': 83 | nbits = atoi(optarg); 84 | nbits = 1ULL << nbits; 85 | break; 86 | case 'd': 87 | densityL = densityR = atof(optarg); 88 | break; 89 | } 90 | } 91 | 92 | printf("benchmode: %s\n", mode == BENCH_RANK ? "rank" : "select"); 93 | 94 | uint32 thresholdL = (uint32) (UINT32_MAX * densityL); 95 | uint32 thresholdR = (uint32) (UINT32_MAX * densityR); 96 | 97 | uint64* bits = createRandomBits(nbits, thresholdL, thresholdR); 98 | BitmapPoppy* bitmap = new BitmapPoppy(bits, nbits); 99 | uint64 dummy = 0x1234567890ABCDEF; 100 | 101 | if (mode == BENCH_RANK) { 102 | for (int i = 0; i < numQueries; i++) { 103 | queries[i] = xRand64() % nbits + 1; 104 | } 105 | } else { 106 | assert(mode == BENCH_SELECT); 107 | 108 | for (int i = 0; i < numQueries / 2; i++) { 109 | queries[i] = xRand64() % numOnesL + 1; 110 | } 111 | for (int i = numQueries / 2; i < numQueries; i++) { 112 | queries[i] = xRand64() % numOnesR + 1 + numOnesL; 113 | } 114 | } 115 | 116 | struct timeval tv_start, tv_end; 117 | gettimeofday(&tv_start, NULL); 118 | 119 | if (mode == BENCH_RANK) { 120 | for (int iter = 0; iter < numIters; iter++) 121 | for (int i = 0; i < numQueries; i++) 122 | dummy ^= bitmap->rank(queries[i]); 123 | } else { 124 | assert(mode == BENCH_SELECT); 125 | 126 | for (int iter = 0; iter < numIters; iter++) 127 | for (int i = 0; i < numQueries; i++) 128 | dummy ^= bitmap->select(queries[i]); 129 | } 130 | gettimeofday(&tv_end, NULL); 131 | 132 | double elapsed_seconds = timeval_diff(&tv_start, &tv_end); 133 | printf("%" PRIu64 " ops, %.2f seconds, ns/op: %.2f\n", 134 | (uint64) numIters * numQueries, 135 | elapsed_seconds, 136 | elapsed_seconds * 1000000000 / ((uint64) numIters * numQueries)); 137 | 138 | if (dummy == 42) printf("42\n"); 139 | 140 | return 0; 141 | } 142 | -------------------------------------------------------------------------------- /popcount.h: -------------------------------------------------------------------------------- 1 | /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil -*- */ 2 | #ifndef _FASTRANK_POPCOUNT_H_ 3 | #define _FASTRANK_POPCOUNT_H_ 4 | 5 | #include 6 | #include 7 | 8 | #define L8 0x0101010101010101ULL // Every lowest 8th bit set: 00000001... 9 | #define G2 0xAAAAAAAAAAAAAAAAULL // Every highest 2nd bit: 101010... 10 | #define G4 0x3333333333333333ULL // 00110011 ... used to group the sum of 4 bits. 11 | #define G8 0x0F0F0F0F0F0F0F0FULL 12 | #define H8 0x8080808080808080ULL 13 | #define L9 0x0040201008040201ULL 14 | #define H9 (L9 << 8) 15 | #define L16 0x0001000100010001ULL 16 | #define H16 0x8000800080008000ULL 17 | 18 | #define ONES_STEP_4 ( 0x1111111111111111ULL ) 19 | #define ONES_STEP_8 ( 0x0101010101010101ULL ) 20 | #define ONES_STEP_9 ( 1ULL << 0 | 1ULL << 9 | 1ULL << 18 | 1ULL << 27 | 1ULL << 36 | 1ULL << 45 | 1ULL << 54 ) 21 | #define ONES_STEP_16 ( 1ULL << 0 | 1ULL << 16 | 1ULL << 32 | 1ULL << 48 ) 22 | #define MSBS_STEP_4 ( 0x8ULL * ONES_STEP_4 ) 23 | #define MSBS_STEP_8 ( 0x80ULL * ONES_STEP_8 ) 24 | #define MSBS_STEP_9 ( 0x100ULL * ONES_STEP_9 ) 25 | #define MSBS_STEP_16 ( 0x8000ULL * ONES_STEP_16 ) 26 | #define INCR_STEP_8 ( 0x80ULL << 56 | 0x40ULL << 48 | 0x20ULL << 40 | 0x10ULL << 32 | 0x8ULL << 24 | 0x4ULL << 16 | 0x2ULL << 8 | 0x1 ) 27 | 28 | #define ONES_STEP_32 ( 0x0000000100000001ULL ) 29 | #define MSBS_STEP_32 ( 0x8000000080000000ULL ) 30 | 31 | #define COMPARE_STEP_8(x,y) ( ( ( ( ( (x) | MSBS_STEP_8 ) - ( (y) & ~MSBS_STEP_8 ) ) ^ (x) ^ ~(y) ) & MSBS_STEP_8 ) >> 7 ) 32 | #define LEQ_STEP_8(x,y) ( ( ( ( ( (y) | MSBS_STEP_8 ) - ( (x) & ~MSBS_STEP_8 ) ) ^ (x) ^ (y) ) & MSBS_STEP_8 ) >> 7 ) 33 | 34 | #define UCOMPARE_STEP_9(x,y) ( ( ( ( ( ( (x) | MSBS_STEP_9 ) - ( (y) & ~MSBS_STEP_9 ) ) | ( x ^ y ) ) ^ ( x | ~y ) ) & MSBS_STEP_9 ) >> 8 ) 35 | #define UCOMPARE_STEP_16(x,y) ( ( ( ( ( ( (x) | MSBS_STEP_16 ) - ( (y) & ~MSBS_STEP_16 ) ) | ( x ^ y ) ) ^ ( x | ~y ) ) & MSBS_STEP_16 ) >> 15 ) 36 | #define ULEQ_STEP_9(x,y) ( ( ( ( ( ( (y) | MSBS_STEP_9 ) - ( (x) & ~MSBS_STEP_9 ) ) | ( x ^ y ) ) ^ ( x & ~y ) ) & MSBS_STEP_9 ) >> 8 ) 37 | #define ULEQ_STEP_16(x,y) ( ( ( ( ( ( (y) | MSBS_STEP_16 ) - ( (x) & ~MSBS_STEP_16 ) ) | ( x ^ y ) ) ^ ( x & ~y ) ) & MSBS_STEP_16 ) >> 15 ) 38 | #define ZCOMPARE_STEP_8(x) ( ( ( x | ( ( x | MSBS_STEP_8 ) - ONES_STEP_8 ) ) & MSBS_STEP_8 ) >> 7 ) 39 | 40 | // Population count of a 64 bit integer in SWAR (SIMD within a register) style 41 | // From Sebastiano Vigna, "Broadword Implementation of Rank/Select Queries" 42 | // http://sux.dsi.unimi.it/paper.pdf p4 43 | // This variant uses multiplication for the last summation instead of 44 | // continuing the shift/mask/addition chain. 45 | inline int suxpopcount(uint64 x) { 46 | // Step 1: 00 - 00 = 0; 01 - 00 = 01; 10 - 01 = 01; 11 - 01 = 10; 47 | x = x - ((x & G2) >> 1); 48 | // step 2: add 2 groups of 2. 49 | x = (x & G4) + ((x >> 2) & G4); 50 | // 2 groups of 4. 51 | x = (x + (x >> 4)) & G8; 52 | // Using a multiply to collect the 8 groups of 8 together. 53 | x = x * L8 >> 56; 54 | return x; 55 | } 56 | 57 | // Default to using the GCC builtin popcount. On architectures 58 | // with -march popcnt, this compiles to a single popcnt instruction. 59 | #ifndef popcount 60 | #define popcount __builtin_popcountll 61 | //#define popcount suxpopcount 62 | #endif 63 | 64 | #define popcountsize 64ULL 65 | #define popcountmask (popcountsize - 1) 66 | 67 | inline uint64 popcountLinear(uint64 *bits, uint64 x, uint64 nbits) { 68 | if (nbits == 0) { return 0; } 69 | uint64 lastword = (nbits - 1) / popcountsize; 70 | uint64 p = 0; 71 | 72 | for (int i = 0; i < lastword; i++) { /* tested; manually unrolling doesn't help, at least in C */ 73 | p += popcount(bits[x+i]); // note that use binds us to 64 bit popcount impls 74 | } 75 | 76 | // 'nbits' may or may not fall on a multiple of 64 boundary, 77 | // so we may need to zero out the right side of the last word 78 | // (accomplished by shifting it right, since we're just popcounting) 79 | uint64 lastshifted = bits[x+lastword] >> (63 - ((nbits - 1) & popcountmask)); 80 | p += popcount(lastshifted); 81 | return p; 82 | } 83 | 84 | // Return the index of the kth bit set in x 85 | inline int select64_naive(uint64 x, int k) { 86 | int count = -1; 87 | for (int i = 63; i >= 0; i--) { 88 | count++; 89 | if (x & (1ULL << i)) { 90 | k--; 91 | if (k == 0) { 92 | return count; 93 | } 94 | } 95 | } 96 | return -1; 97 | } 98 | 99 | inline int select64_popcount_search(uint64 x, int k) { 100 | int loc = -1; 101 | // if (k > popcount(x)) { return -1; } 102 | 103 | for (int testbits = 32; testbits > 0; testbits >>= 1) { 104 | int lcount = popcount(x >> testbits); 105 | if (k > lcount) { 106 | x &= ((1ULL << testbits)-1); 107 | loc += testbits; 108 | k -= lcount; 109 | } else { 110 | x >>= testbits; 111 | } 112 | } 113 | return loc+k; 114 | } 115 | 116 | inline int select64_broadword(uint64 x, int k) { 117 | uint64 word = x; 118 | int residual = k; 119 | register uint64 byte_sums; 120 | 121 | byte_sums = word - ( ( word & 0xa * ONES_STEP_4 ) >> 1 ); 122 | byte_sums = ( byte_sums & 3 * ONES_STEP_4 ) + ( ( byte_sums >> 2 ) & 3 * ONES_STEP_4 ); 123 | byte_sums = ( byte_sums + ( byte_sums >> 4 ) ) & 0x0f * ONES_STEP_8; 124 | byte_sums *= ONES_STEP_8; 125 | 126 | // Phase 2: compare each byte sum with the residual 127 | const uint64 residual_step_8 = residual * ONES_STEP_8; 128 | const int place = ( LEQ_STEP_8( byte_sums, residual_step_8 ) * ONES_STEP_8 >> 53 ) & ~0x7; 129 | 130 | // Phase 3: Locate the relevant byte and make 8 copies with incremental masks 131 | const int byte_rank = residual - ( ( ( byte_sums << 8 ) >> place ) & 0xFF ); 132 | 133 | const uint64 spread_bits = ( word >> place & 0xFF ) * ONES_STEP_8 & INCR_STEP_8; 134 | const uint64 bit_sums = ZCOMPARE_STEP_8( spread_bits ) * ONES_STEP_8; 135 | 136 | // Compute the inside-byte location and return the sum 137 | const uint64 byte_rank_step_8 = byte_rank * ONES_STEP_8; 138 | 139 | return place + ( LEQ_STEP_8( bit_sums, byte_rank_step_8 ) * ONES_STEP_8 >> 56 ); 140 | } 141 | 142 | inline int select64(uint64 x, int k) { 143 | return select64_popcount_search(x, k); 144 | } 145 | 146 | // x is the starting offset of the 512 bits; 147 | // k is the thing we're selecting for. 148 | inline int select512(uint64 *bits, int x, int k) { 149 | __asm__ __volatile__ ( 150 | "prefetchnta (%0)\n" 151 | : : "r" (&bits[x]) ); 152 | int i = 0; 153 | int pop = popcount(bits[x+i]); 154 | while (k > pop && i < 7) { 155 | k -= pop; 156 | i++; 157 | pop = popcount(bits[x+i]); 158 | } 159 | if (i == 7 && popcount(bits[x+i]) < k) { 160 | return -1; 161 | } 162 | // We're now certain that the bit we want is stored in bv[x+i] 163 | return i*64 + select64(bits[x+i], k); 164 | } 165 | 166 | // brute-force linear select 167 | // x is the starting offset of the bits in bv; 168 | // k is the thing we're selecting for (starting from bv[x]). 169 | // bvlen is the total length of bv 170 | inline uint64 selectLinear(uint64* bits, uint64 length, uint64 x, uint64 k) { 171 | if (k > (length - x) * 64) 172 | return -1; 173 | uint64 i = 0; 174 | uint64 pop = popcount(bits[x+i]); 175 | while (k > pop && i < (length - 1)) { 176 | k -= pop; 177 | i++; 178 | pop = popcount(bits[x+i]); 179 | } 180 | if ((i == length - 1) && (pop < k)) { 181 | return -1; 182 | } 183 | // We're now certain that the bit we want is stored in bits[x+i] 184 | return i*64 + select64(bits[x+i], k); 185 | } 186 | 187 | #endif /* _FASTRANK_POPCOUNT_H_ */ 188 | -------------------------------------------------------------------------------- /shared.h: -------------------------------------------------------------------------------- 1 | #ifndef _SHARED_H_ 2 | #define _SHARED_H_ 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | typedef uint16_t uint16; 10 | typedef uint32_t uint32; 11 | typedef uint64_t uint64; 12 | 13 | const int kCacheLineSize = 64; 14 | 15 | inline double 16 | timeval_diff(const struct timeval *start, const struct timeval *end) 17 | { 18 | double r = (end->tv_sec - start->tv_sec)* 1000000; 19 | 20 | if (end->tv_usec > start->tv_usec) 21 | r += (end->tv_usec - start->tv_usec); 22 | else if (end->tv_usec < start->tv_usec) 23 | r -= (start->tv_usec - end->tv_usec); 24 | 25 | return (double) r / 1000000; 26 | } 27 | 28 | #endif /* _SHARED_H_ */ 29 | 30 | --------------------------------------------------------------------------------