├── .circleci
└── config.yml
├── .gitignore
├── .gitmodules
├── CMakeLists.txt
├── README.md
├── src
├── bbsearch.c
└── bbsearch.h
└── test
└── test_bbsearch.cpp
/.circleci/config.yml:
--------------------------------------------------------------------------------
# CircleCI pipeline: installs a toolchain in a Debian container, builds the
# project with CMake, and runs the googletest binary.
version: 2

jobs:
  build:
    docker:
      - image: "debian:buster"
    steps:
      - checkout
      - run:
          name: Apt update
          command: 'apt-get update'
      - run:
          name: Install dependencies
          command: 'apt-get install -y gcc g++ cmake git'
      # googletest lives in extern/googletest as a git submodule.
      # NOTE(review): `--remote` checks out the tip of the submodule's remote
      # branch rather than the commit recorded in this repository, so CI may
      # build against a different googletest than a local checkout - confirm
      # this is intended.
      - run:
          name: "Pull Submodules"
          command: |
            git submodule init
            git submodule update --remote
      # Out-of-source build directory; it matches the cmake-build-* ignore pattern.
      - run:
          name: Create make file
          command: 'mkdir cmake-build-release && cd cmake-build-release && cmake ..'
      - run:
          name: Building
          command: 'cd cmake-build-release && make'
      # The test binary is run directly (not through ctest).
      - run:
          name: Testing
          command: './cmake-build-release/test_bbsearch'
29 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | cmake-build-*
2 | .idea
3 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "extern/googletest"]
2 | path = extern/googletest
3 | url = https://github.com/google/googletest.git
4 | shallow = true
5 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
# Build script for the branchless binary search library (bbsearch) and its
# googletest-based test executable (test_bbsearch).
cmake_minimum_required(VERSION 3.13)
project(branchless_binary_search C CXX)
set(CMAKE_C_STANDARD 11)
# Enables CTest so that the add_test() call at the bottom registers the test.
enable_testing()

#CMAKE_C_FLAGS_[DEBUG|RELEASE|RELWITHDEBINFO|MINSIZEREL]
#set(CMAKE_CXX_FLAGS "-O3 -Winline")

# extern
# googletest is pulled in as a git submodule under extern/googletest.
add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/extern/googletest)

# src
# The search routine is built as a shared library.
add_library(bbsearch SHARED
        src/bbsearch.c
        src/bbsearch.h
        )
# PUBLIC: targets that link bbsearch also get src/ on their include path.
target_include_directories(bbsearch PUBLIC src)
# The library is always compiled with -O3, independent of the build type.
# NOTE(review): -O3/-Winline are GCC/Clang-style flags - confirm no other
# compiler needs to be supported.
target_compile_options(bbsearch PRIVATE "-O3" "-Winline")

# test
add_executable(test_bbsearch
        test/test_bbsearch.cpp
        )
target_include_directories(test_bbsearch
        PUBLIC ${gtest_SOURCE_DIR}/include
        PUBLIC ${gtest_SOURCE_DIR}
        )
# NOTE(review): test_bbsearch.cpp defines its own main(), so gtest_main looks
# redundant here - confirm before removing.
target_link_libraries(test_bbsearch
        PUBLIC bbsearch
        PUBLIC gtest
        PUBLIC gtest_main
        )
add_test(NAME test_bbsearch
        COMMAND test_bbsearch
        )
36 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Branchless Binary Search
2 | [](https://circleci.com/gh/SiftScience/sift-java)
3 |
4 | This is a proof-of-concept binary search implementation that entirely avoids branching,
5 | i.e. `if-then-else`. It accomplishes this with a mix of bit twiddling,
6 | jump tables, and access to the x86 `BSR` instruction through a compiler builtin.
7 |
8 | ## Background
9 | * [Binary search](https://en.wikipedia.org/wiki/Binary_search_algorithm)
10 | * [Branch prediction](https://en.wikipedia.org/wiki/Branch_predictor)
11 |
12 | ## Motivation
13 | Branch mispredictions can be expensive, especially on architectures with long
14 | pipelines. This approach avoids branching entirely, at the expense of code size
15 | and more overall instructions.
16 |
17 | Additionally, because the same code is run regardless of input, this approach can be
18 | extended to have SIMD/vector support, parallelizing searches across the same
19 | (or different) data.
20 |
21 | ## Implementation
22 | At a high level, the search works as follows:
23 |
24 | 1. compute `log2(length)`
25 | 2. `switch` on the result
26 | 3. Taking advantage of case fall-through, run a modified binary search update
27 | `log2(length)` times.
28 | 4. Interpret the values of the search window to get the result
29 |
30 | ### `log2(length)` call
31 | This is an optimization. The search also works correctly if the update step is
32 | always run once per bit of `length` (e.g. 32 or 64 times).
33 |
34 | A standard binary search implicitly computes this empirically, essentially
35 | dividing `length` by 2 until it's 0. Because this implementation uses a jump
36 | table to avoid a loop condition, it needs to be known before the search.
37 |
38 | ```c
39 | unsigned int bits = 64 - __builtin_clzl(size);
40 | ```
41 |
42 | ### Jump table
43 | This is functionally the same as the loop in an iterative binary search
44 | implementation—it runs `update_search_window` `log2(length)` times.
45 |
46 | Without `bits % 64`, the compiler adds a check to make sure there's a case
47 | for `bits`.
48 |
49 | ```c
50 | switch (bits % 64) {
51 | case 63: update_search_window(array, &start, &end, key);
52 | case 62: update_search_window(array, &start, &end, key);
53 | case 61: update_search_window(array, &start, &end, key);
54 | ...
55 | ```
56 |
57 | ### Window update function
58 | For the most part, the update function looks similar to a standard binary
59 | search, but instead of returning when the result is found, the window remains
60 | the same allowing the function to be run again.
61 |
62 | The second difference is more in implementation than function. Rather than
63 | using an `if-then-else` construct to update `start` or `end`, both are
64 | updated regardless, with bit masks used to select the next value.
65 |
66 | ```c
67 | __attribute__((always_inline)) static inline void update_search_window(
68 | const int32_t *array, ptrdiff_t *start, ptrdiff_t *end, const int32_t key) {
69 |
70 | ptrdiff_t median = (*start + *end) / 2;
71 | int32_t value = array[median];
72 |
73 | uintptr_t start_mask = -(key < value);
74 | uintptr_t end_mask = -(key > value);
75 |
76 | *start = (median & ~start_mask) | (*start & start_mask);
77 | *end = ((median & ~end_mask) | (*end & end_mask)) +
78 | (~start_mask & ~end_mask & 1);
79 | }
80 | ```
81 |
82 | ### Computing the result
83 | Like a recursive binary search's base case, the result is determined by
84 | comparing the `key` being looked up to the current window. Like in the window
85 | update function, bitmasks are used to avoid branching.
86 |
87 | Unlike `bsearch()`, but like Java's `Arrays.binarySearch()`,
88 | this returns `-insertion_point - 1` when `key` isn't found.
89 |
90 | ```c
91 | int is_match_mask = -(array[start] == key);
92 | int is_after_start = -(array[start] < key);
93 | int is_before_end = -(array[end] > key);
94 |
95 | return (is_empty_mask & -1) | (is_non_empty_mask & (
96 | (is_match_mask & start) | (~is_match_mask & (
97 | (is_after_start & (-end - 1)) |
98 | (~is_after_start & (is_before_end & (-start - 1)))))));
99 | ```
100 |
--------------------------------------------------------------------------------
/src/bbsearch.c:
--------------------------------------------------------------------------------
1 | #include "bbsearch.h"
2 |
/*
 * One-element stand-in that is searched instead of the caller's array when
 * size == 0, so the branch-free code always has a valid element to read.
 */
static int32_t EMPTY_INT32[1] = {INT32_MAX};

/*
 * Performs one branch-free binary search step on the window [*start, *end).
 *
 * The element at the midpoint is compared with `key` and the window is
 * narrowed without any conditional jump:
 *   - key <  value: *end   becomes the midpoint, *start is kept;
 *   - key >  value: *start becomes the midpoint, *end   is kept;
 *   - key == value: the window collapses to [midpoint, midpoint + 1).
 * Running the step again on an already collapsed window leaves it unchanged,
 * which is what allows bbsearch() to execute a fixed number of steps.
 */
__attribute__((always_inline)) static inline void update_search_window(
        const int32_t *array, ptrdiff_t *start, ptrdiff_t *end, const int32_t key) {
    /* Same value as (*start + *end) / 2 for 0 <= *start <= *end, but the
     * intermediate sum cannot overflow. */
    ptrdiff_t median = *start + (*end - *start) / 2;
    int32_t value = array[median];

    /* All bits set when the comparison holds, zero otherwise. */
    uintptr_t start_mask = -(uintptr_t) (key < value);
    uintptr_t end_mask = -(uintptr_t) (key > value);

    /* Select between the old bound and the midpoint with the masks. */
    *start = (median & ~start_mask) | (*start & start_mask);
    /* The trailing term is 1 only when key == value (both masks are zero). */
    *end = ((median & ~end_mask) | (*end & end_mask)) +
           (~start_mask & ~end_mask & 1);
}

/*
 * Branch-free binary search over a sorted (ascending) array of int32_t.
 *
 * array: elements to search; not dereferenced when size == 0.
 * size:  number of elements in `array`.
 * key:   value to look for.
 *
 * Returns the index of an element equal to `key` when one exists. Otherwise
 * returns -(insertion_point) - 1, where insertion_point is the index at which
 * `key` would have to be inserted to keep the array sorted. An empty array
 * therefore yields -1.
 */
ptrdiff_t bbsearch(int32_t *array, size_t size, int32_t key) {
    int is_non_empty_mask = -(size > 0);
    int is_empty_mask = ~is_non_empty_mask;

    // Number of significant bits in size == number of search steps needed.
    // Count-leading-zeros is undefined for 0, so the low bit is forced on;
    // this does not change the result for any size > 0. The `ll` variant is
    // used so the operand is 64 bits wide even where `long` is 32 bits.
    unsigned int bits =
            64u - (unsigned int) __builtin_clzll((unsigned long long) size | 1ull);

    // An empty array needs no search steps at all.
    bits = (unsigned int) is_non_empty_mask & bits;

    // Clear most bits to make a jump table more likely
    // The switch below has extra cases to avoid checking if bits is handled by
    // a case
    bits = bits % 64;

    ptrdiff_t start = 0;

    // use a dummy array if size == 0
    array = (int32_t *) (((uintptr_t) is_empty_mask & (uintptr_t) EMPTY_INT32) |
                         ((uintptr_t) is_non_empty_mask & (uintptr_t) array));

    // Exclusive upper bound of the window: size, or 1 for the dummy array.
    ptrdiff_t end = (ptrdiff_t) (((size_t) is_non_empty_mask & size) |
                                 ((size_t) is_empty_mask & 1u));

    // Number of elements that may legally be read from `array`.
    const ptrdiff_t limit = end;

    // Deliberate fall-through: entering at case N runs exactly N steps.
    switch (bits) { // NOLINT(hicpp-multiway-paths-covered)
        case 63: update_search_window(array, &start, &end, key);
        case 62: update_search_window(array, &start, &end, key);
        case 61: update_search_window(array, &start, &end, key);
        case 60: update_search_window(array, &start, &end, key);
        case 59: update_search_window(array, &start, &end, key);
        case 58: update_search_window(array, &start, &end, key);
        case 57: update_search_window(array, &start, &end, key);
        case 56: update_search_window(array, &start, &end, key);
        case 55: update_search_window(array, &start, &end, key);
        case 54: update_search_window(array, &start, &end, key);
        case 53: update_search_window(array, &start, &end, key);
        case 52: update_search_window(array, &start, &end, key);
        case 51: update_search_window(array, &start, &end, key);
        case 50: update_search_window(array, &start, &end, key);
        case 49: update_search_window(array, &start, &end, key);
        case 48: update_search_window(array, &start, &end, key);
        case 47: update_search_window(array, &start, &end, key);
        case 46: update_search_window(array, &start, &end, key);
        case 45: update_search_window(array, &start, &end, key);
        case 44: update_search_window(array, &start, &end, key);
        case 43: update_search_window(array, &start, &end, key);
        case 42: update_search_window(array, &start, &end, key);
        case 41: update_search_window(array, &start, &end, key);
        case 40: update_search_window(array, &start, &end, key);
        case 39: update_search_window(array, &start, &end, key);
        case 38: update_search_window(array, &start, &end, key);
        case 37: update_search_window(array, &start, &end, key);
        case 36: update_search_window(array, &start, &end, key);
        case 35: update_search_window(array, &start, &end, key);
        case 34: update_search_window(array, &start, &end, key);
        case 33: update_search_window(array, &start, &end, key);
        case 32: update_search_window(array, &start, &end, key);
        case 31: update_search_window(array, &start, &end, key);
        case 30: update_search_window(array, &start, &end, key);
        case 29: update_search_window(array, &start, &end, key);
        case 28: update_search_window(array, &start, &end, key);
        case 27: update_search_window(array, &start, &end, key);
        case 26: update_search_window(array, &start, &end, key);
        case 25: update_search_window(array, &start, &end, key);
        case 24: update_search_window(array, &start, &end, key);
        case 23: update_search_window(array, &start, &end, key);
        case 22: update_search_window(array, &start, &end, key);
        case 21: update_search_window(array, &start, &end, key);
        case 20: update_search_window(array, &start, &end, key);
        case 19: update_search_window(array, &start, &end, key);
        case 18: update_search_window(array, &start, &end, key);
        case 17: update_search_window(array, &start, &end, key);
        case 16: update_search_window(array, &start, &end, key);
        case 15: update_search_window(array, &start, &end, key);
        case 14: update_search_window(array, &start, &end, key);
        case 13: update_search_window(array, &start, &end, key);
        case 12: update_search_window(array, &start, &end, key);
        case 11: update_search_window(array, &start, &end, key);
        case 10: update_search_window(array, &start, &end, key);
        case 9: update_search_window(array, &start, &end, key);
        case 8: update_search_window(array, &start, &end, key);
        case 7: update_search_window(array, &start, &end, key);
        case 6: update_search_window(array, &start, &end, key);
        case 5: update_search_window(array, &start, &end, key);
        case 4: update_search_window(array, &start, &end, key);
        case 3: update_search_window(array, &start, &end, key);
        case 2: update_search_window(array, &start, &end, key);
        case 1: update_search_window(array, &start, &end, key);
        case 0: {}
    }

    // `end` is an exclusive bound and may still equal `limit` (key larger than
    // every element, or the dummy array). Step back one element in that case
    // so the read below stays inside the array. The value read is only
    // consulted when key < array[start], which cannot happen while
    // end == limit, so the result is unaffected.
    const ptrdiff_t end_index = end - (ptrdiff_t) (end >= limit);

    /* this is equivalent to the bitwise operations below
    if (is_empty_mask) {
      return -1;
    } else if (array[start] == key) {
      return start;
    } else if (array[start] < key) {
      return -end - 1;
    } else if (array[end] > key) {
      return -start - 1;
    } else {
      return 0;
    }
    */
    int is_match_mask = -(array[start] == key);
    int is_after_start = -(array[start] < key);
    int is_before_end = -(array[end_index] > key);

    return (is_empty_mask & -1) | (is_non_empty_mask & (
            (is_match_mask & start) | (~is_match_mask & (
                    (is_after_start & (-end - 1)) |
                    (~is_after_start & (is_before_end & (-start - 1)))))));
}
127 |
--------------------------------------------------------------------------------
/src/bbsearch.h:
--------------------------------------------------------------------------------
#ifndef BBSEARCH_H
#define BBSEARCH_H

#include <stddef.h>  /* ptrdiff_t, size_t */
#include <stdint.h>  /* int32_t */

#ifdef __cplusplus
extern "C" {
#endif

/*
 * Branch-free binary search over a sorted (ascending) array of int32_t.
 *
 * array: elements to search.
 * size:  number of elements in `array`; may be 0.
 * key:   value to look for.
 *
 * Returns the index of an element equal to `key` when one exists. Otherwise
 * returns -(insertion_point) - 1, where insertion_point is the index at which
 * `key` would be inserted to keep the array sorted; an empty array yields -1.
 */
extern ptrdiff_t bbsearch(int32_t *array, size_t size, int32_t key);

#ifdef __cplusplus
}
#endif

#endif //BBSEARCH_H
18 |
--------------------------------------------------------------------------------
/test/test_bbsearch.cpp:
--------------------------------------------------------------------------------
#include "gtest/gtest.h"
#include "bbsearch.h"

#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <random>
#include <set>
#include <vector>
11 |
// Number of randomized rounds each property-style test performs.
static constexpr int TOTAL_TRIALS = 20000;

// Three-way comparator for two int32_t values passed by address, in the shape
// bsearch() expects: returns -1, 0 or 1 when *a is less than, equal to or
// greater than *b.
static int compare_int32(const void * a, const void * b) {
    const int32_t lhs = *static_cast<const int32_t *>(a);
    const int32_t rhs = *static_cast<const int32_t *>(b);
    return (lhs > rhs) - (lhs < rhs);
}
23 |
24 | TEST(BranchlessBinarySearchTest, RandomSparse) {
25 | std::default_random_engine generator;
26 | std::vector array;
27 | for (int trial = 0; trial < TOTAL_TRIALS; ++trial) {
28 | array.clear();
29 |
30 | for (auto i = std::uniform_int_distribution(0, 1000)(generator); i >= 0; --i) {
31 | array.push_back(generator());
32 | }
33 |
34 | int32_t key = array[0];
35 | std::sort(array.begin(), array.end());
36 |
37 | ptrdiff_t index = bbsearch(array.data(), array.size(), key);
38 | ASSERT_EQ(key, array[index]);
39 | }
40 | }
41 |
42 | TEST(BranchlessBinarySearchTest, RandomDense) {
43 | std::default_random_engine generator;
44 | std::vector array;
45 |
46 | for (int trial = 0; trial < TOTAL_TRIALS; ++trial) {
47 | array.clear();
48 |
49 | for (auto i = std::uniform_int_distribution(0, 1000)(generator); i >= 0; --i) {
50 | array.push_back(generator() % 100);
51 | }
52 |
53 | int32_t key = array[0];
54 | std::sort(array.begin(), array.end());
55 |
56 | ptrdiff_t index = bbsearch(array.data(), array.size(), key);
57 | ASSERT_EQ(key, array[index]);
58 | }
59 | }
60 |
61 | TEST(BranchlessBinarySearchTest, Empty) {
62 | std::vector array;
63 | ptrdiff_t index = bbsearch(array.data(), array.size(), 42);
64 | ASSERT_EQ(-1, index);
65 | }
66 |
67 | TEST(BranchlessBinarySearchTest, MissingSparse) {
68 | std::default_random_engine generator;
69 | std::vector array;
70 | for (int trial = 0; trial < TOTAL_TRIALS; ++trial) {
71 | array.clear();
72 | std::set set;
73 |
74 | for (auto i = std::uniform_int_distribution(0, 1000)(generator); i >= 0; --i) {
75 | auto val = generator();
76 | set.insert(val);
77 | array.push_back(val);
78 | }
79 |
80 | std::sort(array.begin(), array.end());
81 |
82 | int32_t key;
83 | do {
84 | key = generator();
85 | } while (set.find(key) != set.end());
86 |
87 | ptrdiff_t index = bbsearch(array.data(), array.size(), key);
88 |
89 | ASSERT_LT(index, 0);
90 | ptrdiff_t insertion_index = -(index + 1);
91 | if (insertion_index > 0) {
92 | ASSERT_GE(key, array[insertion_index - 1]);
93 | }
94 | if (insertion_index < array.size()) {
95 | ASSERT_LE(key,array[insertion_index]);
96 | }
97 | }
98 | }
99 |
100 | TEST(BranchlessBinarySearchTest, MissingDense) {
101 | std::default_random_engine generator;
102 | std::vector array;
103 | for (int trial = 0; trial < TOTAL_TRIALS; ++trial) {
104 | array.clear();
105 | int32_t key = generator() % 100;
106 |
107 | for (auto i = std::uniform_int_distribution(0, 1000)(generator); i >= 0; --i) {
108 | int32_t val;
109 | while ((val = generator()) == key);
110 | array.push_back(val);
111 | }
112 |
113 | std::sort(array.begin(), array.end());
114 |
115 | ptrdiff_t index = bbsearch(array.data(), array.size(), key);
116 |
117 | ASSERT_LT(index, 0);
118 | ptrdiff_t insertion_index = -(index + 1);
119 | if (insertion_index > 0) {
120 | ASSERT_GE(key, array[insertion_index - 1]);
121 | }
122 | if (insertion_index < array.size()) {
123 | ASSERT_LE(key,array[insertion_index]);
124 | }
125 | }
126 | }
127 |
128 | TEST(BranchlessBinarySearchTest, BenchmarkSparse) {
129 | std::default_random_engine generator;
130 | std::vector array;
131 |
132 | for (int i = 0; i < 65536; ++i) {
133 | array.push_back(i);
134 | }
135 |
136 | int64_t branchless_nanos = 0;
137 | int64_t branchless_trials = 0;
138 | int64_t builtin_nanos = 0;
139 | int64_t builtin_trials = 0;
140 |
141 | int use_branchless = 0;
142 |
143 | for (int trial = 0; trial < 1000000; ++trial) {
144 | int32_t key = std::uniform_int_distribution(0, array.size() - 1)(generator);
145 |
146 | auto begin = std::chrono::high_resolution_clock::now();
147 | if (use_branchless) {
148 | bbsearch(array.data(), array.size(), key);
149 | } else {
150 | bsearch(&key, array.data(), array.size(), sizeof(array[0]), compare_int32);
151 | }
152 | auto end = std::chrono::high_resolution_clock::now();
153 |
154 | if (use_branchless) {
155 | branchless_nanos += std::chrono::duration_cast(end-begin).count();
156 | ++branchless_trials;
157 | } else {
158 | builtin_nanos += std::chrono::duration_cast(end-begin).count();
159 | ++builtin_trials;
160 | }
161 | use_branchless = !use_branchless;
162 | }
163 |
164 | printf("mean branchless time: %ldns; mean builtin time: %ldns\n",
165 | branchless_nanos / branchless_trials,
166 | builtin_nanos / builtin_trials);
167 | }
168 |
169 | int main(int argc, char **argv) {
170 | ::testing::InitGoogleTest(&argc, argv);
171 | return RUN_ALL_TESTS();
172 | }
173 |
--------------------------------------------------------------------------------