├── .github ├── CONTRIBUTING.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ └── main.yml ├── .gitignore ├── .package ├── LICENSE ├── README.md ├── bgen.h ├── docs ├── API.md ├── SPATIAL_BTREE.md └── assets │ ├── anim.html │ ├── hilbert.js │ ├── none.js │ ├── rects.c │ ├── rects.html │ ├── sbtree.html │ ├── spatial-animation-dark.gif │ ├── spatial-animation-light.gif │ ├── spatial-hilbert-dark.png │ ├── spatial-hilbert-light.png │ ├── spatial-none-dark.png │ ├── spatial-none-light.png │ ├── spatial-normal-dark.png │ ├── spatial-normal-light.png │ ├── spatial-zorder-dark.png │ ├── spatial-zorder-light.png │ └── zorder.js ├── examples ├── README.md ├── deque.c ├── example.c ├── iteration.c ├── map.c ├── priority_queue.c ├── queue.c ├── set.c ├── spatial.c ├── stack.c └── vector.c └── tests ├── README.md ├── bench.sh ├── bench_b.c ├── bench_s.c ├── build.sh ├── cities.h ├── cov.sh ├── curve.h ├── dist.h ├── loop_run.sh ├── points.h ├── run.sh ├── test.sh ├── test_base.h ├── test_bsearch.c ├── test_counted.c ├── test_linear.c ├── test_nested.c ├── test_spatial1.c ├── test_spatial2.c ├── test_spatial2x.c ├── test_spatial3.c ├── test_vector.c └── testutils.h /.github/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ### Contributing 2 | 3 | - **[Bugs]** If you find a bug, file an issue. Include a detailed description and steps to reproduce the problem. 4 | 5 | - **[New features]** I don't accept new features without prior discussion. If you or your company needs a specialized feature, make sure to express your willingness to fund the work and maintenance. 6 | 7 | - **[Pull requests]** Please do not open a pull request without filing an issue and/or discussing it with me beforehand. 8 | 9 | - **[Support]** My software is free and comes with no warranty. If you need priority support, contact me directly. 
10 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | Please do not open a pull request without first filing an issue and/or discussing the feature directly with me. 2 | 3 | ### Please ensure you adhere to every item in this list 4 | 5 | - [ ] This PR was pre-approved by the project maintainer 6 | - [ ] I have self-reviewed the code 7 | - [ ] I have added all necessary tests 8 | 9 | ### Describe your changes 10 | 11 | Please provide a detailed description of the changes. 12 | 13 | ### Issue number and link 14 | 15 | Pull requests require a prior issue with discussion. 16 | Include the issue number or link here. 17 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: Vanilla C CI 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | build: 11 | 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v3 16 | - name: test 17 | run: tests/run.sh 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .vscode 3 | *.dSYM 4 | *.out 5 | *.profraw 6 | *.profdata 7 | *.log 8 | *.out.js 9 | *.out.worker.js 10 | *.out.wasm 11 | -------------------------------------------------------------------------------- /.package: -------------------------------------------------------------------------------- 1 | file bgen.h 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2024 Joshua J Baker 2 | 3 | Permission is hereby granted, free of 
charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bgen 2 | 3 | [![api reference](https://img.shields.io/badge/api-reference-blue.svg)](docs/API.md) 4 | 5 | Bgen is a [B-tree](https://en.wikipedia.org/wiki/B-tree) generator for C. 6 | It's small & fast and includes a variety of options for creating custom 7 | in-memory btree based collections. 8 | 9 | ## Features 10 | 11 | - Compile-time generation using preprocessor templates 12 | - Type-safe generic data structure 13 | - Single-file header with no dependencies 14 | - [Namespaces](#namespaces) 15 | - Support for [custom allocators](#custom-allocators) 16 | - Callback and loop-based [iteration](#iterators) 17 | - [Copy-on-write](#copy-on-write) with O(1) cloning. 
18 | - Loads of useful [toggles and options](#options) 19 | - Enable specialized btrees 20 | - [Counted B-tree](#counted-b-tree) 21 | - [Vector B-tree](#vector-b-tree) 22 | - [Spatial B-tree](#spatial-b-tree) 23 | - Supports most C compilers (C99+). Clang, gcc, tcc, etc 24 | - Webassembly support with Emscripten (emcc) 25 | - Exhaustively [tested](tests/README.md) with 100% coverage 26 | - [Very fast](#performance) 🚀 27 | 28 | ## Goals 29 | 30 | - Give C programs high performance in-memory btrees 31 | - Provide a template system for optimized code generation 32 | - Allow for sane customizations and options 33 | - Make it possible to use one btree library for a variety of collection types, 34 | such as maps, sets, stacks, queues, lists, vectors, and spatial indexes. 35 | See the [examples](examples). 36 | 37 | It's a non-goal for bgen to provide disk-based functionality or a B+tree 38 | implementation. 39 | 40 | ## Using 41 | 42 | Just drop the "bgen.h" into your project and create your btree using the 43 | C preprocessor. 44 | 45 | ## Example 1 (Insert items) 46 | 47 | Insert items into a simple btree that only stores ints. 48 | 49 | ```c 50 | #include 51 | 52 | #define BGEN_NAME bt // The namespace for the btree structure. 53 | #define BGEN_TYPE int // The data type for all items in the btree 54 | #define BGEN_LESS return a < b; // A code fragment for comparing items 55 | #include "../bgen.h" // Include "bgen.h" to generate the btree 56 | 57 | int main() { 58 | // Create an empty btree instance. 
59 | struct bt *tree = 0; 60 | 61 | // Insert some items into the btree 62 | bt_insert(&tree, 3, 0, 0); 63 | bt_insert(&tree, 8, 0, 0); 64 | bt_insert(&tree, 2, 0, 0); 65 | bt_insert(&tree, 5, 0, 0); 66 | 67 | // Print items in tree 68 | struct bt_iter *iter; 69 | bt_iter_init(&tree, &iter, 0); 70 | for (bt_iter_scan(iter); bt_iter_valid(iter); bt_iter_next(iter)) { 71 | int item; 72 | bt_iter_item(iter, &item); 73 | printf("%d ", item); 74 | } 75 | printf("\n"); 76 | 77 | // Delete an item 78 | bt_delete(&tree, 3, 0, 0); 79 | 80 | // Print again 81 | for (bt_iter_scan(iter); bt_iter_valid(iter); bt_iter_next(iter)) { 82 | int item; 83 | bt_iter_item(iter, &item); 84 | printf("%d ", item); 85 | } 86 | printf("\n"); 87 | 88 | bt_iter_release(iter); 89 | bt_clear(&tree, 0); 90 | return 0; 91 | } 92 | // Output: 93 | // 2 3 5 8 94 | // 2 5 8 95 | ``` 96 | 97 | ## Example 2 (Key-value map) 98 | 99 | Create a key-value map where the key is a string and value is an int. 100 | 101 | ```c 102 | #include 103 | #include 104 | #include 105 | #include 106 | 107 | struct pair { 108 | const char *key; 109 | int value; 110 | }; 111 | 112 | #define BGEN_NAME map 113 | #define BGEN_TYPE struct pair 114 | #define BGEN_COMPARE return strcmp(a.key, b.key); 115 | #include "../bgen.h" 116 | 117 | void print_map(const char *comment, struct map **map) { 118 | printf("%s", comment); 119 | struct map_iter *iter; 120 | map_iter_init(map, &iter, 0); 121 | for (map_iter_scan(iter); map_iter_valid(iter); map_iter_next(iter)) { 122 | struct pair pair; 123 | map_iter_item(iter, &pair); 124 | printf("[%s] = %d; ", pair.key, pair.value); 125 | } 126 | map_iter_release(iter); 127 | printf("\n"); 128 | } 129 | 130 | int main() { 131 | // Create a map of three (string, int) pairs 132 | struct map *map = 0; 133 | map_insert(&map, (struct pair){"GPU", 15}, 0, 0); 134 | map_insert(&map, (struct pair){"RAM", 20}, 0, 0); 135 | map_insert(&map, (struct pair){"CPU", 10}, 0, 0); 136 | print_map("1) Initial 
map: ", &map); 137 | 138 | // Get an existing item 139 | struct pair item; 140 | assert(map_get(&map, (struct pair){"GPU"}, &item, 0) == map_FOUND); 141 | printf("2) Get item: [%s] = %d;\n", item.key, item.value); 142 | 143 | // Update an existing item 144 | assert(map_insert(&map, (struct pair){"CPU", 25}, 0, 0) == map_REPLACED); 145 | // Insert a new item 146 | assert(map_insert(&map, (struct pair){"SSD", 30}, 0, 0) == map_INSERTED); 147 | print_map("3) Updated map: ", &map); 148 | assert(map_insert(&map, (struct pair){"UPS"}, 0, 0) == map_INSERTED); 149 | print_map("4) Updated map: ", &map); 150 | 151 | assert(map_delete(&map, (struct pair){.key="GPU"}, 0, 0) == map_DELETED); 152 | print_map("5) After delete: ", &map); 153 | 154 | return 0; 155 | } 156 | 157 | // Output: 158 | // 1) Initial map: [CPU] = 10; [GPU] = 15; [RAM] = 20; 159 | // 2) Get item: [GPU] = 15; 160 | // 3) Updated map: [CPU] = 25; [GPU] = 15; [RAM] = 20; [SSD] = 30; 161 | // 4) Updated map: [CPU] = 25; [GPU] = 15; [RAM] = 20; [SSD] = 30; [UPS] = 0; 162 | // 5) After delete: [CPU] = 25; [RAM] = 20; [SSD] = 30; [UPS] = 0; 163 | ``` 164 | 165 | ## Example 3 (Priority queue) 166 | 167 | Create two [priority queues](https://en.wikipedia.org/wiki/Priority_queue). 168 | One ordered by the maximum value and the other by the minimum value. 
169 | 170 | ```c 171 | #include 172 | #include 173 | #include 174 | #include 175 | 176 | #define BGEN_NAME max_priority_queue 177 | #define BGEN_TYPE int 178 | #define BGEN_LESS return a < b; 179 | #include "../bgen.h" 180 | 181 | #define BGEN_NAME min_priority_queue 182 | #define BGEN_TYPE int 183 | #define BGEN_LESS return b < a; 184 | #include "../bgen.h" 185 | 186 | int main() { 187 | int data[] = { 1, 8, 5, 6, 3, 4, 0, 9, 7, 2 }; 188 | int n = sizeof(data)/sizeof(int); 189 | printf("data: "); 190 | for (int i = 0; i < n; i++) { 191 | printf("%d ", data[i]); 192 | } 193 | printf("\n"); 194 | 195 | struct max_priority_queue *max_priority_queue = 0; 196 | 197 | // Fill the priority queue. 198 | for (int i = 0; i < n; i++) { 199 | max_priority_queue_insert(&max_priority_queue, data[i], 0, 0); 200 | } 201 | 202 | printf("max_priority_queue: "); 203 | while (max_priority_queue_count(&max_priority_queue, 0) > 0) { 204 | int val; 205 | max_priority_queue_pop_front(&max_priority_queue, &val, 0); 206 | printf("%d ", val); 207 | } 208 | printf("\n"); 209 | 210 | struct min_priority_queue *min_priority_queue = 0; 211 | 212 | // Fill the priority queue. 213 | for (int i = 0; i < n; i++) { 214 | min_priority_queue_insert(&min_priority_queue, data[i], 0, 0); 215 | } 216 | 217 | printf("min_priority_queue: "); 218 | while (min_priority_queue_count(&min_priority_queue, 0) > 0) { 219 | int val; 220 | min_priority_queue_pop_front(&min_priority_queue, &val, 0); 221 | printf("%d ", val); 222 | } 223 | printf("\n"); 224 | 225 | 226 | return 0; 227 | } 228 | 229 | // Output: 230 | // data: 1 8 5 6 3 4 0 9 7 2 231 | // max_priority_queue: 0 1 2 3 4 5 6 7 8 9 232 | // min_priority_queue: 9 8 7 6 5 4 3 2 1 0 233 | ``` 234 | 235 | Check out the [examples](examples) directory for more examples, and 236 | the [API reference](docs/API.md) for the full list of operations. 237 | 238 | ## Options 239 | 240 | Bgen provides a bunch of options for customizing your btree. 
All options are 241 | set using the C preprocessor. 242 | 243 | | Option | Description | 244 | | :--------------------------- | :---------- | 245 | | BGEN_NAME `` | The [Namespace](#namespaces) | 246 | | BGEN_TYPE `` | The btree item type | 247 | | BGEN_FANOUT `` | Set the [fanout](#fanout) (max number of children per node) | 248 | | BGEN_LESS `` | Define a "less" [comparator](#comparators). Such as "a < b" | 249 | | BGEN_COMPARE `` | Define a "compare" [comparator](#comparators). Such as "a < b ? -1 : a > b" | 250 | | BGEN_MAYBELESSEQUAL `` | Define a [less-equal hint](#less-equal-hint) for complex compares (advanced) | 251 | | BGEN_MALLOC `` | Define [custom malloc](#custom-allocators) function | 252 | | BGEN_FREE `` | Define [custom free](#custom-allocators) function | 253 | | BGEN_BSEARCH | Enable [binary searching](#binary-search-or-linear-search) (otherwise [linear](#binary-search-or-linear-search)) | 254 | | BGEN_COW | Enable [copy-on-write](#copy-on-write) support | 255 | | BGEN_COUNTED | Enable [counted btree](#counted-b-tree) support | 256 | | BGEN_SPATIAL | Enable [spatial btree](#spatial-b-tree) support | 257 | | BGEN_NOORDER | Disable all ordering. (btree becomes a [dynamic array](#vector-b-tree)) | 258 | | BGEN_NOATOMICS | Disable atomics for [copy-on-write](#copy-on-write) (single threaded only) | 259 | | BGEN_NOHINTS | Disable path hints ([path hints](#path-hints) are only available for [bsearch](#binary-search-or-linear-search)) | 260 | | BGEN_ITEMCOPY `` | Define operation for [internally copying items](#item-copying-and-freeing) | 261 | | BGEN_ITEMFREE `` | Define operation for [internally freeing items](#item-copying-and-freeing) | 262 | | BGEN_DIMS `` | Define the number of dimensions for [spatial btree](#spatial-b-tree) | 263 | | BGEN_ITEMRECT `` | Define a rect filling operation for [spatial btree](#spatial-b-tree) | 264 | | BGEN_RTYPE `` | Define a rect coordinate type for [spatial btree](#spatial-b-tree) (default double) | 265 | | BGEN_HEADER | Generate header declaration only. 
See [Header and source](#header-and-source) | 266 | | BGEN_SOURCE | Generate source declaration only. See [Header and source](#header-and-source) | 267 | 268 | ## Namespaces 269 | 270 | Each bgen btree will have its own namespace using the `BGEN_NAME` define. 271 | 272 | For example, the following will create a btree using the `users` namespace. 273 | 274 | ```c 275 | #define BGEN_NAME users 276 | #define BGEN_TYPE struct user 277 | #define BGEN_LESS return a.id < b.id; 278 | #include "bgen.h" 279 | ``` 280 | 281 | This will generate all the functions and types using the `users` prefix, such as: 282 | 283 | ```c 284 | struct users; // The btree type 285 | int users_get(struct users **root, struct user key, struct user *item, void *udata); 286 | int users_insert(struct users **root, struct user item, struct user *old, void *udata); 287 | int users_delete(struct users **root, struct user key, struct user *old, void *udata); 288 | ``` 289 | 290 | Many more functions will also be generated, see the [API](docs/API.md) for a complete list. 291 | 292 | It's also possible to generate multiple btrees in the same source file. 293 | 294 | ```c 295 | #define BGEN_NAME users 296 | #define BGEN_TYPE struct user 297 | #define BGEN_LESS return a.id < b.id; 298 | #include "bgen.h" 299 | 300 | #define BGEN_NAME orders 301 | #define BGEN_TYPE struct order 302 | #define BGEN_LESS return a.id < b.id; 303 | #include "bgen.h" 304 | 305 | #define BGEN_NAME events 306 | #define BGEN_TYPE struct event 307 | #define BGEN_LESS return a.id < b.id; 308 | #include "bgen.h" 309 | ``` 310 | 311 | For the remainder of this README, and unless otherwise specified, the prefix 312 | `bt` will be used as the namespace. 313 | 314 | ## Comparators 315 | 316 | Every btree requires one comparator, which is a code fragment that compares two 317 | items, using BGEN_LESS or BGEN_COMPARE. 318 | 319 | Bgen provides three variables to the code fragment `a`, `b`, and `udata`. 
320 | The `a` and `b` variables are the items that need to be compared, and `udata` is 321 | optional [user data](#the-udata-parameter) that may be provided to any bgen 322 | operation. 323 | 324 | ```c 325 | #define BGEN_LESS return a < b; /* return true or false */ 326 | #define BGEN_COMPARE return a < b ? -1 : a > b; /* return -1, 0, 1 */ 327 | ``` 328 | 329 | It's up to the developer to choose which of the two is most appropriate. 330 | But in general, BGEN_LESS is a good choice for numeric comparisons and 331 | BGEN_COMPARE may be better suited for strings and more complex keys. 332 | 333 | ## Binary search or Linear search 334 | 335 | Bgen defaults to linear searching. This means that btree operations will 336 | perform internal searches by scanning the items one-by-one. This is often very 337 | cache-efficient, providing excellent performance for [small nodes](#fanout). 338 | 339 | Optionally the BGEN_BSEARCH may be used to enable binary searches instead of 340 | linear. This may be better for large nodes or where comparing items may be slow. 341 | 342 | Note that bgen automatically enables [path hints](#path-hints) when the 343 | BGEN_BSEARCH option is provided. 344 | 345 | ## Less-equal hint 346 | 347 | The BGEN_MAYBELESSEQUAL is a code fragment option that may be provided as an 348 | optimization to speed up linear searches for complex comparisons. 349 | More specifically for tuple-like items with composite keys, where the leading 350 | field in the tuple is numeric and the other fields are indirect such as a 351 | pointer to a string. 352 | 353 | Bgen provides three variables to the code fragment `a`, `b`, and `udata`. 354 | 355 | For example, let's say you have a btree index "status_users" btree that orders 356 | on the composite key (status,name). 357 | 358 | ```c 359 | struct status_user { 360 | int status; 361 | char *name; 362 | char *desc; 363 | }; 364 | 365 | int user_compare(struct user a, struct user b) { 366 | return a.status < b.status ? 
-1 : a.status > b.status ? 1 : 367 | strcmp(a.name, b.name); 368 | } 369 | 370 | #define BGEN_NAME status_users 371 | #define BGEN_TYPE struct status_user 372 | #define BGEN_COMPARE return user_compare(a, b); 373 | #define BGEN_MAYBELESSEQUAL return a.status <= b.status; 374 | #include "bgen.h" 375 | ``` 376 | 377 | With the BGEN_MAYBELESSEQUAL option, the btree will perform a quick linear 378 | search on status and fall back to the slower user_compare function when needed. 379 | 380 | Note that BGEN_MAYBELESSEQUAL is only for linear searches and cannot be used in 381 | combination with BGEN_BSEARCH. 382 | 383 | ## Copy-on-write 384 | 385 | Bgen provides [copy-on-write](#copy-on-write) support when BGEN_COW is provided. 386 | If enabled, the `bt_clone()` function can make an instant O(1) copy of the 387 | btree. 388 | This implementation uses atomic reference counters to monitor the shared state 389 | of each node and performs just-in-time copies of nodes for mutable operations, 390 | such as `bt_insert()` and `bt_delete()`. 391 | 392 | The `BGEN_NOATOMICS` option may be provided to disable atomics, instead using 393 | normal integers as reference counters. This may be needed for single-threaded 394 | programs, embedded environments, or webassembly. 395 | 396 | With BGEN_COW, while all mutable operations will perform copy-on-write 397 | internally, immutable operations such as `bt_get()` will not. 398 | It is possible to force the btree to perform copy-on-write for otherwise 399 | immutable operations by using their `_mut()` alternatives. 400 | For example, `bt_get() / bt_get_mut()` and 401 | `bt_iter_init() / bt_iter_init_mut()`. 402 | 403 | ## Fanout 404 | 405 | The fanout is the maximum number of children an internal btree node may have. 406 | Bgen allows for setting the fanout using the BGEN_FANOUT option. 407 | The default is 16. 408 | 409 | Choosing the best fanout is dependent on a number of factors such as item size, 410 | key types, and system architecture. 
411 | In general, 8, 16, or 32 are typically pretty good choices. 412 | 413 | ## Custom allocators 414 | 415 | The BGEN_MALLOC and BGEN_FREE options can be used to provide a custom allocator for 416 | all btree operations. By default, the built-in `malloc()` and `free()` 417 | functions from `<stdlib.h>` are used. 418 | 419 | BGEN_MALLOC provides the `size` and `udata` variables. 420 | BGEN_FREE provides the `ptr`, the original `size`, and `udata` variables. 421 | 422 | ```c 423 | #define BGEN_MALLOC return mymalloc(size); 424 | #define BGEN_FREE myfree(ptr); 425 | ``` 426 | 427 | Bgen is designed for graceful error handling when malloc fails. 428 | All mutable btree operations such as `bt_insert()` may fail when attempting to 429 | allocate memory. It's generally a good idea to check for the `bt_NOMEM` 430 | [status code](#status-codes). 431 | 432 | ## Item copying and freeing 433 | 434 | When the `bt_copy()`, `bt_clone()`, and `bt_clear()` functions are 435 | used, the btree will internally copy and free nodes. 436 | With BGEN_ITEMFREE and BGEN_ITEMCOPY, it's possible to also have the btree copy 437 | and free items. 438 | 439 | This may be needed when items have internal memory allocations, such as strings 440 | or other heap-based fields, that require isolation per btree instance and to 441 | avoid memory corruptions such as double free errors. 442 | 443 | BGEN_ITEMCOPY provides the `item`, `copy`, and `udata` variables. 444 | BGEN_ITEMFREE provides the `item` and `udata` variables. 
445 | 446 | For example: 447 | 448 | ```c 449 | struct user { 450 | int id; 451 | char *name; 452 | }; 453 | 454 | bool copy_user(struct user item, struct user *copy) { 455 | copy->name = malloc(strlen(item.name)+1); 456 | if (!copy->name) { 457 | return false; 458 | } 459 | strcpy(copy->name, item.name); 460 | copy->id = item->id; 461 | return true; 462 | } 463 | 464 | void free_user(struct user item) { 465 | free(item.name); 466 | } 467 | 468 | #define BGEN_NAME users 469 | #define BGEN_TYPE struct user 470 | #define BGEN_LESS a.id < b.id 471 | #define BGEN_ITEMCOPY return copy_user(item, copy); 472 | #define BGEN_ITEMFREE free_user(item); 473 | #include "bgen.h" 474 | ``` 475 | 476 | Now when `users_clear()` is called all items will also be freed with 477 | `free_user()`, and when `users_clone()` or `users_copy()` are called items will 478 | automatically be copied with `copy_user()`. 479 | 480 | The BGEN_ITEMCOPY expects a return value of `true` or `false`, where `false` 481 | means that there was an error such as out of memory. 482 | 483 | ## Path hints 484 | 485 | Bgen uses path hints when BGEN_BSEARCH is provided. 486 | It's an automatic search optimization which causes the btree to track the 487 | search path of every operation, using that path as a hint for the next 488 | operation. 489 | 490 | It can lead to better performance for common access patterns, where subsequent 491 | operations work on items that are typically nearby each other in the btree. 492 | 493 | For more information see the 494 | [original document](https://github.com/tidwall/btree/blob/master/PATH_HINT.md). 495 | 496 | This implementation uses a thread-local variable to manage the hint. 497 | Other than providing BGEN_BSEARCH, there are no additional requirements to make 498 | this feature work. 499 | 500 | To disable path hints, provide the BGEN_NOHINTS option. 501 | 502 | ## Iterators 503 | 504 | Iteration comes in two flavors, callback and loop-based. 
505 | 506 | Callback iteration requires a callback function that will be called for each 507 | item in the iteration. 508 | 509 | For example, let's say you have a `users` btree that orders users on 510 | (last,first). 511 | 512 | ```c 513 | struct user { 514 | char *last; 515 | char *first; 516 | int age; 517 | }; 518 | 519 | int user_compare(struct user a, struct user b) { 520 | int cmp = strcmp(a.last, b.last); 521 | if (cmp == 0) { 522 | cmp = strcmp(a.first, b.first); 523 | } 524 | return cmp; 525 | } 526 | 527 | bool user_iter(struct user user, void *udata) { 528 | printf("%s %s (age=%d)\n", user.first, user.last, user.age); 529 | return true; 530 | } 531 | 532 | #define BGEN_NAME users 533 | #define BGEN_TYPE struct user 534 | #define BGEN_COMPARE { return user_compare(a, b); } 535 | #include "../bgen.h" 536 | ``` 537 | 538 | Callback iterators such as `bt_scan()` and `bt_seek()` are available. 539 | 540 | ```c 541 | bt_scan(&tree, user_iter, 0); 542 | ``` 543 | 544 | Loop iteration allows for keeping the iterator from leaving the current 545 | function. It takes a little more work to set up but is sometimes easier to 546 | manage the context of operation. 547 | 548 | ```c 549 | struct users_iter *iter; 550 | users_iter_init(&users, &iter, 0); 551 | users_iter_scan(iter); 552 | while (users_iter_valid(iter)) { 553 | users_iter_item(iter, &user); 554 | printf("%s %s (age=%d)\n", user.first, user.last, user.age); 555 | users_iter_next(iter); 556 | } 557 | users_iter_release(iter); 558 | ``` 559 | 560 | It's usually not safe to modify the btree while iterating. 561 | If you need to filter data then it's best to reset the iterator after 562 | each modification. 
563 | 564 | ```c 565 | struct users_iter *iter; 566 | users_iter_init(&users, &iter, 0); 567 | users_iter_scan(iter); 568 | while (users_iter_valid(iter)) { 569 | users_iter_item(iter, &user); 570 | if (user.age >= 30 && user.age < 40) { 571 | users_delete(&users, user, 0, 0); 572 | users_iter_seek(iter, user); 573 | continue; 574 | } 575 | users_iter_next(iter); 576 | } 577 | users_iter_release(iter); 578 | ``` 579 | 580 | Make sure to call `bt_iter_release()` when you are done iterating. 581 | 582 | ## Status codes 583 | 584 | Most btree operations, such as `bt_get()` and `bt_insert()`, return status 585 | codes that indicate the success of the operation. All status codes are prefixed 586 | with the same namespace as specified with BGEN_NAME. 587 | 588 | | Status | Description | 589 | | :------------- | :--- | 590 | | bt_INSERTED | New item was inserted | 591 | | bt_REPLACED | Item replaced an existing item | 592 | | bt_DELETED | Item was successfully deleted | 593 | | bt_FOUND | Item was successfully found | 594 | | bt_NOTFOUND | Item was not found | 595 | | bt_OUTOFORDER | Item cannot be inserted due to out of order | 596 | | bt_FINISHED | Callback iterator returned all items | 597 | | bt_STOPPED | Callback iterator was stopped early | 598 | | bt_COPIED | Tree was copied: `bt_clone()`, `bt_copy()` | 599 | | bt_NOMEM | Out of memory error | 600 | | bt_UNSUPPORTED | Operation not supported | 601 | 602 | It's always a good idea to check the return value of mutable btree operations to 603 | ensure it doesn't return an error. 604 | 605 | ## The udata parameter 606 | 607 | All bgen functions provide an optional `udata` parameter that may be used for 608 | user-defined data. What this data is used for is up to the developer. 609 | 610 | All operations, callbacks, and code fragments (such as BGEN_COMPARE and 611 | BGEN_LESS) provide a `udata` variable that is the same as what is passed to 612 | the original btree function. 
613 | 614 | ## Counted B-tree 615 | 616 | A [counted btree](https://www.chiark.greenend.org.uk/~sgtatham/algorithms/cbtree.html) 617 | allows for random access and modifications with O(log n) complexity. 618 | 619 | Adding the BGEN_COUNTED option enables this feature. 620 | 621 | This is pretty nice for programs that need to make changes using an index, 622 | rather than a key. It basically allows for functions like `bt_insert_at()`, 623 | `bt_delete_at()`, and `bt_get_at()` to modify and access items at any position. 624 | 625 | But it's worth noting that the `bt_insert_at()` operation still requires that 626 | items inserted at specific positions are in the correct order. 627 | The `bt_OUTOFORDER` error will be returned otherwise. 628 | 629 | ## Vector B-tree 630 | 631 | When the BGEN_COUNTED and BGEN_NOORDER options are both provided, bgen will 632 | generate a specialized btree that allows for both random access and storing 633 | items in any order. 634 | This effectively treats the btree like a dynamic array, aka a vector. 635 | 636 | Those familiar with vectors in other languages, such as Rust and C++, may know 637 | that appending and accessing items is fast but modifying is slow. 638 | 639 | With a bgen vector all operations have the same 640 | [time complexity](https://en.wikipedia.org/wiki/Time_complexity). 641 | 642 | | Operation | Bgen | Others | 643 | | :-------- | :------- | :----------- | 644 | | push_back | O(log n) | O(1) | 645 | | pop_back | O(log n) | O(1) | 646 | | get_at | O(log n) | O(1) | 647 | | push_front | O(log n) | O(n) | 648 | | pop_front | O(log n) | O(n) | 649 | | insert_at | O(log n) | O(n) | 650 | | delete_at | O(log n) | O(n) | 651 | 652 | Here's how to create a vector that stores ints. 
653 | 654 | ```c 655 | #define BGEN_NAME vector 656 | #define BGEN_TYPE int 657 | #define BGEN_COUNTED 658 | #define BGEN_NOORDER 659 | #include "../bgen.h" 660 | ``` 661 | 662 | Now `vector_insert_at()`, `vector_delete_at()`, and `vector_get_at()` can be 663 | used to modify and access items at any position, in any order. 664 | 665 | For a more detailed example, check out the [examples](examples) directory. 666 | 667 | ## Spatial B-tree 668 | 669 | A [spatial btree](docs/SPATIAL_BTREE.md) allows for working with 670 | multidimensional data. 671 | 672 | Adding the BGEN_SPATIAL option enables this feature. 673 | 674 | Additionally, the BGEN_ITEMRECT needs to be provided, which is responsible 675 | for filling the 'min' and 'max' rectangle (bounding box) for each item. 676 | This rectangle is used by the btree for efficient spatial searching. 677 | 678 | ```c 679 | void point_rect(struct point point, double min[], double max[]) { 680 | min[0] = point.x; 681 | min[1] = point.y; 682 | max[0] = point.x; 683 | max[1] = point.y; 684 | } 685 | 686 | #define BGEN_NAME spatial 687 | #define BGEN_TYPE struct point 688 | #define BGEN_SPATIAL 689 | #define BGEN_ITEMRECT point_rect(item, min, max); 690 | #define BGEN_COMPARE return point_compare(a, b); 691 | #include "../bgen.h" 692 | ``` 693 | 694 | By default, a spatial btree is two dimensions and uses `double` as the rectangle 695 | coordinate type. 696 | 697 | These can be changed using BGEN_DIMS and BGEN_RTYPE. 698 | 699 | ```c 700 | #define BGEN_DIMS 3 // use three dimensions instead of two 701 | #define BGEN_RTYPE uint32_t // use uint32_t instead of double 702 | ``` 703 | 704 | Once enabled you can use the `bt_intersects` and `bt_nearby` iterators to 705 | efficiently search intersecting rectangles and perform the nearest 706 | neighbors operation ([kNN](https://en.wikipedia.org/wiki/K-nearest_neighbors_algorithm)). 
707 | 708 | See the [spatial.c](examples/spatial.c) example from the [examples directory](examples). 709 | 710 | ## Header and source 711 | 712 | By default, bgen generates all the code as a static unit for the current source 713 | file that includes "bgen.h". 714 | 715 | This is great if all you need is to access the btree from that one file. 716 | But if you want other C source files to access those same btree functions too 717 | then you'll use the `BGEN_HEADER` and `BGEN_SOURCE` options. 718 | 719 | For example, here we'll create a "users.h" and "users.c" where one generates 720 | only the header declarations and the other generates the code. 721 | 722 | ```c 723 | // users.h 724 | #ifndef USERS_H 725 | #define USERS_H 726 | 727 | struct user { 728 | int id; 729 | char *name; 730 | }; 731 | 732 | #define BGEN_NAME users 733 | #define BGEN_TYPE struct user 734 | #define BGEN_HEADER 735 | #include "../deps/bgen.h" 736 | 737 | #endif 738 | ``` 739 | 740 | ```c 741 | // users.c 742 | #include "users.h" 743 | 744 | #define BGEN_NAME users 745 | #define BGEN_TYPE struct user 746 | #define BGEN_LESS return a.id < b.id; 747 | #define BGEN_SOURCE 748 | #include "../deps/bgen.h" 749 | ``` 750 | 751 | 752 | ## Performance 753 | 754 | The following benchmarks compare the performance of bgen to the very fast 755 | [frozenca/btree](https://github.com/frozenca/BTree) for C++ and the built-in 756 | Rust B-tree. 757 | 758 | Also compared is the bgen spatial btree vs a standard r-tree with data inserted 759 | in Hilbert order. 760 | 761 | *See the [tidwall/bgen-bench](https://github.com/tidwall/bgen-bench) project 762 | for more information* 763 | 764 | ### Details 765 | 766 | - Linux, AMD Ryzen 9 5950X 16-Core processor 767 | - CC=clang-17 CFLAGS=-ljemalloc 768 | - Items are simple 4-byte ints. 
769 | 770 | Benchmarking 1000000 items, 50 times, taking the average result 771 | 772 | ## Bgen B-tree 773 | 774 | ``` 775 | insert(seq) 1,000,000 ops in 0.042 secs 41.8 ns/op 23,933,327 op/sec 776 | insert(rand) 1,000,000 ops in 0.087 secs 86.7 ns/op 11,539,702 op/sec 777 | get(seq) 1,000,000 ops in 0.030 secs 30.3 ns/op 32,989,495 op/sec 778 | get(rand) 1,000,000 ops in 0.078 secs 78.0 ns/op 12,814,152 op/sec 779 | delete(seq) 1,000,000 ops in 0.018 secs 17.7 ns/op 56,342,904 op/sec 780 | delete(rand) 1,000,000 ops in 0.096 secs 96.4 ns/op 10,369,073 op/sec 781 | reinsert(rand) 1,000,000 ops in 0.082 secs 82.4 ns/op 12,138,316 op/sec 782 | push_first 1,000,000 ops in 0.009 secs 8.6 ns/op 116,842,897 op/sec 783 | push_last 1,000,000 ops in 0.010 secs 9.8 ns/op 101,998,378 op/sec 784 | pop_first 1,000,000 ops in 0.012 secs 12.3 ns/op 81,491,602 op/sec 785 | pop_last 1,000,000 ops in 0.012 secs 12.1 ns/op 82,480,762 op/sec 786 | scan 1,000,000 ops in 0.002 secs 1.5 ns/op 665,448,960 op/sec 787 | scan_desc 1,000,000 ops in 0.002 secs 1.8 ns/op 561,393,712 op/sec 788 | iter_scan 1,000,000 ops in 0.004 secs 3.6 ns/op 280,244,979 op/sec 789 | iter_scan_desc 1,000,000 ops in 0.004 secs 4.0 ns/op 248,567,689 op/sec 790 | ``` 791 | 792 | ## Rust B-tree 793 | 794 | ``` 795 | insert(seq) 1,000,000 ops in 0.049 secs 48.6 ns/op 20,574,261 op/sec 796 | insert(rand) 1,000,000 ops in 0.105 secs 105.4 ns/op 9,489,152 op/sec 797 | get(seq) 1,000,000 ops in 0.034 secs 33.7 ns/op 29,706,515 op/sec 798 | get(rand) 1,000,000 ops in 0.095 secs 94.6 ns/op 10,568,904 op/sec 799 | delete(seq) 1,000,000 ops in 0.023 secs 22.6 ns/op 44,236,754 op/sec 800 | delete(rand) 1,000,000 ops in 0.116 secs 115.8 ns/op 8,635,239 op/sec 801 | reinsert(rand) 1,000,000 ops in 0.097 secs 97.1 ns/op 10,299,834 op/sec 802 | ``` 803 | 804 | ## C++ B-tree ([frozenca/btree](https://github.com/frozenca/BTree)) 805 | 806 | ``` 807 | insert(seq) 1,000,000 ops in 0.054 secs 54.2 ns/op 18,435,446 op/sec 808 | 
insert(rand) 1,000,000 ops in 0.088 secs 88.0 ns/op 11,369,690 op/sec 809 | get(seq) 1,000,000 ops in 0.030 secs 29.5 ns/op 33,894,683 op/sec 810 | get(rand) 1,000,000 ops in 0.080 secs 79.5 ns/op 12,573,739 op/sec 811 | delete(seq) 1,000,000 ops in 0.023 secs 23.2 ns/op 43,042,237 op/sec 812 | delete(rand) 1,000,000 ops in 0.113 secs 113.4 ns/op 8,815,550 op/sec 813 | reinsert(rand) 1,000,000 ops in 0.101 secs 100.9 ns/op 9,909,315 op/sec 814 | ``` 815 | 816 | ## Bgen Spatial B-tree 817 | 818 | Random geospatial points in Hilbert curve order. 819 | 820 | ``` 821 | insert(seq) 1,000,000 ops in 0.056 secs 55.6 ns/op 17,982,904 op/sec 822 | insert(rand) 1,000,000 ops in 0.133 secs 132.9 ns/op 7,524,517 op/sec 823 | search-item(seq) 1,000,000 ops in 0.086 secs 85.8 ns/op 11,655,348 op/sec 824 | search-item(rand) 1,000,000 ops in 0.259 secs 258.5 ns/op 3,867,919 op/sec 825 | search-1% 1,000 ops in 0.002 secs 1580.6 ns/op 632,651 op/sec 826 | search-5% 1,000 ops in 0.017 secs 17456.8 ns/op 57,284 op/sec 827 | search-10% 1,000 ops in 0.053 secs 53262.3 ns/op 18,775 op/sec 828 | ``` 829 | 830 | ## R-tree ([tidwall/rtree.c](https://github.com/tidwall/rtree.c)) 831 | 832 | Random geospatial points inserted in Hilbert order. 833 | 834 | ``` 835 | insert(seq) 1,000,000 ops in 0.088 secs 87.7 ns/op 11,399,120 op/sec 836 | insert(rand) 1,000,000 ops in 0.162 secs 162.1 ns/op 6,169,577 op/sec 837 | search-item(seq) 1,000,000 ops in 0.095 secs 94.9 ns/op 10,536,006 op/sec 838 | search-item(rand) 1,000,000 ops in 0.312 secs 312.1 ns/op 3,204,491 op/sec 839 | search-1% 1,000 ops in 0.002 secs 1953.0 ns/op 512,023 op/sec 840 | search-5% 1,000 ops in 0.017 secs 16968.3 ns/op 58,933 op/sec 841 | search-10% 1,000 ops in 0.054 secs 53888.1 ns/op 18,556 op/sec 842 | ``` 843 | 844 | ## Contributing 845 | 846 | Read [CONTRIBUTING.md](.github/CONTRIBUTING.md), but in general please 847 | do not open a PR without talking to me first. 
848 | 849 | -------------------------------------------------------------------------------- /docs/API.md: -------------------------------------------------------------------------------- 1 | ## API 2 | 3 | C API for the [Bgen B-tree generator](https://github.com/tidwall/bgen). 4 | 5 | This document provides a description of the functions and types in the bgen.h source file. 6 | 7 | It's recommended to first read the more general overview in the project [README](https://github.com/tidwall/bgen). 8 | 9 | Below is a complete list of all functions generated by bgen. 10 | The "bt" namespace and "bitem" item type are used as placeholders. 11 | These are both configurable by the developer using BGEN_NAME and BGEN_TYPE. 12 | 13 | ```c 14 | #define BGEN_NAME bt 15 | #define BGEN_TYPE bitem 16 | ``` 17 | 18 | Every btree is provided the following functions. 19 | 20 | ### Basic operations 21 | 22 | ```c 23 | /// Get an item 24 | /// Returns bt_FOUND or bt_NOTFOUND 25 | /// Returns bt_UNSUPPORTED when BGEN_NOORDER 26 | int bt_get(struct bt **root, bitem key, bitem *item_out, void *udata); 27 | 28 | /// Insert or replace an item 29 | /// Returns bt_INSERTED, bt_REPLACED 30 | /// Returns bt_UNSUPPORTED when BGEN_NOORDER 31 | /// Returns bt_NOMEM when out of memory 32 | int bt_insert(struct bt **root, bitem item, bitem *item_out, void *udata); 33 | 34 | /// Delete an item 35 | /// Returns bt_DELETED, bt_NOTFOUND 36 | /// Returns bt_UNSUPPORTED when BGEN_NOORDER 37 | /// Returns bt_NOMEM when out of memory 38 | int bt_delete(struct bt **root, bitem key, bitem *item_out, void *udata); 39 | 40 | /// Returns true if the item exists 41 | bool bt_contains(struct bt **root, bitem key, void *udata); 42 | 43 | /// Remove all items and free all btree resources.
44 | int bt_clear(struct bt **root, void *udata); 45 | ``` 46 | 47 | ### Queues & stack 48 | 49 | ```c 50 | /// Get the first (minimum) item in the btree 51 | /// Returns bt_FOUND or bt_NOTFOUND 52 | int bt_front(struct bt **root, bitem *item_out, void *udata); 53 | 54 | /// Get the last (maximum) item in the btree 55 | /// Returns bt_FOUND or bt_NOTFOUND 56 | int bt_back(struct bt **root, bitem *item_out, void *udata); 57 | 58 | /// Delete the first (minimum) item from the btree 59 | /// Returns bt_DELETED, bt_NOTFOUND 60 | /// Returns bt_NOMEM when out of memory 61 | int bt_pop_front(struct bt **root, bitem *item_out, void *udata); 62 | 63 | /// Delete the last (maximum) item from the btree 64 | /// Returns bt_DELETED, bt_NOTFOUND 65 | /// Returns bt_NOMEM when out of memory 66 | int bt_pop_back(struct bt **root, bitem *item_out, void *udata); 67 | 68 | /// Insert as the first (minimum) item of the btree 69 | /// Returns bt_INSERTED 70 | /// Returns bt_OUTOFORDER when item is not the minimum 71 | /// Returns bt_NOMEM when out of memory 72 | int bt_push_front(struct bt **root, bitem item, void *udata); 73 | 74 | /// Insert as the last (maximum) item of the btree 75 | /// 76 | /// This operation is optimized for bulk-loading. 77 | /// 78 | /// Returns bt_INSERTED 79 | /// Returns bt_OUTOFORDER when item is not the maximum 80 | /// Returns bt_NOMEM when out of memory 81 | int bt_push_back(struct bt **root, bitem item, void *udata); 82 | ``` 83 | 84 | ### Counted B-tree operations 85 | 86 | The following operations are available when BGEN_COUNTED is provided to the 87 | generator. See [Counted B-tree](#counted-b-tree) for more information. Also, 88 | when BGEN_NOORDER is provided, the btree effectively becomes a 89 | [Vector B-tree](#vector-b-tree). 90 | 91 | ```c 92 | /// Insert an item at index 93 | /// 94 | /// Unless BGEN_NOORDER is being used, it's an error to attempt to insert an 95 | /// item that is out of order at the specified index.
96 | /// 97 | /// Returns bt_INSERTED 98 | /// Returns bt_OUTOFORDER when item is out of order for the index 99 | /// Returns bt_NOTFOUND when index is > btree count 100 | /// Returns bt_NOMEM when out of memory 101 | int bt_insert_at(struct bt **root, size_t index, bitem item, void *udata); 102 | 103 | /// Replace an item at index 104 | /// 105 | /// Unless BGEN_NOORDER is being used, it's an error to attempt to replace an 106 | /// item with another that is out of order at the specified index. 107 | /// 108 | /// Returns bt_REPLACED 109 | /// Returns bt_OUTOFORDER when item is out of order for the index 110 | /// Returns bt_NOTFOUND when index is >= btree count 111 | /// Returns bt_NOMEM when out of memory 112 | int bt_replace_at(struct bt **root, size_t index, bitem item, bitem *item_out, void *udata); 113 | 114 | /// Delete an item at index 115 | /// Returns bt_DELETED 116 | /// Returns bt_NOTFOUND when index is >= btree count 117 | /// Returns bt_NOMEM when out of memory 118 | int bt_delete_at(struct bt **root, size_t index, bitem *item_out, void *udata); 119 | 120 | /// Get item at index 121 | /// Returns bt_FOUND or bt_NOTFOUND 122 | int bt_get_at(struct bt **root, size_t index, bitem *item_out, void *udata); 123 | 124 | /// Get the index for a key 125 | /// Returns bt_FOUND or bt_NOTFOUND 126 | /// Returns bt_UNSUPPORTED when BGEN_NOORDER 127 | int bt_index_of(struct bt **root, bitem key, size_t *index, void *udata); 128 | 129 | /// Returns the number of items in btree 130 | size_t bt_count(struct bt **root, void *udata); 131 | 132 | /// Seek to a position in the btree and iterate over each subsequent item. 133 | /// 134 | /// Each item is returned in the "iter" callback. 135 | /// Returning "false" from "iter" will stop the iteration.
136 | /// 137 | /// Returns bt_STOPPED or bt_FINISHED 138 | int bt_seek_at(struct bt **root, size_t index, 139 | bool(*iter)(bitem item, void *udata), void *udata); 140 | 141 | /// Seek to a position in the btree and iterate over each subsequent item, but 142 | /// in reverse order. 143 | /// 144 | /// Each item is returned in the "iter" callback. 145 | /// Returning "false" from "iter" will stop the iteration. 146 | /// 147 | /// Returns bt_STOPPED or bt_FINISHED 148 | int bt_seek_at_desc(struct bt **root, size_t index, 149 | bool(*iter)(bitem item, void *udata), void *udata); 150 | ``` 151 | 152 | ### Spatial B-tree operations 153 | 154 | The following operations are available when BGEN_SPATIAL is provided to the 155 | generator. See [Spatial B-tree](#spatial-b-tree) for more information. 156 | 157 | ```c 158 | /// Search the btree for items that intersect the provided rectangle and 159 | /// iterate over each item 160 | /// 161 | /// Each intersecting item is returned in the "iter" callback. 162 | /// Returning "false" from "iter" will stop the iteration. 163 | /// 164 | /// Returns bt_STOPPED or bt_FINISHED 165 | int bt_intersects(struct bt **root, double min[], double max[], 166 | bool(*iter)(bitem item, void *udata), void *udata); 167 | 168 | /// Performs a kNN operation on the btree 169 | /// 170 | /// It's expected that the caller provides their own `dist` function, 171 | /// which is used to calculate a distance to rectangles and data. 172 | /// The "iter" callback will return all items from the minimum distance to 173 | /// maximum distance. 174 | /// 175 | /// Each item is returned to the "iter" callback. 176 | /// Returning "false" from "iter" will stop the iteration. 177 | /// 178 | /// Returns bt_STOPPED, bt_FINISHED 179 | /// Returns bt_NOMEM when out of memory 180 | /// 181 | /// There's an example showing how to use this with geospatial data included 182 | /// with the project repository.
183 | /// See https://github.com/tidwall/bgen/tree/main/examples 184 | int bt_nearby(struct bt **root, void *target, 185 | double(*dist)(double min[], double max[], void *target, void *udata), 186 | bool(*iter)(bitem item, void *udata), void *udata); 187 | 188 | /// Get the minimum bounding rectangle of the btree 189 | /// 190 | /// This fills the "min" and "max" params. It's important that min/max have 191 | /// enough room to store the coordinates for all dimensions. 192 | void bt_rect(struct bt **root, double min[], double max[], void *udata); 193 | ``` 194 | 195 | ### Copying and cloning 196 | 197 | ```c 198 | /// Copy a btree 199 | /// This creates a duplicate of the btree (deep copy). 200 | /// Returns bt_COPIED 201 | /// Returns bt_NOMEM when out of memory 202 | int bt_copy(struct bt **root, struct bt **newroot, void *udata); 203 | 204 | /// Copy a btree using copy-on-write 205 | /// This operation creates an instant snapshot of the btree and requires 206 | /// the BGEN_COW option. 207 | /// Returns bt_COPIED 208 | /// Returns bt_NOMEM when out of memory 209 | int bt_clone(struct bt **root, struct bt **newroot, void *udata); 210 | ``` 211 | 212 | ### Callback iteration 213 | 214 | ```c 215 | /// Iterate over every item in the btree. 216 | /// 217 | /// Each item is returned in the "iter" callback. 218 | /// Returning "false" from "iter" will stop the iteration. 219 | /// 220 | /// Returns bt_STOPPED or bt_FINISHED 221 | int bt_scan(struct bt **root, bool(*iter)(bitem item, void *udata), void *udata); 222 | 223 | /// Iterate over every item in the btree, but in reverse order 224 | /// 225 | /// Each item is returned in the "iter" callback. 226 | /// Returning "false" from "iter" will stop the iteration. 227 | /// 228 | /// Returns bt_STOPPED or bt_FINISHED 229 | int bt_scan_desc(struct bt **root, bool(*iter)(bitem item, void *udata), void *udata); 230 | 231 | /// Seek to a key in the btree and iterate over each subsequent item.
232 | /// 233 | /// Each item is returned in the "iter" callback. 234 | /// Returning "false" from "iter" will stop the iteration. 235 | /// 236 | /// Returns bt_STOPPED or bt_FINISHED 237 | int bt_seek(struct bt **root, bitem key, bool(*iter)(bitem item, void *udata), void *udata); 238 | 239 | /// Seek to a key in the btree and iterate over each subsequent item, but in 240 | /// reverse order. 241 | /// 242 | /// Each item is returned in the "iter" callback. 243 | /// Returning "false" from "iter" will stop the iteration. 244 | /// 245 | /// Returns bt_STOPPED or bt_FINISHED 246 | int bt_seek_desc(struct bt **root, bitem key, bool(*iter)(bitem item, void *udata), void *udata); 247 | 248 | /// Counted B-tree iterators. See their descriptions above. 249 | int bt_seek_at(struct bt **root, size_t index, 250 | bool(*iter)(bitem item, void *udata), void *udata); 251 | int bt_seek_at_desc(struct bt **root, size_t index, 252 | bool(*iter)(bitem item, void *udata), void *udata); 253 | 254 | /// Spatial B-tree iterators. See their descriptions above. 255 | int bt_intersects(struct bt **root, double min[], double max[], 256 | bool(*iter)(bitem item, void *udata), void *udata); 257 | int bt_nearby(struct bt **root, void *target, 258 | double(*dist)(double min[], double max[], void *target, void *udata), 259 | bool(*iter)(bitem item, void *udata), void *udata); 260 | ``` 261 | 262 | ### Loop iteration 263 | 264 | ```c 265 | /// Initialize an iterator 266 | /// Make sure to call bt_iter_release() when done iterating. 267 | void bt_iter_init(struct bt **root, struct bt_iter **iter, void *udata); 268 | 269 | /// Release the iterator when it's no longer needed 270 | void bt_iter_release(struct bt_iter *iter); 271 | 272 | /// Returns an error status code of the iterator, or zero if no error. 273 | int bt_iter_status(struct bt_iter *iter); 274 | 275 | /// Returns true if the iterator is valid and an item, using bt_iter_item() is 276 | /// available. 
277 | bool bt_iter_valid(struct bt_iter *iter); 278 | 279 | /// Get the current iterator item. 280 | /// REQUIRED: iter_valid() and item != NULL 281 | void bt_iter_item(struct bt_iter *iter, bitem *item); 282 | 283 | /// Move to the next item 284 | /// REQUIRED: iter_valid() 285 | void bt_iter_next(struct bt_iter *iter); 286 | 287 | /// Seek to a key in the btree and iterate over each subsequent item. 288 | void bt_iter_seek(struct bt_iter *iter, bitem key); 289 | 290 | /// Seek to a key in the btree and iterate over each subsequent item in reverse 291 | /// order. 292 | void bt_iter_seek_desc(struct bt_iter *iter, bitem key); 293 | 294 | /// Iterates over every item in the btree. 295 | void bt_iter_scan(struct bt_iter *iter); 296 | 297 | /// Iterates over every item in the btree in reverse order. 298 | void bt_iter_scan_desc(struct bt_iter *iter); 299 | 300 | /// Search the btree for items that intersect the provided rectangle and 301 | /// iterate over each item. 302 | void bt_iter_intersects(struct bt_iter *iter, double min[], double max[]); 303 | 304 | /// Performs a kNN operation on the btree 305 | /// 306 | /// It's expected that the caller provides their own `dist` function, 307 | /// which is used to calculate a distance to rectangles and data. 308 | /// 309 | /// This operation will allocate memory, so make sure to check the iter_status() 310 | /// when done. And, *always* use iter_release(). 311 | /// 312 | /// There's an example showing how to use this with geospatial data included 313 | /// with the project repository. 314 | /// See https://github.com/tidwall/bgen/tree/main/examples 315 | void bt_iter_nearby(struct bt_iter *iter, void *target, 316 | double(*dist)(double min[], double max[], void *target, void *udata)); 317 | 318 | /// Seek to a position in the btree and iterate over each subsequent item.
319 | void bt_iter_seek_at(struct bt_iter *iter, size_t index); 320 | 321 | /// Seek to a position in the btree and iterate over each subsequent item, but 322 | /// in reverse order. 323 | void bt_iter_seek_at_desc(struct bt_iter *iter, size_t index); 324 | ``` 325 | 326 | See the iteration example in the [examples](examples) directory for usage. 327 | 328 | ### Utilities 329 | 330 | ```c 331 | /// Compares two items 332 | /// Returns -1 - "a" is less than "b" 333 | /// Returns 0 - "a" and "b" are equal 334 | /// Returns +1 - "a" is greater than "b" 335 | int bt_compare(bitem a, bitem b, void *udata); 336 | 337 | /// Returns true if "a" is less than "b" 338 | bool bt_less(bitem a, bitem b, void *udata); 339 | 340 | /// Returns the height of the btree 341 | size_t bt_height(struct bt **root, void *udata); 342 | 343 | /// Returns true if the btree is "sane" 344 | /// This operation should always return true. 345 | bool bt_sane(struct bt **root, void *udata); 346 | ``` 347 | 348 | ### General info 349 | 350 | ```c 351 | /// Returns the maximum number of items in a node. 352 | int bt_feat_maxitems(); 353 | 354 | /// Returns the minimum number of items in a node. 355 | int bt_feat_minitems(); 356 | 357 | /// Returns the maximum height of btree. 358 | int bt_feat_maxheight(); 359 | 360 | /// Returns the max number of children. 361 | int bt_feat_fanout(); 362 | 363 | /// Returns true if the btree is a Counted B-tree. 364 | bool bt_feat_counted(); 365 | 366 | /// Returns true if the btree is a Spatial B-tree. 367 | bool bt_feat_spatial(); 368 | 369 | /// Returns the number of dimensions for Spatial B-tree. 370 | int bt_feat_dims(); 371 | 372 | /// Returns true if the btree is ordered. 373 | bool bt_feat_ordered(); 374 | 375 | /// Returns true if copy-on-write is enabled. 376 | bool bt_feat_cow(); 377 | 378 | /// Returns true if atomic reference counters are enabled. 379 | bool bt_feat_atomics(); 380 | 381 | /// Returns true if binary-searching is enabled.
382 | bool bt_feat_bsearch(); 383 | 384 | /// Returns true if path hints are enabled. 385 | bool bt_feat_pathhint(); 386 | ``` 387 | 388 | ### Mutable read operations 389 | 390 | The following operations are available when BGEN_COW is used. 391 | See [Copy-on-write](#copy-on-write) for more information. 392 | 393 | ```c 394 | int bt_get_mut( ... ); 395 | int bt_get_at_mut( ... ); 396 | int bt_front_mut( ... ); 397 | int bt_back_mut( ... ); 398 | void bt_iter_init_mut( ... ); 399 | int bt_scan_mut( ... ); 400 | int bt_scan_desc_mut( ... ); 401 | int bt_seek_mut( ... ); 402 | int bt_seek_desc_mut( ... ); 403 | int bt_intersects_mut( ... ); 404 | int bt_nearby_mut( ... ); 405 | int bt_seek_at_mut( ... ); 406 | int bt_seek_at_desc_mut( ... ); 407 | ``` 408 | -------------------------------------------------------------------------------- /docs/SPATIAL_BTREE.md: -------------------------------------------------------------------------------- 1 | # Spatial B-trees 2 | 3 | The B-tree is an awesome 4 | data structure. Its primary purpose is to maintain an 5 | ordered list of items, where each operation (insert, delete, search) is 6 | guaranteed a time complexity of 7 | O(log n). 8 | 9 | This document describes a new way to extend an existing B-tree for the use of 10 | multidimensional data. There's currently a whole bunch of existing structures 11 | that can be used for spatial data, including the 12 | [R-tree](https://en.wikipedia.org/wiki/R-tree), 13 | [K-d tree](https://en.wikipedia.org/wiki/K-d_tree), 14 | [Quadtree](https://en.wikipedia.org/wiki/Quadtree), and 15 | [UB-tree](https://en.wikipedia.org/wiki/UB-tree). 16 | 17 | This one is different. 18 | 19 | ## The Algorithm 20 | 21 | A standard B-tree is an ordered tree-based data structure that stores its items 22 | in nodes. 23 | 24 | The B-tree has a single root node, which may have children nodes, and those 25 | children nodes may also have children nodes.
26 | 27 | 28 | 29 | 30 | Spatial B-tree Normal 31 | 32 | 33 | The only change is that we'll now store the bounding box (MBR) information for 34 | each child node. 35 | This bounding box will be expanded to include the entire child node tree along 36 | with the current branch level item at the same index as the child. 37 | 38 | 39 | 40 | 41 | Spatial B-tree Animation 42 | 43 | 44 | ## Operations 45 | 46 | - Insert: Same algorithm as the original B-tree. Except now 47 | each bounding box, from leaf to root, will be expanded to make room for 48 | the new item. 49 | - Delete: Also the same as the original, with adjustments from leaf to root. 50 | - Spatial search: Works like an R-tree, where you scan each rectangle 51 | (bounding box) and take intersecting children. 52 | 53 | ## Key Order 54 | 55 | The Spatial B-tree leaves the order of the items up to you. 56 | This means the quality and performance of searching the tree can vary greatly 57 | depending on how this ordering is managed. 58 | 59 | For example, let's say you want to store geospatial points where each point 60 | is a tuple that contains at least three fields (id,lat,lon). In a standard 61 | B-tree you could order on 'id', but this may lead to subpar performance because 62 | the 'id' may not necessarily correspond with the (lat,lon). This will 63 | potentially leave points that are spatially far apart, nearby in the B-tree; 64 | and points that are spatially close, far apart in the B-tree. 65 | 66 | Ideally, for best performance you would use a space-filling curve algorithm, 67 | such as [Hilbert curve](https://en.wikipedia.org/wiki/Hilbert_curve) 68 | or [Z-order curve](https://en.wikipedia.org/wiki/Z-order_curve), to 69 | produce a curve value that would be stored along with the 'id'. So your tuple 70 | will look more like (curve,id,lat,lon), where the Spatial B-tree orders on 71 | (curve,id).
72 | 73 | Below is a visualization of different ordering strategies using a dataset 74 | of [10k cities](../tests/cities.h). 75 | 76 | ### No ordering 77 | 78 | Not great 79 | 80 | 81 | 82 | 83 | Spatial B-tree Normal 84 | 85 | 86 | ### Z-order 87 | 88 | Better 89 | 90 | 91 | 92 | 93 | Spatial B-tree Normal 94 | 95 | 96 | ### Hilbert 97 | 98 | Best 🚀 99 | 100 | 101 | 102 | 103 | Spatial B-tree Normal 104 | 105 | 106 | 107 | ## Comparison 108 | 109 | Structurally the Spatial B-tree is like the 110 | [Counted B-tree](https://www.chiark.greenend.org.uk/~sgtatham/algorithms/cbtree.html) 111 | but is functionally more similar to the 112 | [R-tree](https://en.wikipedia.org/wiki/R-tree) and the 113 | [UB-tree](https://en.wikipedia.org/wiki/UB-tree). 114 | 115 | ### R-tree 116 | 117 | Like the R-tree each child rectangle is the minimum bounding 118 | rectangle of the entire child tree. 119 | 120 | A difference is that the R-tree stores all items at the leaf level, just 121 | like a B+Tree. While the Spatial B-tree stores items in the branches and leaves, 122 | just like a standard B-tree. 123 | 124 | Another difference is that during insertion the R-tree and its variants, such 125 | as the R*tree, go to great lengths to determine the best ordering of the branch 126 | rectangles and items. Whenever a new item is inserted into an R-tree, from root 127 | to leaf, a complicated algorithm is used to choose the best child node to insert 128 | the item into. Depending on the quality of that algorithm, which isn't always 129 | identical across implementations, the performance of inserting and 130 | searching can vary greatly. 131 | 132 | The Spatial B-tree on the other hand inserts items exactly like a standard 133 | B-tree, by ordering on the item's key. As [stated above](#key-order), 134 | this means that you must choose your keys wisely.
135 | 136 | One R-tree variant worth noting is the 137 | [Hilbert R-tree](https://en.wikipedia.org/wiki/Hilbert_R-tree), which 138 | stores items in linear order using a Hilbert curve. This provides excellent 139 | search performance compared to other R-trees, and its ordering of items is very 140 | similar to a Spatial B-tree using a Hilbert curve in its key. But the 141 | structure is a bit more complicated than a traditional R-tree; it must 142 | track both LHVs (Largest Hilbert Value) and MBRs (Minimum Bounding Rectangle) 143 | for leaves and branches. This leads to extra work to maintain. And insertions 144 | and deletions are generally less efficient than a Spatial B-tree. 145 | 146 | ### UB-tree 147 | The Spatial B-tree and UB-tree both store items linearly based on the key. 148 | 149 | The UB-tree stores all items in the leaves (just like the R-tree), while the 150 | Spatial B-tree stores items in branches and leaves, like a standard B-tree. 151 | 152 | Another difference is that the UB-tree is designed to order on a Z-order curve, 153 | while the Spatial B-tree doesn't care, leaving it up to you what the ordering 154 | is. This opens up the Spatial B-tree to different strategies, such as Z-order 155 | or Hilbert or something else. 156 | 157 | Also the UB-tree does not store the MBRs (Minimum Bounding Rectangle) and 158 | thus cannot scan the tree for intersections like an R-tree and Spatial B-tree. 159 | Instead it needs to use an algorithm which basically looks 160 | at ranges of the Z-curve to find nearby nodes that overlap a target area. 161 | Effectively working kind of like the 162 | [Geohash covers](https://www.ibm.com/docs/en/db2/11.5?topic=concepts-geohashes-geohash-covers) 163 | algorithm. 164 | 165 | In general the Spatial B-tree is designed to search like an R-tree but have the 166 | simplicity of a standard B-tree.
167 | 168 | One more thing, the Spatial B-tree and UB-tree guarantee stable ordering 169 | of items, meaning that no matter what the order of inserts and deletes for a 170 | specific set of items might be, those items will always be returned in the same 171 | order when searching. R-tree ordering is unstable. This may be an important 172 | detail if you desire deterministic results. 173 | 174 | ## Performance 175 | 176 | The Spatial B-tree is as fast as a standard B-tree for inserts and deletes, which 177 | generally beats the R-tree. And is as fast as a Hilbert R-tree for searches when 178 | using hilbert curves. 179 | 180 | Much depends on the quality of the implementation when measuring the performance of 181 | these kinds of data structures. 182 | 183 | Here are some [benchmark results](https://github.com/tidwall/bgen#performance) comparing the Spatial B-tree to an 184 | R-tree with hilbert ordered inserts. 185 | And here's a fast C library for calculating a hilbert curve. [tidwall/curve](https://github.com/tidwall/curve). 186 | 187 | ## Implementation 188 | 189 | You can use the Spatial B-tree today using the [bgen: B-tree generator for C](https://github.com/tidwall/bgen). 
190 | -------------------------------------------------------------------------------- /docs/assets/anim.html: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | 10 | 13 | 14 | 15 | 16 | 548 | 549 | -------------------------------------------------------------------------------- /docs/assets/rects.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "../../tests/cities.h" 6 | #include "../../tests/curve.h" 7 | 8 | // #define ZORDER 9 | 10 | const char *order = "none"; 11 | 12 | void city_fillrect(struct city_entry city, double min[], double max[]) { 13 | min[0] = city.lon; 14 | min[1] = city.lat; 15 | max[0] = city.lon; 16 | max[1] = city.lat; 17 | } 18 | 19 | int city_compare(struct city_entry a, struct city_entry b) { 20 | double window[] = { -180, -90, 180, 90 }; 21 | uint32_t ac = 0; 22 | uint32_t bc = 0; 23 | if (strcmp(order, "hilbert") == 0) { 24 | ac = curve_hilbert(a.lon, a.lat, window); 25 | bc = curve_hilbert(b.lon, b.lat, window); 26 | } else if (strcmp(order, "zorder") == 0) { 27 | ac = curve_z(a.lon, a.lat, window); 28 | bc = curve_z(b.lon, b.lat, window); 29 | } 30 | return ac < bc ? -1 : ac > bc ? 1 : 31 | a.id < b.id ? 
-1 : a.id > b.id; 32 | } 33 | 34 | #define BGEN_NAME cities 35 | #define BGEN_TYPE struct city_entry 36 | #define BGEN_FANOUT 16 37 | #define BGEN_SPATIAL 38 | #define BGEN_ITEMRECT city_fillrect(item, min, max); 39 | #define BGEN_COMPARE return city_compare(a, b); 40 | #include "../../bgen.h" 41 | 42 | void print_rects(struct cities *node, int depth) { 43 | if (node->isleaf) { 44 | for (int i = 0; i < node->len; i++) { 45 | printf(" {\"depth\":%d,\"rect\":[%f,%f,%f,%f]},\n", depth, 46 | node->items[i].lon, node->items[i].lat, node->items[i].lon, 47 | node->items[i].lat); 48 | } 49 | } else { 50 | for (int i = 0; i <= node->len; i++) { 51 | double xmin = node->rects[i].min[0]; 52 | double ymin = node->rects[i].min[1]; 53 | double xmax = node->rects[i].max[0]; 54 | double ymax = node->rects[i].max[1]; 55 | printf(" {\"depth\":%d,\"rect\":[%f,%f,%f,%f]},\n", depth, xmin, ymin, 56 | xmax, ymax); 57 | print_rects(node->children[i], depth+1); 58 | if (i < node->len) { 59 | printf(" {\"depth\":%d,\"rect\":[%f,%f,%f,%f]},\n", depth, 60 | node->items[i].lon, node->items[i].lat, node->items[i].lon, 61 | node->items[i].lat); 62 | } 63 | } 64 | } 65 | } 66 | 67 | int main(int nargs, char *args[]) { 68 | if (nargs > 1) { 69 | order = args[1]; 70 | } 71 | struct cities *cities = 0; 72 | for (int i = 0; i < NCITIES; i++) { 73 | assert(cities_insert(&cities, all_cities[i], 0, 0) == cities_INSERTED); 74 | } 75 | printf("var _rects = [\n"); 76 | print_rects(cities, 0); 77 | printf("]\n"); 78 | 79 | return 0; 80 | } 81 | -------------------------------------------------------------------------------- /docs/assets/rects.html: -------------------------------------------------------------------------------- 1 | 2 | 7 | 8 | 9 | 10 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 143 | 144 | 145 | 146 | -------------------------------------------------------------------------------- /docs/assets/sbtree.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Spatial B-trees 6 | 22 | 23 | 24 |
25 |

Spatial B-trees

26 |

27 |

Introduction

28 | 29 |

30 | The B-tree is an awesome 31 | data structure. 32 | 33 | Its primary purpose is to maintain an 34 | ordered list of items, where each operation (insert, delete, search) is 35 | guaranteed a time complexity of 36 | O(log n). 37 | The B-tree is pretty ubiquitous and a foundational structure in many of the most popular 38 | database systems, such as Postgres, SQLite, MySQL, etc. 39 |

40 | 41 |

42 | This document describes a new way to extend an existing B-tree for the use of 43 | multidimensional data. 44 |

45 | 46 |

47 | Currently there's a bunch of existing structures that can be used for spatial 48 | data, such as the 49 | R-tree, 50 | K-d tree, 51 | Quadtree, and 52 | UB-tree. 53 |

54 | 55 |

56 | This one is a bit different. 57 |

58 | 59 |

The Algorithm

60 | 61 |

62 | A standard B-tree is an ordered tree-based data structure that stores its items 63 | in nodes. 64 |

65 |

66 | The B-tree has a single root node, which may have children nodes, and those 67 | children nodes may also have children nodes. 68 |

69 |
 70 |                              +-------------+
 71 |                              | • 10 • 18 • |
 72 |                              +-|----|----|-+
 73 |                                |    |    |
 74 |             +------------------+    |    +---------------------+
 75 |             |                       |                          |
 76 |             v                       v                          v
 77 |       +-----------+             +--------+              +-------------+
 78 |       | • 3 . 6 • |             | • 14 • |              | • 20 • 22 • |
 79 |       +-|---|---|-+             +-|----|-+              +-|----|----|-+
 80 |         |   |   |                 |    |                  |    |    |  
 81 |    +----+  ++   +---+          +--+    +----+         +---+  +-+    +--+
 82 |    |       |        |          |            |         |      |         |
 83 |    v       v        v          v            v         v      v         v
 84 | +-----+ +-----+ +-------+ +----------+ +----------+ +----+ +----+ +----------+
 85 | | 1 2 | | 4 5 | | 7 8 9 | | 11 12 13 | | 15 16 17 | | 19 | | 21 | | 23 24 25 |
 86 | +-----+ +-----+ +-------+ +----------+ +----------+ +----+ +----+ +----------+
 87 | 
88 | 89 |

90 | The only change is that we will now store bounding box information for each 91 | child. This bounding box will be expanded to include all children nodes and 92 | the current branch level item. 93 |

94 | 95 |

96 | For example, here we use a blue box to represent the branch level boxes. 97 |

98 | 99 |
100 |                              +-------------+
101 |                              | • 10 • 18  |
102 |                              +-|----|----|-+
103 |                                |    |    |
104 |             +------------------+    |    +---------------------+
105 |             |                       |                          |
106 |             v                       v                          v
107 |       +-----------+             +--------+              +-------------+
108 |       | • 3 • 6  |             | • 14  |              | • 20 • 22  |
109 |       +-|---|---|-+             +-|----|-+              +-|----|----|-+
110 |         |   |   |                 |    |                  |    |    |  
111 |    +----+  ++   +---+          +--+    +----+         +---+  +-+    +--+
112 |    |       |        |          |            |         |      |         |
113 |    v       v        v          v            v         v      v         v
114 | +-----+ +-----+ +-------+ +----------+ +----------+ +----+ +----+ +----------+
115 | | 1 2 | | 4 5 | | 7 8 9 | | 11 12 13 | | 15 16 17 | | 19 | | 21 | | 23 24 25 |
116 | +-----+ +-----+ +-------+ +----------+ +----------+ +----+ +----+ +----------+
117 | 
118 | 119 |

Operations

120 |
    121 |
  • Insert: Same algorithm as the original B-tree, except that now 122 | each bounding box, from leaf to root, will be expanded to make room for 123 | the new item. 124 |
  • Delete: Also the same as the original, with adjustments from leaf to root. 125 |
  • Spatial search: This works like an R-tree, where you scan each rectangle 126 | (bounding box) and take intersecting children. 127 |
128 | 129 | 130 |

Key Order

131 | 132 |

133 | The Spatial B-tree leaves the order of the items up to you. 134 | This means the quality and performance of searching the tree can vary greatly 135 | depending on how this ordering is managed. 136 |

137 | 138 |

139 | For example, let's say you want to store geospatial points where each point 140 | is a tuple that contains at least three fields (id,lat,lon). Normally you 141 | would order on 'id', but this may lead to subpar performance because the 'id' 142 | may not necessarily correspond with the (lat,lon). This will potentially leave 143 | points that are spatially far apart, nearby in the B-tree; and points that are 144 | spatially close, far apart in the B-tree. 145 |

146 | 147 |

148 | Ideally, for best performance you would use a space-filling curve algorithm, 149 | such as Hilbert curve 150 | or Z-order curve, to 151 | produce a curve value that would be stored along with the 'id'. So your tuple 152 | will look more like (curve,id,lat,lon), where the Spatial B-tree orders on 153 | (curve,id). 154 |

155 | 156 | 157 | 158 | 159 | 160 | 161 |

Comparison

162 | 163 |

164 | Structurally the Spatial B-tree is like the 165 | Counted B-tree, 166 | but functionally it is similar to the 167 | R-tree and the 168 | UB-tree. 169 |

170 | 171 |

R-tree

172 |

173 | It's like an R-tree in that each child rectangle is the minimum bounding 174 | rectangle of the entire child tree. 175 |

176 | 177 |

178 | A difference is that the R-tree stores all items at the leaf level, just 179 | like a B+Tree. While the Spatial B-tree stores items in the branches and leaves, 180 | just like a standard B-tree. 181 |

182 | 183 |

184 | Another difference is that during insertion the R-tree and its variants, such 185 | as the R*-tree, go to great lengths to determine the best ordering of the branch 186 | rectangles and items. Whenever a new item is inserted into an R-tree, from root 187 | to leaf, a complicated algorithm is used to choose the best child node to insert 188 | the item into. Depending on the quality of that algorithm, which isn't always 189 | identical across implementations, the performance of inserting and 190 | searching can vary greatly. 191 |

192 | 193 |

194 | The Spatial B-tree, on the other hand, inserts items exactly like a standard 195 | B-tree, by ordering on the item's key. As stated above, 196 | this means that you must choose your keys wisely. 197 |

198 | 199 |

200 | One R-tree variant worth noting is the 201 | Hilbert R-tree, which 202 | stores items in linear order using a Hilbert curve. This provides excellent 203 | search performance compared to other R-trees, and its ordering of items is very 204 | similar to a Spatial B-tree using a Hilbert curve in its key. But the 205 | structure is a bit more complicated than a traditional R-tree: it must 206 | track both LHVs (Largest Hilbert Value) and MBRs (Minimum Bounding Rectangle) 207 | for leaves and branches. This leads to extra work to maintain. And insertions 208 | and deletions are generally less efficient than in a Spatial B-tree. 209 |

210 | 211 | 212 |

UB-tree

213 |

214 | It's similar to a UB-tree because both the Spatial B-tree and UB-tree 215 | store the items linearly based on the key. 216 |

217 | 218 |

219 | Just like the R-tree, the UB-tree stores all items in the leaves, while the 220 | Spatial B-tree stores items in branches and leaves, like a standard B-tree. 221 |

222 | 223 |

224 | Another difference is that the UB-tree is designed to order on a Z-order curve, 225 | while the Spatial B-tree doesn't care, leaving it up to you what the ordering 226 | is. This opens up the Spatial B-tree to different strategies, such as Z-order 227 | or Hilbert or something else. 228 |

229 | 230 |

231 | Also the UB-tree does not store the MBRs (Minimum Bounding Rectangle) and 232 | thus cannot scan the tree for intersections like an R-tree and Spatial B-tree. 233 | Instead it needs to use an algorithm, called GetNextZ, which basically looks 234 | at ranges of the Z-curve to find nearby nodes that overlap a target area. 235 | Effectively working like something between a 236 | Quadtree search and a 237 | Geohash covers 238 | algorithm. 239 |

240 | 241 |

242 | In general the Spatial B-tree is designed to search like a Hilbert R-tree 243 | but have the simplicity of a standard B-tree. 244 |

245 | 246 |

247 | One more thing, the Spatial B-tree and UB-tree guarantee stable ordering 248 | of items. This means that no matter what the order of inserts and deletes of a 249 | set of items is, those items will always be returned in the same order when 250 | searching. An R-tree ordering is unstable. This may be an important detail 251 | if you desire deterministic results. 252 |

253 | 254 |

Implementation

255 | 256 |

257 | You can use the Spatial B-tree today using the 258 | Bgen B-tree Generator for C. 259 |

260 | 261 |
262 | 263 |

264 | (comments to josh@tile38.com)
265 | (thanks to Simon Tatham for his 266 | Counted B-tree, which inspired this structure)
267 | (last modified on Wed Oct 30 18:13:22 2024) 268 |

269 | 270 | 271 | 272 | -------------------------------------------------------------------------------- /docs/assets/spatial-animation-dark.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidwall/bgen/eac5dcdc33495fec5e5b6520336c9293a2d360bb/docs/assets/spatial-animation-dark.gif -------------------------------------------------------------------------------- /docs/assets/spatial-animation-light.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidwall/bgen/eac5dcdc33495fec5e5b6520336c9293a2d360bb/docs/assets/spatial-animation-light.gif -------------------------------------------------------------------------------- /docs/assets/spatial-hilbert-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidwall/bgen/eac5dcdc33495fec5e5b6520336c9293a2d360bb/docs/assets/spatial-hilbert-dark.png -------------------------------------------------------------------------------- /docs/assets/spatial-hilbert-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidwall/bgen/eac5dcdc33495fec5e5b6520336c9293a2d360bb/docs/assets/spatial-hilbert-light.png -------------------------------------------------------------------------------- /docs/assets/spatial-none-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidwall/bgen/eac5dcdc33495fec5e5b6520336c9293a2d360bb/docs/assets/spatial-none-dark.png -------------------------------------------------------------------------------- /docs/assets/spatial-none-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidwall/bgen/eac5dcdc33495fec5e5b6520336c9293a2d360bb/docs/assets/spatial-none-light.png 
-------------------------------------------------------------------------------- /docs/assets/spatial-normal-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidwall/bgen/eac5dcdc33495fec5e5b6520336c9293a2d360bb/docs/assets/spatial-normal-dark.png -------------------------------------------------------------------------------- /docs/assets/spatial-normal-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidwall/bgen/eac5dcdc33495fec5e5b6520336c9293a2d360bb/docs/assets/spatial-normal-light.png -------------------------------------------------------------------------------- /docs/assets/spatial-zorder-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidwall/bgen/eac5dcdc33495fec5e5b6520336c9293a2d360bb/docs/assets/spatial-zorder-dark.png -------------------------------------------------------------------------------- /docs/assets/spatial-zorder-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tidwall/bgen/eac5dcdc33495fec5e5b6520336c9293a2d360bb/docs/assets/spatial-zorder-light.png -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | ## Bgen Examples 2 | 3 | This directory contains various example of using [bgen](https://github.com/tidwall/bgen), a B-tree generator for C. 4 | 5 | The following examples show how a bgen btree can be used for various collection 6 | types. 
7 | 8 | - [deque.c](deque.c) - Double-ended queue 9 | - [map.c](map.c) - Collection of key-value pairs, sorted by keys, keys are unique 10 | - [set.c](set.c) - Collection of unique keys 11 | - [priority_queue.c](priority_queue.c) - Priority queue 12 | - [queue.c](queue.c) - Queue (FIFO data structure) 13 | - [stack.c](stack.c) - Stack (LIFO data structure) 14 | - [spatial.c](spatial.c) - Spatial B-tree. Example using geospatial data. 15 | - [vector.c](vector.c) - Vector B-tree. Dynamic array, unsorted. 16 | - [iteration.c](iteration.c) - Shows how to iterate over items in btree 17 | -------------------------------------------------------------------------------- /examples/deque.c: -------------------------------------------------------------------------------- 1 | // cc examples/deque.c && ./a.out 2 | // Adapted from https://en.cppreference.com/w/cpp/container/deque 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define BGEN_NAME deque 9 | #define BGEN_TYPE int 10 | #define BGEN_NOORDER 11 | #include "../bgen.h" 12 | 13 | int main() { 14 | 15 | int data[] = { 7, 5, 16, 8 }; 16 | int n = sizeof(data)/sizeof(int); 17 | 18 | // Create a deque containing integers 19 | struct deque *deque = 0; 20 | for (int i = 0; i < n; i++) { 21 | deque_push_back(&deque, data[i], 0); 22 | } 23 | 24 | // Add an integer to the beginning and end of the deque 25 | deque_push_front(&deque, 13, 0); 26 | deque_push_back(&deque, 25, 0); 27 | 28 | 29 | // Iterate and print values of deque 30 | struct deque_iter *iter; 31 | deque_iter_init(&deque, &iter, 0); 32 | deque_iter_scan(iter); 33 | for (; deque_iter_valid(iter); deque_iter_next(iter)) { 34 | int item; 35 | deque_iter_item(iter, &item); 36 | printf("%d ", item); 37 | } 38 | deque_iter_release(iter); 39 | printf("\n"); 40 | 41 | return 0; 42 | } 43 | 44 | // Output: 45 | // 13 7 5 16 8 25 46 | -------------------------------------------------------------------------------- /examples/example.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define BGEN_NAME bt // The namespace for the btree structure. 4 | #define BGEN_TYPE int // The data type for all items in the btree 5 | #define BGEN_LESS return a < b; // A code fragment for comparing items 6 | #include "../bgen.h" // Include "bgen.h" to generate the btree 7 | 8 | int main() { 9 | // Create an empty btree instance. 10 | struct bt *tree = 0; 11 | 12 | // Insert some items into the btree 13 | bt_insert(&tree, 3, 0, 0); 14 | bt_insert(&tree, 8, 0, 0); 15 | bt_insert(&tree, 2, 0, 0); 16 | bt_insert(&tree, 5, 0, 0); 17 | 18 | // Print items in tree 19 | struct bt_iter *iter; 20 | bt_iter_init(&tree, &iter, 0); 21 | for (bt_iter_scan(iter); bt_iter_valid(iter); bt_iter_next(iter)) { 22 | int item; 23 | bt_iter_item(iter, &item); 24 | printf("%d ", item); 25 | } 26 | printf("\n"); 27 | 28 | // Delete an item 29 | bt_delete(&tree, 3, 0, 0); 30 | 31 | // Print again 32 | for (bt_iter_scan(iter); bt_iter_valid(iter); bt_iter_next(iter)) { 33 | int item; 34 | bt_iter_item(iter, &item); 35 | printf("%d ", item); 36 | } 37 | printf("\n"); 38 | 39 | bt_iter_release(iter); 40 | 41 | bt_clear(&tree, 0); 42 | return 0; 43 | } 44 | -------------------------------------------------------------------------------- /examples/iteration.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | struct user { 6 | char *last; 7 | char *first; 8 | int age; 9 | }; 10 | 11 | int user_compare(struct user a, struct user b) { 12 | int cmp = strcmp(a.last, b.last); 13 | if (cmp == 0) { 14 | cmp = strcmp(a.first, b.first); 15 | } 16 | return cmp; 17 | } 18 | 19 | bool user_iter(struct user user, void *udata) { 20 | printf("%s %s (age=%d)\n", user.first, user.last, user.age); 21 | return true; 22 | } 23 | 24 | #define BGEN_NAME users 25 | #define BGEN_TYPE struct user 26 | #define BGEN_COMPARE return 
user_compare(a, b); 27 | #include "../bgen.h" 28 | 29 | int main() { 30 | // Create a new btree. 31 | struct users *users = 0; 32 | 33 | // Load some users into the btree. 34 | users_insert(&users, (struct user){ .first="Dale", .last="Murphy", .age=44 }, 0, 0); 35 | users_insert(&users, (struct user){ .first="Roger", .last="Craig", .age=68 }, 0, 0); 36 | users_insert(&users, (struct user){ .first="Jane", .last="Murphy", .age=47 }, 0, 0); 37 | 38 | struct user user; 39 | int status; 40 | printf("\n-- get some users --\n"); 41 | users_get(&users, (struct user){ .first="Jane", .last="Murphy" }, &user, 0); 42 | printf("%s age=%d\n", user.first, user.age); 43 | 44 | users_get(&users, (struct user){ .first="Roger", .last="Craig" }, &user, 0); 45 | printf("%s age=%d\n", user.first, user.age); 46 | 47 | users_get(&users, (struct user){ .first="Dale", .last="Murphy" }, &user, 0); 48 | printf("%s age=%d\n", user.first, user.age); 49 | 50 | status = users_get(&users, (struct user){ .first="Tom", .last="Buffalo" }, &user, 0); 51 | printf("%s\n", status==users_FOUND?"exists":"not exists"); 52 | 53 | printf("\n-- iterate over all users --\n"); 54 | users_scan(&users, user_iter, 0); 55 | 56 | printf("\n-- iterate beginning with last name `Murphy` --\n"); 57 | users_seek(&users, (struct user){ .first="", .last="Murphy" }, user_iter, NULL); 58 | 59 | printf("\n-- loop iterator (same as previous) --\n"); 60 | struct users_iter *iter; 61 | users_iter_init(&users, &iter, 0); 62 | users_iter_seek(iter, (struct user){.first="", .last="Murphy"}); 63 | while (users_iter_valid(iter)) { 64 | users_iter_item(iter, &user); 65 | printf("%s %s (age=%d)\n", user.first, user.last, user.age); 66 | users_iter_next(iter); 67 | } 68 | users_iter_release(iter); 69 | 70 | return 0; 71 | } 72 | 73 | // Output: 74 | // 75 | // -- get some users -- 76 | // Jane age=47 77 | // Roger age=68 78 | // Dale age=44 79 | // not exists 80 | // 81 | // -- iterate over all users -- 82 | // Roger Craig (age=68) 83 | 
// Dale Murphy (age=44) 84 | // Jane Murphy (age=47) 85 | // 86 | // -- iterate beginning with last name `Murphy` -- 87 | // Dale Murphy (age=44) 88 | // Jane Murphy (age=47) 89 | // 90 | // -- loop iterator (same as previous) -- 91 | // Dale Murphy (age=44) 92 | // Jane Murphy (age=47) 93 | -------------------------------------------------------------------------------- /examples/map.c: -------------------------------------------------------------------------------- 1 | // cc examples/map.c && ./a.out 2 | // Adapted from https://en.cppreference.com/w/cpp/container/map 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | struct pair { 9 | const char *key; 10 | int value; 11 | }; 12 | 13 | #define BGEN_NAME map 14 | #define BGEN_TYPE struct pair 15 | #define BGEN_COMPARE return strcmp(a.key, b.key); 16 | #include "../bgen.h" 17 | 18 | void print_map(const char *comment, struct map **map) { 19 | printf("%s", comment); 20 | struct map_iter *iter; 21 | map_iter_init(map, &iter, 0); 22 | for (map_iter_scan(iter); map_iter_valid(iter); map_iter_next(iter)) { 23 | struct pair pair; 24 | map_iter_item(iter, &pair); 25 | printf("[%s] = %d; ", pair.key, pair.value); 26 | } 27 | map_iter_release(iter); 28 | printf("\n"); 29 | } 30 | 31 | int main() { 32 | // Create a map of three (string, int) pairs 33 | struct map *map = 0; 34 | map_insert(&map, (struct pair){"GPU", 15}, 0, 0); 35 | map_insert(&map, (struct pair){"RAM", 20}, 0, 0); 36 | map_insert(&map, (struct pair){"CPU", 10}, 0, 0); 37 | print_map("1) Initial map: ", &map); 38 | 39 | // Get an existing item 40 | struct pair item; 41 | assert(map_get(&map, (struct pair){"GPU"}, &item, 0) == map_FOUND); 42 | printf("2) Get item: [%s] = %d;\n", item.key, item.value); 43 | 44 | // Update an existing item 45 | assert(map_insert(&map, (struct pair){"CPU", 25}, 0, 0) == map_REPLACED); 46 | // Insert a new item 47 | assert(map_insert(&map, (struct pair){"SSD", 30}, 0, 0) == map_INSERTED); 48 | print_map("3) Updated map: ", 
&map); 49 | assert(map_insert(&map, (struct pair){"UPS"}, 0, 0) == map_INSERTED); 50 | print_map("4) Updated map: ", &map); 51 | 52 | assert(map_delete(&map, (struct pair){.key="GPU"}, 0, 0) == map_DELETED); 53 | print_map("5) After delete: ", &map); 54 | 55 | return 0; 56 | } 57 | 58 | // Output: 59 | // 1) Initial map: [CPU] = 10; [GPU] = 15; [RAM] = 20; 60 | // 2) Get item: [GPU] = 15; 61 | // 3) Updated map: [CPU] = 25; [GPU] = 15; [RAM] = 20; [SSD] = 30; 62 | // 4) Updated map: [CPU] = 25; [GPU] = 15; [RAM] = 20; [SSD] = 30; [UPS] = 0; 63 | // 5) After delete: [CPU] = 25; [RAM] = 20; [SSD] = 30; [UPS] = 0; 64 | -------------------------------------------------------------------------------- /examples/priority_queue.c: -------------------------------------------------------------------------------- 1 | // cc examples/priority_queue.c && ./a.out 2 | // Adapted from https://en.cppreference.com/w/cpp/container/priority_queue 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define BGEN_NAME max_priority_queue 10 | #define BGEN_TYPE int 11 | #define BGEN_LESS return a < b; 12 | #include "../bgen.h" 13 | 14 | #define BGEN_NAME min_priority_queue 15 | #define BGEN_TYPE int 16 | #define BGEN_LESS return b < a; 17 | #include "../bgen.h" 18 | 19 | int main() { 20 | int data[] = { 1, 8, 5, 6, 3, 4, 0, 9, 7, 2 }; 21 | int n = sizeof(data)/sizeof(int); 22 | printf("data: "); 23 | for (int i = 0; i < n; i++) { 24 | printf("%d ", data[i]); 25 | } 26 | printf("\n"); 27 | 28 | struct max_priority_queue *max_priority_queue = 0; 29 | 30 | // Fill the priority queue. 
31 | for (int i = 0; i < n; i++) { 32 | max_priority_queue_insert(&max_priority_queue, data[i], 0, 0); 33 | } 34 | 35 | printf("max_priority_queue: "); 36 | while (max_priority_queue_count(&max_priority_queue, 0) > 0) { 37 | int val; 38 | max_priority_queue_pop_front(&max_priority_queue, &val, 0); 39 | printf("%d ", val); 40 | } 41 | printf("\n"); 42 | 43 | struct min_priority_queue *min_priority_queue = 0; 44 | 45 | // Fill the priority queue. 46 | for (int i = 0; i < n; i++) { 47 | min_priority_queue_insert(&min_priority_queue, data[i], 0, 0); 48 | } 49 | 50 | printf("min_priority_queue: "); 51 | while (min_priority_queue_count(&min_priority_queue, 0) > 0) { 52 | int val; 53 | min_priority_queue_pop_front(&min_priority_queue, &val, 0); 54 | printf("%d ", val); 55 | } 56 | printf("\n"); 57 | 58 | 59 | return 0; 60 | } 61 | 62 | // Output: 63 | // data: 1 8 5 6 3 4 0 9 7 2 64 | // max_priority_queue: 0 1 2 3 4 5 6 7 8 9 65 | // min_priority_queue: 9 8 7 6 5 4 3 2 1 0 66 | -------------------------------------------------------------------------------- /examples/queue.c: -------------------------------------------------------------------------------- 1 | // cc examples/queue.c && ./a.out 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define BGEN_NAME queue 8 | #define BGEN_TYPE int 9 | #define BGEN_NOORDER 10 | #include "../bgen.h" 11 | 12 | int main() { 13 | struct queue *queue = 0; 14 | 15 | queue_push_back(&queue, 0, 0); // pushes 0 16 | queue_push_back(&queue, 2, 0); // q = 0 2 17 | queue_push_back(&queue, 1, 0); // q = 0 2 1 18 | queue_push_back(&queue, 3, 0); // q = 0 2 1 3 19 | 20 | int val; 21 | 22 | queue_front(&queue, &val, 0); 23 | assert(val == 0); 24 | queue_back(&queue, &val, 0); 25 | assert(val == 3); 26 | assert(queue_count(&queue, 0) == 4); 27 | 28 | // Remove the first element, 0 29 | queue_pop_front(&queue, &val, 0); 30 | assert(val == 0); 31 | assert(queue_count(&queue, 0) == 3); 32 | 33 | // Print and remove all elements. 
34 | printf("queue: "); 35 | for (; queue_count(&queue, 0) > 0; queue_pop_front(&queue, 0, 0)) { 36 | queue_front(&queue, &val, 0); 37 | printf("%d ", val); 38 | } 39 | printf("\n"); 40 | 41 | assert(queue_count(&queue, 0) == 0); 42 | return 0; 43 | } 44 | 45 | // Output: 46 | // queue: 2 1 3 47 | -------------------------------------------------------------------------------- /examples/set.c: -------------------------------------------------------------------------------- 1 | // cc examples/set.c && ./a.out 2 | // Adapted from https://en.cppreference.com/w/cpp/container/set 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define BGEN_NAME set 9 | #define BGEN_TYPE int 10 | #define BGEN_LESS return a < b; 11 | #include "../bgen.h" 12 | 13 | void print_set(struct set **set) { 14 | struct set_iter *iter; 15 | set_iter_init(set, &iter, 0); 16 | printf("{ "); 17 | for (set_iter_scan(iter); set_iter_valid(iter); set_iter_next(iter)) { 18 | int item; 19 | set_iter_item(iter, &item); 20 | printf("%d ", item); 21 | } 22 | set_iter_release(iter); 23 | printf("}"); 24 | } 25 | 26 | int main() { 27 | int data[] = { 1, 5, 3 }; 28 | int n = sizeof(data)/sizeof(int); 29 | 30 | struct set *set = 0; 31 | for (int i = 0; i < n; i++) { 32 | set_insert(&set, data[i], 0, 0); 33 | } 34 | print_set(&set); 35 | printf("\n"); 36 | 37 | set_insert(&set, 2, 0, 0); 38 | print_set(&set); 39 | printf("\n"); 40 | 41 | int keys[] = { 3, 4 }; 42 | for (int i = 0; i < 2; i++) { 43 | print_set(&set); 44 | if (set_contains(&set, keys[i], 0)) { 45 | printf(" does contain %d\n", keys[i]); 46 | } else { 47 | printf(" doesn't contain %d\n", keys[i]); 48 | } 49 | } 50 | printf("\n"); 51 | 52 | return 0; 53 | } 54 | 55 | // Output: 56 | // { 1 3 5 } 57 | // { 1 2 3 5 } 58 | // { 1 2 3 5 } does contain 3 59 | // { 1 2 3 5 } doesn't contain 4 60 | -------------------------------------------------------------------------------- /examples/spatial.c: 
-------------------------------------------------------------------------------- 1 | // cc examples/spatial.c && ./a.out 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | // Support functions found in the tests directory 9 | #include "../tests/cities.h" 10 | #include "../tests/curve.h" 11 | #include "../tests/dist.h" 12 | 13 | struct city { 14 | uint32_t curve; 15 | const char *city; 16 | double lat; 17 | double lon; 18 | }; 19 | 20 | int city_compare(struct city a, struct city b) { 21 | return a.curve < b.curve ? -1 : a.curve > b.curve ? 1 : 22 | strcmp(a.city, b.city); 23 | } 24 | 25 | void city_rect(struct city city, double min[], double max[]) { 26 | min[0] = city.lon, min[1] = city.lat; 27 | max[0] = city.lon, max[1] = city.lat; 28 | } 29 | 30 | #define BGEN_NAME cities 31 | #define BGEN_TYPE struct city 32 | #define BGEN_SPATIAL 33 | #define BGEN_ITEMRECT city_rect(item, min, max); 34 | #define BGEN_COMPARE return city_compare(a, b); 35 | #include "../bgen.h" 36 | 37 | struct point { 38 | double lat; 39 | double lon; 40 | }; 41 | 42 | double calcdist(double min[], double max[], void *target, void *udata) { 43 | struct point *point = target; 44 | return point_rect_dist(point->lat, point->lon, min[1], min[0], max[1], 45 | max[0]); 46 | } 47 | 48 | int main() { 49 | // Load a bunch of city entries into a spatial B-tree. 50 | // Use a hilbert curve for spatial ordering. 
51 | struct cities *cities = 0; 52 | double window[] = { -180, -90, 180, 90 }; 53 | for (int i = 0; i < NCITIES; i++) { 54 | struct city city = { 55 | .curve = curve_hilbert(all_cities[i].lon, all_cities[i].lat, window), 56 | .city = all_cities[i].city, 57 | .lat = all_cities[i].lat, 58 | .lon = all_cities[i].lon, 59 | }; 60 | cities_insert(&cities, city, 0, 0); 61 | } 62 | 63 | assert(cities_count(&cities, 0) == NCITIES); 64 | printf("Inserted %zu cities\n", cities_count(&cities, 0)); 65 | 66 | // Find all cities in rectangle 67 | double min[] = { -113, 33 }; 68 | double max[] = { -111, 34 }; 69 | printf("Cities inside rectangle ((%.0f %0.f) (%.0f %0.f)):\n", 70 | min[0], min[1], max[0], max[1]); 71 | struct cities_iter *iter; 72 | cities_iter_init(&cities, &iter, 0); 73 | cities_iter_intersects(iter, min, max); 74 | while (cities_iter_valid(iter)) { 75 | struct city city; 76 | cities_iter_item(iter, &city); 77 | printf("- %s\n", city.city); 78 | cities_iter_next(iter); 79 | } 80 | cities_iter_release(iter); 81 | printf("\n"); 82 | 83 | // Find nearest 10 cities to (-113, 33) 84 | // This uses a kNN operation 85 | struct point point = { .lon = -113, .lat = 33 }; 86 | printf("Top 10 cities nearby point (%.0f %0.f):\n", point.lon, point.lat); 87 | cities_iter_init(&cities, &iter, 0); 88 | cities_iter_nearby(iter, &point, calcdist); 89 | int n = 0; 90 | while (n < 10 && cities_iter_valid(iter)) { 91 | struct city city; 92 | cities_iter_item(iter, &city); 93 | printf("- %s\n", city.city); 94 | cities_iter_next(iter); 95 | n++; 96 | } 97 | cities_iter_release(iter); 98 | printf("\n"); 99 | 100 | return 0; 101 | } 102 | 103 | // Output: 104 | // Cities inside rectangle ((-113 33) (-111 34)): 105 | // - Chandler 106 | // - Scottsdale 107 | // - Mesa 108 | // - Phoenix 109 | // - Glendale 110 | // 111 | // Top 10 cities nearby point (-113 33): 112 | // - Glendale 113 | // - Phoenix 114 | // - Chandler 115 | // - Scottsdale 116 | // - Mesa 117 | // - San Luis Rio Colorado 
118 | // - Tucson 119 | // - Mexicali 120 | // - Heroica Nogales 121 | // - Ensenada -------------------------------------------------------------------------------- /examples/stack.c: -------------------------------------------------------------------------------- 1 | // cc examples/stack.c && ./a.out 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define BGEN_NAME stack 8 | #define BGEN_TYPE int 9 | #define BGEN_NOORDER 10 | #include "../bgen.h" 11 | 12 | int main() { 13 | struct stack *stack = 0; 14 | 15 | stack_push_back(&stack, 0, 0); // pushes 0 16 | stack_push_back(&stack, 2, 0); // q = 0 2 17 | stack_push_back(&stack, 1, 0); // q = 0 2 1 18 | stack_push_back(&stack, 3, 0); // q = 0 2 1 3 19 | 20 | int val; 21 | 22 | stack_front(&stack, &val, 0); 23 | assert(val == 0); 24 | stack_back(&stack, &val, 0); 25 | assert(val == 3); 26 | assert(stack_count(&stack, 0) == 4); 27 | 28 | // Remove the back element, 3 29 | stack_pop_back(&stack, &val, 0); 30 | assert(val == 3); 31 | assert(stack_count(&stack, 0) == 3); 32 | 33 | // Print and remove all elements. 
34 | printf("stack: "); 35 | for (; stack_count(&stack, 0) > 0; stack_pop_back(&stack, 0, 0)) { 36 | stack_back(&stack, &val, 0); 37 | printf("%d ", val); 38 | } 39 | printf("\n"); 40 | 41 | assert(stack_count(&stack, 0) == 0); 42 | 43 | return 0; 44 | } 45 | 46 | // Output: 47 | // stack: 1 2 0 48 | -------------------------------------------------------------------------------- /examples/vector.c: -------------------------------------------------------------------------------- 1 | // cc examples/vector.c && ./a.out 2 | // Adapted from https://en.cppreference.com/w/cpp/container/vector 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #define BGEN_NAME vector 9 | #define BGEN_TYPE int 10 | #define BGEN_COUNTED 11 | #define BGEN_NOORDER 12 | #include "../bgen.h" 13 | 14 | int main() { 15 | int data[] = { 8, 4, 5, 9 }; 16 | int n = sizeof(data)/sizeof(int); 17 | 18 | // Create a vector containing integers 19 | struct vector *vector = 0; 20 | for (int i = 0; i < n; i++) { 21 | vector_push_back(&vector, data[i], 0); 22 | } 23 | 24 | // Add two more integers to vector 25 | vector_push_back(&vector, 6, 0); 26 | vector_push_back(&vector, 9, 0); 27 | 28 | // Overwrite element at position 2 29 | vector_replace_at(&vector, 2, -1, 0, 0); 30 | 31 | // Insert an item in the middle of the vector 32 | vector_insert_at(&vector, 2, 7, 0); 33 | 34 | // Delete an item in the middle of the vector 35 | vector_delete_at(&vector, 1, 0, 0); 36 | 37 | // Print out the vector 38 | for (int i = 0; i < vector_count(&vector, 0); i++) { 39 | int item; 40 | vector_get_at(&vector, i, &item, 0); 41 | printf("%d ", item); 42 | } 43 | printf("\n"); 44 | return 0; 45 | } 46 | 47 | // Output: 48 | // 8 7 -1 9 6 9 49 | -------------------------------------------------------------------------------- /tests/README.md: -------------------------------------------------------------------------------- 1 | # Testing 2 | 3 | Tests can be run from the project's root directory. 
4 | 5 | ```bash 6 | tests/run.sh 7 | ``` 8 | 9 | This will run all tests using the system's default compiler. 10 | 11 | If [Clang](https://clang.llvm.org) is your compiler then you will also be 12 | provided with memory address sanitizing and code coverage. 13 | 14 | If you need Valgrind you can provide `VALGRIND=1`. 15 | 16 | ### Examples 17 | 18 | ```bash 19 | tests/run.sh # defaults 20 | CC=clang-17 tests/run.sh # use alternative compiler 21 | CC=emcc tests/run.sh # test WebAssembly using Emscripten 22 | CC="zig cc" tests/run.sh # test with the Zig C compiler 23 | CFLAGS="-O3" tests/run.sh # use custom cflags 24 | NOSANS=1 tests/run.sh # do not use sanitizers 25 | VALGRIND=1 tests/run.sh # use valgrind on all tests 26 | ``` -------------------------------------------------------------------------------- /tests/bench.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | cd $(dirname "${BASH_SOURCE[0]}") 5 | 6 | if [[ "$CC" == "" ]]; then 7 | CC=cc 8 | fi 9 | 10 | if [[ "$CXX" == "" ]]; then 11 | CXX=c++ 12 | fi 13 | 14 | if [[ "$G" == "" ]]; then 15 | export G=5 16 | fi 17 | 18 | echo "tidwall/bgen" 19 | $CC -O3 $CFLAGS bench_b.c 20 | ./a.out 21 | 22 | echo 23 | echo "tidwall/bgen (spatial)" 24 | $CC -O3 $CFLAGS bench_s.c 25 | ./a.out 26 | -------------------------------------------------------------------------------- /tests/bench_b.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "testutils.h" 3 | 4 | 5 | #define M 16 6 | 7 | int N = 1000000; 8 | int G = 50; 9 | int C = 0; // -1 = worse-case, 0 = average, +1 = best-case 10 | 11 | // #define COW 12 | // #define COUNTED 13 | // #define SPATIAL 14 | // #define NOATOMIC 15 | // #define BSEARCH 16 | // #define NOPATHHINT 17 | // #define PATHHINT 18 | // #define USECOMPARE 19 | 20 | #define BGEN_NAME kv 21 | #define BGEN_TYPE int 22 | #define BGEN_MALLOC return malloc0(size); 23 | 
// Scan callback: folds the visited item into the running total that udata
// points at (a double). Always returns true so the scan keeps going.
static bool iter_scan(int item, void *udata) {
    double *total = (double *)udata;
    *total = *total + item;
    return true;
}
// Benchmark driver for the int-keyed tree generated as `kv`.
//
// Reads the optional environment variables N (item count), G (rounds per
// operation) and C (-1 worst, 0 average, otherwise best) into the globals of
// the same name, builds the key set, then times each tree operation through
// the run_op macro. Every run_op call passes a label, the round count, a
// setup block and the block that is actually timed.
//
// NOTE(review): the benchmarked calls sit inside assert(), so a build with
// NDEBUG defined would compile them away and time empty loops. Confirm the
// benchmark is never built that way.
// NOTE(review): `keys` is not freed before returning. Presumably acceptable
// for a short-lived benchmark process; confirm.
int main(void) {
    // Environment overrides; atoi performs no validation of the text.
    if (getenv("N")) {
        N = atoi(getenv("N"));
    }
    if (getenv("G")) {
        G = atoi(getenv("G"));
    }
    if (getenv("C")) {
        C = atoi(getenv("C"));
    }
    printf("Benchmarking %d items, %d times, taking the %s result\n",
        N, G, C == -1 ? "worst": C == 0 ? "average" : "best");

    // _kv_internal_print_feats(stdout);

    seedrand();
    // asum is the reference total of all keys; the scan benchmarks compare
    // their own running total against it.
    double asum = 0;
    int *keys = malloc(N * sizeof(int));
    assert(keys);
    // Keys are the multiples of ten 0, 10, 20, ... so that key/10 is the
    // key's rank (relied on by the get_at(rand) benchmark below).
    for (int i = 0; i < N; i++) {
        keys[i] = i*10;
        asum += keys[i];
    }
    assert(asum > 0);

    struct kv *tree = 0;
    int val;

    // --- insert ---
    run_op("insert(seq)", G, {
        kv_clear(&tree, 0);
        sort(keys, N);
    },{
        for (int i = 0; i < N; i++) {
            assert(kv_insert(&tree, keys[i], &val, 0) == kv_INSERTED);
        }
    });

    run_op("insert(rand)", G, {
        kv_clear(&tree, 0);
        shuffle(keys, N);
    },{
        for (int i = 0; i < N; i++) {
            assert(kv_insert(&tree, keys[i], &val, 0) == kv_INSERTED);
        }
    });

    // --- get ---
    run_op("get(seq)", G, {
        sort(keys, N);
    }, {
        for (int i = 0; i < N; i++) {
            assert(kv_get(&tree, keys[i], &val, 0) == kv_FOUND);
        }
    });

    run_op("get(rand)", G, {
        shuffle(keys, N);
    }, {
        for (int i = 0; i < N; i++) {
            assert(kv_get(&tree, keys[i], &val, 0) == kv_FOUND);
        }
    });

    // --- delete ---
    run_op("delete(seq)", G, {
        reset_tree();
        sort(keys, N);
    }, {
        for (int i = 0; i < N; i++) {
            assert(kv_delete(&tree, keys[i], &val, 0) == kv_DELETED);
        }
    });

    run_op("delete(rand)", G, {
        reset_tree();
        shuffle(keys, N);
    }, {
        for (int i = 0; i < N; i++) {
            assert(kv_delete(&tree, keys[i], &val, 0) == kv_DELETED);
        }
    });

    // return 0;

    // Insert keys into a tree that run_op has already refilled; the result
    // code is deliberately not checked here.
    run_op("reinsert(rand)", G, {
        shuffle(keys, N);
    },{
        for (int i = 0; i < N; i++) {
            kv_insert(&tree, keys[i], &val, 0);
        }
    });

    // The next two benchmarks run only when kv_feat_cow() reports true. Each
    // round first clones `tree` into `tree2`, then mutates `tree`.
    if (kv_feat_cow()) {
        struct kv *tree2 = 0;
        run_op("reinsert-cow(rand)", G, {
            kv_clear(&tree2, 0);
            kv_clone(&tree, &tree2, 0);
            shuffle(keys, N);
        },{
            for (int i = 0; i < N; i++) {
                assert(kv_insert(&tree, keys[i], &val, 0) == kv_REPLACED);
            }
        });
        kv_clear(&tree2, 0);
    }


    if (kv_feat_cow()) {
        struct kv *tree2 = 0;
        run_op("get_mut-cow(rand)", G, {
            kv_clear(&tree2, 0);
            kv_clone(&tree, &tree2, 0);
            shuffle(keys, N);
        },{
            for (int i = 0; i < N; i++) {
                assert(kv_get_mut(&tree, keys[i], &val, 0) == kv_FOUND);
            }
        });
        kv_clear(&tree2, 0);
    }


    // Index-based operations, run only when kv_feat_counted() reports true.
    if (kv_feat_counted()) {
        run_op("get_at(seq)", G, {},{
            for (int i = 0; i < N; i++) {
                assert(kv_get_at(&tree, i, &val, 0) == kv_FOUND);
            }
        });

        run_op("get_at(rand)", G, {
            shuffle(keys, N);
        }, {
            // keys are index * 10
            for (int i = 0; i < N; i++) {
                assert(kv_get_at(&tree, keys[i]/10, &val, 0) == kv_FOUND);
            }
        });

        run_op("delete_at(head)", G, {
            reset_tree();
        }, {
            for (int i = 0; i < N; i++) {
                assert(kv_delete_at(&tree, 0, &val, 0) == kv_DELETED);
            }
        });

        // N-i items remain before each deletion, so (N-i)/2 is the middle
        // index and (N-i)-1 the last index of the current tree.
        run_op("delete_at(mid)", G, {
            reset_tree();
        }, {
            for (int i = 0; i < N; i++) {
                assert(kv_delete_at(&tree, (N-i)/2, &val, 0) == kv_DELETED);
            }
        });

        run_op("delete_at(tail)", G, {
            reset_tree();
        }, {
            for (int i = 0; i < N; i++) {
                assert(kv_delete_at(&tree, (N-i)-1, &val, 0) == kv_DELETED);
            }
        });


        int *delidxs = malloc(N*sizeof(int));
        assert(delidxs);

        // The setup block precomputes one random index per deletion, each
        // drawn from the number of items still present at that step, so the
        // timed block contains only the deletions.
        run_op("delete_at(rand)", G, {
            reset_tree();
            int i = 0;
            int count = N;
            while (count > 0) {
                int index = rand() % count;
                count--;
                delidxs[i++] = index;
            }
        }, {
            for (int i = 0; i < N; i++) {
                assert(kv_delete_at(&tree, delidxs[i], &val, 0) == kv_DELETED);
            }
        });
        free(delidxs);
    }

    // --- push / pop at either end ---
    // push_first walks the sorted keys from largest to smallest.
    run_op("push_first", G, {
        kv_clear(&tree, 0);
        sort(keys, N);
    }, {
        for (int i = N-1; i >= 0; i--) {
            assert(kv_push_front(&tree, keys[i], 0) == kv_INSERTED);
        }
    });

    run_op("push_last", G, {
        kv_clear(&tree, 0);
        sort(keys, N);
    }, {
        for (int i = 0; i < N; i++) {
            assert(kv_push_back(&tree, keys[i], 0) == kv_INSERTED);
        }
    });

    run_op("pop_first", G, {
        reset_tree();
    }, {
        for (int i = 0; i < N; i++) {
            assert(kv_pop_front(&tree, 0, 0) == kv_DELETED);
        }
    });

    run_op("pop_last", G, {
        reset_tree();
    }, {
        for (int i = 0; i < N; i++) {
            assert(kv_pop_back(&tree, 0, 0) == kv_DELETED);
        }
    });

    // --- full traversals ---
    // Each traversal sums what it visits into bsum and checks the total
    // against asum.
    run_op("scan", G, {
        reset_tree();
    }, {
        double bsum = 0;
        kv_scan(&tree, iter_scan, &bsum);
        assert(asum == bsum);
    });

    run_op("scan_desc", G, {
        reset_tree();
    }, {
        double bsum = 0;
        kv_scan_desc(&tree, iter_scan, &bsum);
        assert(asum == bsum);
    });

    run_op("iter_scan", G, {
        reset_tree();
    }, {
        double bsum = 0;
        struct kv_iter *iter;
        kv_iter_init(&tree, &iter, 0);
        kv_iter_scan(iter);
        for (int i = 0; i < N; i++) {
            assert(kv_iter_valid(iter));
            kv_iter_item(iter, &val);
            bsum += val;
            kv_iter_next(iter);
        }
        kv_iter_release(iter);
        assert(asum == bsum);
    });

    run_op("iter_scan_desc", G, {
        reset_tree();
    }, {
        double bsum = 0;
        struct kv_iter *iter;
        kv_iter_init(&tree, &iter, 0);
        kv_iter_scan_desc(iter);
        for (int i = 0; i < N; i++) {
            assert(kv_iter_valid(iter));
            kv_iter_item(iter, &val);
            bsum += val;
            kv_iter_next(iter);
        }
        kv_iter_release(iter);
        assert(asum == bsum);
    });

    return 0;
}
// A benchmark item: a 2D point tagged with its space-filling-curve value.
struct point {
    uint32_t curve;  // curve value; primary sort key
    int id;          // tie-breaker when two points share a curve value
    double x;
    double y;
};

// Strict ordering of points: by curve value first, then by id.
static bool item_less(struct point a, struct point b) {
    if (a.curve != b.curve) {
        return a.curve < b.curve;
    }
    return a.id < b.id;
}

// Three-way comparison with the same ordering as item_less.
// Returns -1, 0 or 1.
static int item_compare(struct point a, struct point b) {
    if (a.curve != b.curve) {
        return a.curve < b.curve ? -1 : 1;
    }
    if (a.id < b.id) {
        return -1;
    }
    return a.id > b.id;
}
1 : 47 | 0; 48 | } 49 | 50 | static void sort_points(struct point array[], size_t numels) { 51 | qsort(array, numels, sizeof(struct point), compare_points); 52 | } 53 | 54 | static void shuffle_points(struct point array[], size_t numels) { 55 | shuffle0(array, numels, sizeof(struct point)); 56 | } 57 | 58 | static void item_rect(double min[], double max[], struct point point) { 59 | min[0] = point.x; 60 | min[1] = point.y; 61 | max[0] = point.x; 62 | max[1] = point.y; 63 | } 64 | 65 | 66 | #define BGEN_NAME kv 67 | #define BGEN_TYPE struct point 68 | #define BGEN_MALLOC return malloc0(size); 69 | #define BGEN_FREE free0(ptr); 70 | #ifdef COW 71 | #define BGEN_COW 72 | #endif 73 | #ifdef COUNTED 74 | #define BGEN_COUNTED 75 | #endif 76 | #ifdef SPATIAL 77 | #define BGEN_SPATIAL 78 | #endif 79 | #ifdef NOATOMIC 80 | #define BGEN_NOATOMIC 81 | #endif 82 | #ifdef BSEARCH 83 | #define BGEN_BSEARCH 84 | #endif 85 | #ifdef NOPATHHINT 86 | #define BGEN_NOPATHHINT 87 | #endif 88 | #ifdef USEPATHHINT 89 | #define BGEN_PATHHINT 90 | #endif 91 | #define BGEN_FANOUT M 92 | #define BGEN_SPATIAL 93 | #define BGEN_ITEMRECT { item_rect(min, max, item); } 94 | #define BGEN_MAYBELESSEQUAL { return a.curve <= b.curve; } 95 | #define BGEN_COMPARE { return item_compare(a, b); } 96 | #include "../bgen.h" 97 | 98 | static bool iter_scan(int item, void *udata) { 99 | double *sum = udata; 100 | (*sum) += item; 101 | return true; 102 | } 103 | 104 | #define reset_tree() { \ 105 | kv_clear(&tree, 0); \ 106 | shuffle_points(keys, N); \ 107 | for (int i = 0; i < N; i++) { \ 108 | kv_insert(&tree, keys[i], &val, 0); \ 109 | } \ 110 | }(void)0 111 | 112 | #define run_op(label, nops, nruns, preop, op) {\ 113 | double gelapsed = C < 1 ? 
0 : 9999; \ 114 | double gstart = now(); \ 115 | double gmstart = mtotal; \ 116 | double gmtotal = 0; \ 117 | for (int g = 0; g < (nruns); g++) { \ 118 | printf("\r%-19s", label); \ 119 | printf(" %d/%d ", g+1, (nruns)); \ 120 | fflush(stdout); \ 121 | preop \ 122 | double start = now(); \ 123 | size_t mstart = mtotal; \ 124 | op \ 125 | double elapsed = now()-start; \ 126 | if (C == -1) { \ 127 | if (elapsed > gelapsed) { \ 128 | gelapsed = elapsed; \ 129 | } \ 130 | } else if (C == 0) { \ 131 | gelapsed += elapsed; \ 132 | } else { \ 133 | if (elapsed < gelapsed) { \ 134 | gelapsed = elapsed; \ 135 | } \ 136 | } \ 137 | if (mtotal > mstart) { \ 138 | gmtotal += mtotal-mstart; \ 139 | } \ 140 | } \ 141 | printf("\r"); \ 142 | printf("%-19s", label); \ 143 | if (C == 0) { \ 144 | gelapsed /= nruns; \ 145 | } \ 146 | bench_print_mem(nops, gstart, gstart+gelapsed, \ 147 | gmstart, gmstart+(gmtotal/(nruns))); \ 148 | assert(kv_sane(&tree, 0)); \ 149 | reset_tree(); \ 150 | }(void)0 151 | 152 | 153 | struct iiter0ctx { 154 | int id; 155 | bool found; 156 | }; 157 | 158 | bool iter_one(struct point point, void *udata) { 159 | struct iiter0ctx *ctx = udata; 160 | if (point.id == ctx->id) { 161 | ctx->found = true; 162 | return false; 163 | } 164 | return true; 165 | } 166 | 167 | bool iter_many(struct point point, void *udata) { 168 | int *count = udata; 169 | (*count)++; 170 | return true; 171 | } 172 | 173 | int main(void) { 174 | if (getenv("N")) { 175 | N = atoi(getenv("N")); 176 | } 177 | if (getenv("G")) { 178 | G = atoi(getenv("G")); 179 | } 180 | if (getenv("C")) { 181 | C = atoi(getenv("C")); 182 | } 183 | printf("Benchmarking %d items, %d times, taking the %s result\n", 184 | N, G, C == -1 ? "worst": C == 0 ? 
"average" : "best"); 185 | 186 | // _kv_internal_print_feats(stdout); 187 | 188 | seedrand(); 189 | int nkeys = N; 190 | struct point *keys = malloc(nkeys*sizeof(struct point)); 191 | assert(keys); 192 | for (int i = 0; i < nkeys; i++) { 193 | struct point point; 194 | point.id = i; 195 | point.x = rand_double()*360.0-180.0; 196 | point.y = rand_double()*180.0-90.0; 197 | point.curve = curve_hilbert(point.x, point.y, 198 | (double[4]){-180, -90, 180, 90}); 199 | // point.curve = curve_z(point.y, point.x); 200 | keys[i] = point; 201 | } 202 | shuffle_points(keys, nkeys); 203 | 204 | struct kv *tree = 0; 205 | struct point val; 206 | int sum = 0; 207 | 208 | run_op("insert(seq)", N, G, { 209 | kv_clear(&tree, 0); 210 | sort_points(keys, nkeys); 211 | },{ 212 | for (int i = 0; i < N; i++) { 213 | assert(kv_insert(&tree, keys[i], &val, 0) == kv_INSERTED); 214 | } 215 | }); 216 | run_op("insert(rand)", N, G, { 217 | kv_clear(&tree, 0); 218 | shuffle_points(keys, nkeys); 219 | },{ 220 | for (int i = 0; i < N; i++) { 221 | assert(kv_insert(&tree, keys[i], &val, 0) == kv_INSERTED); 222 | } 223 | }); 224 | 225 | printf("== using callbacks ==\n"); 226 | 227 | double coord[2]; 228 | run_op("search-item(seq)", N, G, { 229 | sort_points(keys, nkeys); 230 | }, { 231 | for (int i = 0; i < nkeys; i++) { 232 | coord[0] = keys[i].x; 233 | coord[1] = keys[i].y; 234 | struct iiter0ctx ctx = { .id = keys[i].id }; 235 | kv_intersects(&tree, coord, coord, iter_one, &ctx); 236 | assert(ctx.found); 237 | } 238 | }); 239 | run_op("search-item(rand)", N, G, { 240 | shuffle_points(keys, nkeys); 241 | }, { 242 | for (int i = 0; i < nkeys; i++) { 243 | coord[0] = keys[i].x; 244 | coord[1] = keys[i].y; 245 | struct iiter0ctx ctx = { .id = keys[i].id }; 246 | kv_intersects(&tree, coord, coord, iter_one, &ctx); 247 | assert(ctx.found); 248 | } 249 | }); 250 | 251 | sum = 0; 252 | run_op("search-1%%", 1000, G, {}, { 253 | for (int i = 0; i < 1000; i++) { 254 | const double p = 0.01; 255 | double 
min[2]; 256 | double max[2]; 257 | min[0] = rand_double() * 360.0 - 180.0; 258 | min[1] = rand_double() * 180.0 - 90.0; 259 | max[0] = min[0] + 360.0*p; 260 | max[1] = min[1] + 180.0*p; 261 | int res = 0; 262 | kv_intersects(&tree, min, max, iter_many, &res); 263 | sum += res; 264 | } 265 | }); 266 | // printf("%d\n", sum); 267 | 268 | sum = 0; 269 | run_op("search-5%%", 1000, G, {}, { 270 | for (int i = 0; i < 1000; i++) { 271 | const double p = 0.05; 272 | double min[2]; 273 | double max[2]; 274 | min[0] = rand_double() * 360.0 - 180.0; 275 | min[1] = rand_double() * 180.0 - 90.0; 276 | max[0] = min[0] + 360.0*p; 277 | max[1] = min[1] + 180.0*p; 278 | int res = 0; 279 | kv_intersects(&tree, min, max, iter_many, &res); 280 | sum += res; 281 | } 282 | }); 283 | // printf("%d\n", sum); 284 | 285 | sum = 0; 286 | run_op("search-10%%", 1000, G, {}, { 287 | for (int i = 0; i < 1000; i++) { 288 | const double p = 0.10; 289 | double min[2]; 290 | double max[2]; 291 | min[0] = rand_double() * 360.0 - 180.0; 292 | min[1] = rand_double() * 180.0 - 90.0; 293 | max[0] = min[0] + 360.0*p; 294 | max[1] = min[1] + 180.0*p; 295 | int res = 0; 296 | kv_intersects(&tree, min, max, iter_many, &res); 297 | sum += res; 298 | } 299 | }); 300 | // printf("%d\n", sum); 301 | 302 | 303 | 304 | /////////////////////////////////////////////////////////////////////////// 305 | struct kv_iter *iter; 306 | kv_iter_init(&tree, &iter, 0); 307 | printf("== using iterators ==\n"); 308 | run_op("search-item(seq)", N, G, { 309 | sort_points(keys, nkeys); 310 | }, { 311 | for (int i = 0; i < nkeys; i++) { 312 | coord[0] = keys[i].x; 313 | coord[1] = keys[i].y; 314 | struct iiter0ctx ctx = { .id = keys[i].id }; 315 | kv_iter_intersects(iter, coord, coord); 316 | while (kv_iter_valid(iter)) { 317 | struct point point; 318 | kv_iter_item(iter, &point); 319 | if (!iter_one(point, &ctx)) { 320 | break; 321 | } 322 | kv_iter_next(iter); 323 | } 324 | assert(ctx.found); 325 | } 326 | }); 327 | 
run_op("search-item(rand)", N, G, { 328 | shuffle_points(keys, nkeys); 329 | }, { 330 | for (int i = 0; i < nkeys; i++) { 331 | coord[0] = keys[i].x; 332 | coord[1] = keys[i].y; 333 | struct iiter0ctx ctx = { .id = keys[i].id }; 334 | kv_iter_intersects(iter, coord, coord); 335 | while (kv_iter_valid(iter)) { 336 | struct point point; 337 | kv_iter_item(iter, &point); 338 | if (!iter_one(point, &ctx)) { 339 | break; 340 | } 341 | kv_iter_next(iter); 342 | } 343 | assert(ctx.found); 344 | } 345 | }); 346 | 347 | sum = 0; 348 | run_op("search-1%%", 1000, G, { 349 | }, { 350 | for (int i = 0; i < 1000; i++) { 351 | const double p = 0.01; 352 | double min[2]; 353 | double max[2]; 354 | min[0] = rand_double() * 360.0 - 180.0; 355 | min[1] = rand_double() * 180.0 - 90.0; 356 | max[0] = min[0] + 360.0*p; 357 | max[1] = min[1] + 180.0*p; 358 | int res = 0; 359 | kv_iter_intersects(iter, min, max); 360 | while (kv_iter_valid(iter)) { 361 | struct point point; 362 | kv_iter_item(iter, &point); 363 | if (!iter_many(point, &res)) { 364 | break; 365 | } 366 | kv_iter_next(iter); 367 | } 368 | sum += res; 369 | } 370 | }); 371 | // printf("%d\n", sum); 372 | 373 | sum = 0; 374 | run_op("search-5%%", 1000, G, { 375 | }, { 376 | for (int i = 0; i < 1000; i++) { 377 | const double p = 0.05; 378 | double min[2]; 379 | double max[2]; 380 | min[0] = rand_double() * 360.0 - 180.0; 381 | min[1] = rand_double() * 180.0 - 90.0; 382 | max[0] = min[0] + 360.0*p; 383 | max[1] = min[1] + 180.0*p; 384 | int res = 0; 385 | kv_iter_intersects(iter, min, max); 386 | while (kv_iter_valid(iter)) { 387 | struct point point; 388 | kv_iter_item(iter, &point); 389 | if (!iter_many(point, &res)) { 390 | break; 391 | } 392 | kv_iter_next(iter); 393 | } 394 | sum += res; 395 | } 396 | }); 397 | // printf("%d\n", sum); 398 | 399 | sum = 0; 400 | run_op("search-10%%", 1000, G, { 401 | }, { 402 | for (int i = 0; i < 1000; i++) { 403 | const double p = 0.10; 404 | double min[2]; 405 | double max[2]; 406 | 
#!/usr/bin/env bash
#
# Compiles one test program.
#
# Usage: build.sh <name>     (compiles <name>.c into <name>.out, or
#                             <name>.out.js under Emscripten)
#
# Environment:
#   CC        compiler command (default: cc); may contain arguments
#   CFLAGS    extra flags
#   NOSANS    1 disables sanitizers and coverage
#   VALGRIND  1 disables sanitizers (valgrind is used instead)
#   RACE      1 swaps the address sanitizer for the thread sanitizer

set -e
# Quoted so a checkout path containing spaces still works.
cd "$(dirname "${BASH_SOURCE[0]}")"

if [[ "$1" == "" ]]; then
    echo "usage: build.sh <test-name>" >&2
    exit 1
fi

CC="${CC:-cc}"
# $CC is intentionally unquoted wherever it is executed: it may be a
# multi-word command such as "zig cc".
CCVERSHEAD="$($CC --version | head -n 1)"
if [[ "$CCVERSHEAD" == "" ]]; then
    exit 1
fi
if [[ "$CCVERSHEAD" == *"Emscripten"* ]]; then
    NOSANS=1
    EMCC=1
fi
if [[ "$VALGRIND" == "1" || "$CC" == *"zig"* ]]; then
    NOSANS=1
fi
# Sanitizers and coverage instrumentation are enabled for clang only.
if [[ "$CCVERSHEAD" == *"clang"* && "$NOSANS" != "1" ]]; then
    CFLAGS="-O0 -g3 $CFLAGS"
    CFLAGS="-fno-omit-frame-pointer $CFLAGS"
    CFLAGS="-fprofile-instr-generate $CFLAGS"
    CFLAGS="-fcoverage-mapping $CFLAGS"
    CFLAGS="-fsanitize=undefined $CFLAGS"
    CFLAGS="-fno-inline $CFLAGS"
    if [[ "$RACE" == "1" ]]; then
        CFLAGS="$CFLAGS -fsanitize=thread"
    else
        CFLAGS="-fsanitize=address $CFLAGS"
    fi
fi
CFLAGS="-Wall -Wextra $CFLAGS"
CFLAGS="-pedantic -fstrict-aliasing $CFLAGS"
# CFLAGS="-Wsign-compare -Wsign-conversion -Wshadow $CFLAGS"
if [[ "$EMCC" == "1" ]]; then
    CFLAGS="$CFLAGS -pthread -sINITIAL_MEMORY=134610944"
fi

printf "\e[2m%s\e[0m\n" "$CC $CFLAGS $1.c"

echo "$CC $CFLAGS $1.c" >> test.log
# $CFLAGS must be word-split into individual flags; the file name is quoted.
$CC $CFLAGS "$1.c"

if [[ "$EMCC" == "1" ]]; then
    mv a.out.js "$1.out.js"
    # mv a.out.wasm "$1.out.wasm"
else
    mv a.out "$1.out"
fi
// Maps a normalized coordinate onto the 16-bit grid used by the curve
// encoders. Values at or below 0 map to 0, values at or above 1 map to
// 0xFFFF, and anything in between is scaled and truncated.
//
// NaN (for example from 0/0 when the window has zero extent) maps to 0.
// Converting NaN straight to an integer type is undefined behaviour, so it
// must be filtered before the cast; `!(v > 0.0)` is true for NaN as well as
// for values <= 0.
static uint16_t curve_unit_to_u16(double v) {
    if (!(v > 0.0)) {
        return 0;
    }
    if (v >= 1.0) {
        return 0xFFFF;
    }
    return (uint16_t)(v * 0xFFFF);
}

//------------------------------------------------------------------------------
// Hilbert Curve Encoding
// From (Public Domain): https://github.com/rawrunprotected/hilbert_curves
//------------------------------------------------------------------------------
// Returns the 32-bit Hilbert curve index of the point (x, y).
//
// window is {xmin, ymin, xmax, ymax}; the point is normalized against it and
// clamped, so coordinates outside the window land on the nearest edge and
// NaN coordinates are treated as the minimum edge.
static uint32_t curve_hilbert(double x, double y, double window[4]) {
    // Prepare inputs by converting doubles to ints
    x = (x - window[0]) / (window[2]-window[0]);
    y = (y - window[1]) / (window[3]-window[1]);
    uint16_t xint = curve_unit_to_u16(x);
    uint16_t yint = curve_unit_to_u16(y);
    // Initial prefix scan round, prime with x and y
    uint16_t a = xint ^ yint;
    uint16_t b = 0xFFFF ^ a;
    uint16_t c = 0xFFFF ^ (xint | yint);
    uint16_t d = xint & (yint ^ 0xFFFF);
    uint16_t A = a | (b >> 1);
    uint16_t B = (a >> 1) ^ a;
    uint16_t C = ((c >> 1) ^ (b & (d >> 1))) ^ c;
    uint16_t D = ((a & (c >> 1)) ^ (d >> 1)) ^ d;
    a = A, b = B, c = C, d = D;
    A = ((a & (a >> 2)) ^ (b & (b >> 2)));
    B = ((a & (b >> 2)) ^ (b & ((a ^ b) >> 2)));
    C ^= ((a & (c >> 2)) ^ (b & (d >> 2)));
    D ^= ((b & (c >> 2)) ^ ((a ^ b) & (d >> 2)));
    a = A, b = B, c = C, d = D;
    A = ((a & (a >> 4)) ^ (b & (b >> 4)));
    B = ((a & (b >> 4)) ^ (b & ((a ^ b) >> 4)));
    C ^= ((a & (c >> 4)) ^ (b & (d >> 4)));
    D ^= ((b & (c >> 4)) ^ ((a ^ b) & (d >> 4)));
    // Final round and projection
    a = A, b = B, c = C, d = D;
    C ^= ((a & (c >> 8)) ^ (b & (d >> 8)));
    D ^= ((b & (c >> 8)) ^ ((a ^ b) & (d >> 8)));
    // Undo transformation prefix scan
    a = C ^ (C >> 1);
    b = D ^ (D >> 1);
    // Recover index bits
    uint32_t i0 = xint ^ yint;
    uint32_t i1 = b | (0xFFFF ^ (i0 | a));
    // Interleave the index bits into a fully formed curve
    i0 = (i0 | (i0 << 8)) & 0x00ff00ff;
    i0 = (i0 | (i0 << 4)) & 0x0f0f0f0f;
    i0 = (i0 | (i0 << 2)) & 0x33333333;
    i0 = (i0 | (i0 << 1)) & 0x55555555;
    i1 = (i1 | (i1 << 8)) & 0x00ff00ff;
    i1 = (i1 | (i1 << 4)) & 0x0f0f0f0f;
    i1 = (i1 | (i1 << 2)) & 0x33333333;
    i1 = (i1 | (i1 << 1)) & 0x55555555;
    return (i1 << 1) | i0;
}
1.0 : y) * 0xFFFF; 63 | uint32_t i0 = xint; 64 | uint32_t i1 = yint; 65 | // Interleave the index bits into a fully formed curve 66 | i0 = (i0 | (i0 << 8)) & 0x00ff00ff; 67 | i0 = (i0 | (i0 << 4)) & 0x0f0f0f0f; 68 | i0 = (i0 | (i0 << 2)) & 0x33333333; 69 | i0 = (i0 | (i0 << 1)) & 0x55555555; 70 | i1 = (i1 | (i1 << 8)) & 0x00ff00ff; 71 | i1 = (i1 | (i1 << 4)) & 0x0f0f0f0f; 72 | i1 = (i1 | (i1 << 2)) & 0x33333333; 73 | i1 = (i1 | (i1 << 1)) & 0x55555555; 74 | return (i1 << 1) | i0; 75 | } 76 | 77 | #endif 78 | -------------------------------------------------------------------------------- /tests/dist.h: -------------------------------------------------------------------------------- 1 | #ifndef DIST_H 2 | #define DIST_H 3 | 4 | #include 5 | 6 | static double haversine(double alat, double alon, double blat, 7 | double blon) 8 | { 9 | double φ1 = alat * (M_PI/180); // φ, λ in radians 10 | double φ2 = blat * (M_PI/180); 11 | double Δφ = (blat-alat) * (M_PI/180); 12 | double Δλ = (blon-alon) * (M_PI/180); 13 | double Δφ2 = sin(Δφ/2); 14 | double Δλ2 = sin(Δλ/2); 15 | double a = Δφ2 * Δφ2 + cos(φ1) * cos(φ2) * Δλ2 * Δλ2; 16 | double c = 2 * atan2(sqrt(a), sqrt(1-a)); 17 | double d = c * 6371e3; // in metres 18 | return d; 19 | } 20 | 21 | // distance on the unit sphere computed using Haversine formula 22 | static double haversine_unit_rad(double φa, double λa, double φb, double λb) { 23 | if (φa == φb && λa == λb) { 24 | return 0; 25 | } 26 | double Δφ = φa - φb; 27 | double Δλ = λa - λb; 28 | double sinΔφ = sin(Δφ / 2); 29 | double sinΔλ = sin(Δλ / 2); 30 | double cosφa = cos(φa); 31 | double cosφb = cos(φb); 32 | return 2 * asin(sqrt(sinΔφ*sinΔφ+sinΔλ*sinΔλ*cosφa*cosφb)); 33 | } 34 | 35 | // Algorithm from: 36 | // Schubert, E., Zimek, A., & Kriegel, H.-P. (2013). 37 | // Geodetic Distance Queries on R-Trees for Indexing Geographic Data. 38 | // Lecture Notes in Computer Science, 146–164. 
39 | // doi:10.1007/978-3-642-40235-7_9 40 | static double point_rect_dist_geodetic_rad(double φq, double λq, double φl, 41 | double λl, double φh, double λh) 42 | { 43 | double twoΠ = 2 * M_PI; 44 | double halfΠ = M_PI / 2; 45 | 46 | // Simple case, point or invalid rect 47 | if (φl >= φh && λl >= λh) { 48 | return haversine_unit_rad(φl, λl, φq, λq); 49 | } 50 | 51 | if (λl <= λq && λq <= λh) { 52 | // q is between the bounding meridians of r 53 | // hence, q is north, south or within r 54 | if (φl <= φq && φq <= φh) { // Inside 55 | return 0; 56 | } 57 | 58 | if (φq < φl) { // South 59 | return φl - φq; 60 | } 61 | 62 | return φq - φh; // North 63 | } 64 | 65 | // determine if q is closer to the east or west edge of r to select edge for 66 | // tests below 67 | double Δλe = λl - λq; 68 | double Δλw = λq - λh; 69 | if (Δλe < 0) { 70 | Δλe += twoΠ; 71 | } 72 | if (Δλw < 0) { 73 | Δλw += twoΠ; 74 | } 75 | double Δλ; // distance to closest edge 76 | double λedge; // longitude of closest edge 77 | if (Δλe <= Δλw) { 78 | Δλ = Δλe; 79 | λedge = λl; 80 | } else { 81 | Δλ = Δλw; 82 | λedge = λh; 83 | } 84 | 85 | double sinΔλ = sin(Δλ); 86 | double cosΔλ = cos(Δλ); 87 | double tanφq = tan(φq); 88 | 89 | if (Δλ >= halfΠ) { 90 | // If Δλ > 90 degrees (1/2 pi in radians) we're in one of the corners 91 | // (NW/SW or NE/SE depending on the edge selected). 
Compare against the 92 | // center line to decide which case we fall into 93 | double φmid = (φh + φl) / 2; 94 | if (tanφq >= tan(φmid)*cosΔλ) { 95 | return haversine_unit_rad(φq, λq, φh, λedge); // North corner 96 | } 97 | return haversine_unit_rad(φq, λq, φl, λedge); // South corner 98 | } 99 | 100 | if (tanφq >= tan(φh)*cosΔλ) { 101 | return haversine_unit_rad(φq, λq, φh, λedge); // North corner 102 | } 103 | 104 | if (tanφq <= tan(φl)*cosΔλ) { 105 | return haversine_unit_rad(φq, λq, φl, λedge); // South corner 106 | } 107 | 108 | // We're to the East or West of the rect, compute distance using cross-track 109 | // Note that this is a simplification of the cross track distance formula 110 | // valid since the track in question is a meridian. 111 | return asin(cos(φq) * sinΔλ); 112 | } 113 | 114 | static double point_rect_dist(double lat, double lon, double minlat, 115 | double minlon, double maxlat, double maxlon) 116 | { 117 | return point_rect_dist_geodetic_rad( 118 | lat*(M_PI/180), lon*(M_PI/180), 119 | minlat*(M_PI/180), minlon*(M_PI/180), 120 | maxlat*(M_PI/180), maxlon*(M_PI/180) 121 | ); 122 | } 123 | 124 | 125 | #endif 126 | -------------------------------------------------------------------------------- /tests/loop_run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | cd $(dirname "${BASH_SOURCE[0]}") 5 | 6 | ./build.sh test_btree 7 | 8 | export MallocNanoZone=0 9 | 10 | while : 11 | do 12 | ./test_btree.out 13 | done -------------------------------------------------------------------------------- /tests/run.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | cd $(dirname "${BASH_SOURCE[0]}") 5 | 6 | OK=0 7 | FAILS= 8 | finish() { 9 | rm -fr *.o 10 | rm -fr *.out 11 | rm -fr *.test 12 | rm -fr *.profraw 13 | rm -fr *.dSYM 14 | rm -fr *.profdata 15 | rm -fr *.c.worker.js 16 | rm -fr *.c.wasm 17 | rm 
#!/usr/bin/env bash
#
# Builds and runs a single test, then reports coverage when available.
#
# Usage: test.sh <name>      (expects <name>.c in this directory)
#
# Environment:
#   VALGRIND  1 runs the test binary under valgrind

set -e
# Quoted so a checkout path containing spaces still works.
cd "$(dirname "${BASH_SOURCE[0]}")"

# Remove stale artifacts from earlier runs. "$1" is quoted throughout so an
# unusual test name cannot be word-split or glob-expanded into other paths.
rm -f a.out
rm -f a.out.js
rm -f a.out.wasm
rm -f "$1.out"
rm -f "$1.out.js"
rm -f "$1.out.wasm"
rm -f default.profraw
rm -f "$1.profraw"

./build.sh "$1"

export MallocNanoZone=0
if [[ "$VALGRIND" == "1" ]]; then
    # NOTE(review): without --error-exitcode valgrind exits with the
    # program's own status, so reported leaks do not fail the run. Confirm
    # whether that is intended.
    valgrind --leak-check=yes "./$1.out"
elif [[ -f "$1.out.js" ]]; then
    node "./$1.out.js"
else
    "./$1.out"
fi

# Coverage is skipped for sources that opt out with "#define NOCOV".
if ! grep -q "#define NOCOV" "$1.c"; then
    if [[ -f default.profraw ]]; then
        mv default.profraw "$1.profraw"
        ./cov.sh "$1"
    fi
# else
#     echo covered: ignored
fi
ignore coverage 5 | 6 | #include "testutils.h" 7 | 8 | #define BGEN_BTREE 9 | #define BGEN_NAME bt1 10 | #define BGEN_TYPE int 11 | #define BGEN_COW 12 | #define BGEN_MALLOC { return malloc0(size); } 13 | #define BGEN_FREE { free0(ptr); } 14 | #define BGEN_LESS { return a < b; } 15 | #include "../bgen.h" 16 | 17 | struct col { 18 | atomic_int rc; 19 | char *name; 20 | struct bt1 *tree; 21 | }; 22 | 23 | struct col *col_new(void) { 24 | struct col *col = malloc0(sizeof(struct col)); 25 | assert(col); 26 | memset(col, 0, sizeof(struct col)); 27 | col->name = malloc0(100); 28 | assert(col->name); 29 | col->name[0] = '\0'; 30 | return col; 31 | } 32 | 33 | void col_free(struct col *col, void *udata) { 34 | bt1_clear(&col->tree, udata); 35 | free0(col->name); 36 | free0(col); 37 | } 38 | 39 | bool col_copy(struct col *col, struct col **copy, void *udata) { 40 | struct col *col2 = col_new(); 41 | strcpy(col2->name, col->name); 42 | if (bt1_clone(&col->tree, &col2->tree, udata) != bt1_COPIED) { 43 | col_free(col2, udata); 44 | return false; 45 | } 46 | *copy = col2; 47 | return true; 48 | } 49 | 50 | #define BGEN_BTREE 51 | #define BGEN_NAME bt0 52 | #define BGEN_TYPE struct col* /* pointer to a collection */ 53 | #define BGEN_COW 54 | #define BGEN_MALLOC { return malloc0(size); } 55 | #define BGEN_FREE { free0(ptr); } 56 | #define BGEN_ITEMCOPY { return col_copy(item, copy, udata); } 57 | #define BGEN_ITEMFREE { col_free(item, udata); } 58 | #define BGEN_COMPARE { return strcmp(a->name, b->name); } 59 | #include "../bgen.h" 60 | 61 | void test_clone(void) { 62 | testinit(); 63 | struct bt0 *tree = 0; 64 | for (int i = 0; i < 1000; i++) { 65 | struct col *col = col_new(); 66 | snprintf(col->name, 100, "col:%d", i); 67 | for (int j = 0; j < 1000; j++) { 68 | assert(bt1_insert(&col->tree, j, 0, 0) == bt1_INSERTED); 69 | } 70 | assert(bt0_insert(&tree, col, 0, 0) == bt0_INSERTED); 71 | } 72 | // clone the root 73 | struct bt0 *tree2 = 0; 74 | assert(bt0_clone(&tree, &tree2, 
struct point {
    int id;
    uint32_t curve;
    double x;
    double y;
};

// Fills min/max with the bounding rectangle of a point.
// A point has no extent, so the rectangle is degenerate: dimension 0 carries
// x in both min and max, dimension 1 carries y in both min and max.
// min and max must each have room for two doubles.
void point_rect(struct point point, double min[], double max[]) {
    min[0] = point.x;
    min[1] = point.y;
    max[0] = point.x;
    max[1] = point.y;
}
// Reports whether two 2-dimensional rectangles overlap.
// Rectangle A is (amin, amax) and rectangle B is (bmin, bmax); each array
// holds two coordinates. Rectangles that merely touch along an edge or at a
// corner count as intersecting.
bool intersects(double amin[], double amax[], double bmin[], double bmax[]) {
    for (int dim = 0; dim < 2; dim++) {
        // Separated on this axis: B begins after A ends, or ends before A
        // begins.
        if (bmin[dim] > amax[dim] || bmax[dim] < amin[dim]) {
            return false;
        }
    }
    return true;
}
{ 92 | if (!iiter(point, udata)) { 93 | return false; 94 | } 95 | } 96 | return true; 97 | } 98 | 99 | bool iiter_ctx_equal(struct iiter_ctx a, struct iiter_ctx b) { 100 | if (a.count != b.count) { 101 | return false; 102 | } 103 | for (int i = 0; i < a.count; i++) { 104 | if (point_compare(a.results[i], b.results[i]) != 0) { 105 | // printf("%d %d\n", a.results[i].id, b.results[i].id); 106 | return false; 107 | } 108 | } 109 | return true; 110 | } 111 | 112 | void test_intersects(void) { 113 | testinit(); 114 | struct kv *tree = 0; 115 | double start = now(); 116 | // int run = 0; 117 | while (now() - start < 1.0) { 118 | int npoints; 119 | switch (rand()%10) { 120 | case 0: 121 | npoints = rand_double()*10; 122 | break; 123 | case 1: 124 | npoints = rand_double()*1000; 125 | break; 126 | default: 127 | npoints = rand_double()*500; 128 | } 129 | 130 | // printf("\033[1mRUN %d\033[0m\n", run); 131 | struct point *points = malloc(sizeof(struct point)*npoints); 132 | assert(points); 133 | double window[4] = { -180.0, -90.0, 180.0, 90.0 }; 134 | for (int i = 0; i < npoints; i++) { 135 | points[i].id = i; 136 | points[i].x = rand_double() * 360.0 - 180.0; 137 | points[i].y = rand_double() * 180.0 - 90.0; 138 | points[i].curve = curve_hilbert(points[i].x, points[i].y, window); 139 | } 140 | shuffle_points(points, npoints); 141 | struct point val; 142 | for (int i = 0; i < npoints; i++) { 143 | val.id = -1; 144 | assert(kv_insert(&tree, points[i], &val, 0) == kv_INSERTED); 145 | } 146 | struct point *results1 = malloc(sizeof(struct point)*npoints); 147 | assert(results1); 148 | struct point *results2 = malloc(sizeof(struct point)*npoints); 149 | assert(results2); 150 | struct point *results3 = malloc(sizeof(struct point)*npoints); 151 | assert(results3); 152 | for (int i = 0; i < 100; i++) { 153 | // printf("\033[1;33m== %d ==\033[0m\n", i); 154 | double min[] = { 155 | rand_double() * 360.0 - 180.0, 156 | rand_double() * 180.0 - 90.0 157 | }; 158 | double max[] = { 159 
| min[0] + rand_double() * 10.0, 160 | min[1] + rand_double() * 10.0 161 | }; 162 | // printf("\033[1;33m( %f %f %f %f )\033[0m\n", min[0], min[1], max[0], max[1]); 163 | // printf("\033[1;34m>>> scan\033[0m\n"); 164 | struct iiter_ctx ctx1 = { .min = min, .max = max, .results = results1 }; 165 | kv_scan(&tree, siter, &ctx1); 166 | // printf("\033[1;34m>>> intersects\033[0m\n"); 167 | struct iiter_ctx ctx2 = { .min = min, .max = max, .results = results2 }; 168 | kv_intersects(&tree, min, max, iiter, &ctx2); 169 | // printf("\033[1;34m>>> iter_intersects\033[0m\n"); 170 | struct iiter_ctx ctx3 = { .min = min, .max = max, .results = results3 }; 171 | struct kv_iter *iter; 172 | kv_iter_init(&tree, &iter, 0); 173 | kv_iter_intersects(iter, min, max); 174 | for (; kv_iter_valid(iter); kv_iter_next(iter)) { 175 | struct point point; 176 | kv_iter_item(iter, &point); 177 | siter(point, &ctx3); 178 | } 179 | kv_iter_release(iter); 180 | assert(iiter_ctx_equal(ctx1, ctx2)); 181 | if (!iiter_ctx_equal(ctx2, ctx3)) { 182 | tree_print(&tree); 183 | } 184 | assert(iiter_ctx_equal(ctx2, ctx3)); 185 | } 186 | free(results1); 187 | free(results2); 188 | free(results3); 189 | free(points); 190 | kv_clear(&tree, 0); 191 | // run++; 192 | } 193 | checkmem(); 194 | } 195 | 196 | // void city_fillrect(struct city_entry city, double min[], double max[]) { 197 | // min[0] = city.lon; 198 | // min[1] = city.lat; 199 | // max[0] = city.lon; 200 | // max[1] = city.lat; 201 | // } 202 | 203 | // #define BGEN_NAME cities 204 | // #define BGEN_TYPE struct city_entry 205 | // #define BGEN_FANOUT 16 206 | // #define BGEN_SPATIAL 207 | // #define BGEN_ITEMRECT city_fillrect(item, min, max); 208 | // #define BGEN_LESS return a.id < b.id; 209 | // #include "../bgen.h" 210 | 211 | 212 | // void print_rects(struct cities *node, int depth) { 213 | // if (node->isleaf) { 214 | // return; 215 | // } 216 | // for (int i = 0; i <= node->len; i++) { 217 | // double xmin = node->rects[i].min[0]; 218 | // 
double ymin = node->rects[i].min[1]; 219 | // double xmax = node->rects[i].max[0]; 220 | // double ymax = node->rects[i].max[1]; 221 | // for (int j = 0; j < depth; j++) { 222 | // printf(" "); 223 | // } 224 | // printf("(%f %f %f %f)\n", xmin, ymin, xmax, ymax); 225 | // print_rects(node->children[i], depth+1); 226 | // } 227 | // } 228 | 229 | // void test_svg(void) { 230 | // testinit(); 231 | // struct cities *cities = 0; 232 | // for (int i = 0; i < NCITIES; i++) { 233 | // assert(cities_insert(&cities, all_cities[i], 0, 0) == cities_INSERTED); 234 | // } 235 | 236 | // print_rects(cities, 0); 237 | 238 | // // all_cities 239 | // checkmem(); 240 | 241 | // } 242 | 243 | int main(void) { 244 | initrand(); 245 | test_intersects(); 246 | // test_svg(); 247 | return 0; 248 | } 249 | -------------------------------------------------------------------------------- /tests/test_spatial3.c: -------------------------------------------------------------------------------- 1 | // The actual work is done in "test_base.h" 2 | #define TESTNAME "spatial3" 3 | #define SPATIAL 4 | #define LINEAR 5 | #define DIMS 3 6 | #include "test_base.h" 7 | -------------------------------------------------------------------------------- /tests/test_vector.c: -------------------------------------------------------------------------------- 1 | #define TESTNAME "vector" 2 | #define NOCOV // Not a base. 
ignore coverage 3 | #include "testutils.h" 4 | 5 | #define BGEN_NAME kv 6 | #define BGEN_TYPE int 7 | #define BGEN_COW 8 | #define BGEN_COUNTED 9 | #define BGEN_ASSERT 10 | #define BGEN_FANOUT 16 11 | #define BGEN_MALLOC return malloc0(size); 12 | #define BGEN_FREE free0(ptr); 13 | #define BGEN_NOORDER 14 | #include "../bgen.h" 15 | 16 | static __thread int val = -1; 17 | static __thread struct kv *tree = 0; 18 | static __thread int *keys = 0; 19 | static __thread int nkeys = 1000; // do not change this value 20 | static __thread int asum = 0; 21 | 22 | void initkeys(void) { 23 | keys = (int*)malloc(nkeys * sizeof(int)); 24 | assert(keys); 25 | for (int i = 0; i < nkeys; i++) { 26 | keys[i] = i*10; 27 | asum += keys[i]; 28 | } 29 | } 30 | 31 | void pitem(int item, FILE *file, void *udata) { 32 | (void)udata; 33 | fprintf(file, "%d", item); 34 | } 35 | 36 | void prtype(double rtype, FILE *file, void *udata) { 37 | (void)udata; 38 | fprintf(file, "%.0f", rtype); 39 | } 40 | 41 | void tree_print(struct kv **root) { 42 | _kv_internal_print(root, stdout, pitem, prtype, 0); 43 | } 44 | 45 | void tree_print_dim(struct kv **root) { 46 | printf("\033[2m"); 47 | tree_print(root); 48 | printf("\033[0m"); 49 | } 50 | 51 | void tree_fill(void) { 52 | shuffle(keys, nkeys); 53 | assert(kv_insert(&tree, keys[0], 0, 0) == kv_UNSUPPORTED); 54 | for (int i = 0; i < nkeys; i++) { 55 | assert(kv_push_back(&tree, keys[i], 0) == kv_INSERTED); 56 | } 57 | } 58 | 59 | void test_basic(void) { 60 | testinit(); 61 | 62 | tree_fill(); 63 | assert(kv_sane(&tree, 0)); 64 | 65 | for (int i = 0; i < nkeys; i++) { 66 | val = -1; 67 | assert(kv_get_at(&tree, i, &val, 0) == kv_FOUND); 68 | // printf("%d %d\n", val, keys[i]); 69 | assert(val == keys[i]); 70 | } 71 | 72 | for (int i = 0; i < nkeys; i++) { 73 | val = -1; 74 | assert(kv_get_at(&tree, i, &val, 0) == kv_FOUND); 75 | int val2 = -1; 76 | assert(kv_replace_at(&tree, i, keys[i]+1, &val2, 0) == kv_REPLACED); 77 | assert(val == val2); 78 | 
// Returns a non-negative 63-bit random value read from /dev/urandom.
// Eight bytes are read and shifted right by one so that the result always
// fits in the positive range of int64_t.
// The read is performed outside of assert() so that it still happens when
// assertions are compiled out with NDEBUG.
int64_t crand(void) {
    uint64_t seed = 0;
    FILE *urandom = fopen("/dev/urandom", "rb");
    assert(urandom);
    size_t nread = fread(&seed, sizeof(seed), 1, urandom);
    fclose(urandom);
    assert(nread == 1);
    (void)nread; // only referenced by the assert above
    return (int64_t)(seed>>1);
}
// Formats n in decimal with a comma between every group of three digits,
// e.g. 1234567 becomes "1,234,567".
// Returns a newly malloc'd, NUL-terminated string that the caller must free.
static char *commaize(unsigned long long n) {
    // 20 digits + 6 commas + NUL is the longest possible output, so 64 bytes
    // is always sufficient.
    enum { COMMAIZE_CAP = 64 };
    char digits[COMMAIZE_CAP];
    char *out = (char*)malloc(COMMAIZE_CAP);
    assert(out);
    int ndigits = snprintf(digits, sizeof(digits), "%llu", n);
    assert(ndigits > 0 && ndigits < COMMAIZE_CAP);
    int len = 0;
    for (int i = 0; i < ndigits; i++) {
        // A comma goes in front of every digit that starts a full group of
        // three counted from the right, except at the very beginning.
        if (i != 0 && (ndigits - i) % 3 == 0) {
            out[len++] = ',';
        }
        out[len++] = digits[i];
    }
    out[len] = '\0';
    return out;
}
// Allocation tracking used by the tests to detect leaks.
// nallocs counts live allocations, mtotal counts live requested bytes.
// Every tracked allocation is preceded by a 16-byte header whose first
// size_t stores the requested size.
static atomic_size_t nallocs = 0;
static atomic_size_t mtotal = 0;

// Tracked malloc. Returns a pointer to size usable bytes, or 0 on failure.
static void *malloc0(size_t size) {
    char *cptr = (char*)malloc(16+size);
    if (!cptr) {
        return 0;
    }
    *(size_t*)cptr = size;
    atomic_fetch_add(&mtotal, size);
    atomic_fetch_add(&nallocs, 1);
    return cptr+16;
}

// Tracked realloc. A null ptr behaves like malloc0(size). On failure 0 is
// returned and both the original allocation and the counters are unchanged.
static void *realloc0(void *ptr, size_t size) {
    if (!ptr) {
        return malloc0(size);
    }
    char *cptr = (char*)realloc(((char*)ptr)-16, 16+size);
    if (!cptr) {
        return 0;
    }
    // Swap the old size for the new one in the byte total. The allocation
    // count is unchanged because the block was resized, not created.
    atomic_fetch_sub(&mtotal, *(size_t*)cptr);
    atomic_fetch_add(&mtotal, size);
    *(size_t*)cptr = size;
    return cptr+16;
}

// Tracked free. A null ptr is ignored.
static void free0(void *ptr) {
    if (!ptr) {
        return;
    }
    char *cptr = ((char*)ptr)-16;
    atomic_fetch_sub(&mtotal, *(size_t*)cptr);
    atomic_fetch_sub(&nallocs, 1);
    free(cptr);
}

// Tracked calloc. Returns zero-filled memory for n elements of size bytes,
// or 0 when the allocation fails or n*size would overflow size_t.
static void *calloc0(size_t n, size_t size) {
    if (size != 0 && n > SIZE_MAX / size) {
        return 0;
    }
    size_t total = size*n;
    void *ptr = malloc0(total);
    if (!ptr) {
        return 0;
    }
    memset(ptr, 0, total);
    return ptr;
}
static const char *testname = ""; 192 | #endif 193 | 194 | #define testinit() { \ 195 | (void)now, (void)commaize, (void)realloc0, (void)calloc0, (void)free0; \ 196 | (void)seedrand, (void)sort, (void)shuffle, (void)checkmem; \ 197 | (void)rand_double; \ 198 | if (strstr(__func__, "test_") == __func__) { \ 199 | char name[250]; \ 200 | snprintf(name, sizeof(name), "test_%s_%s", testname, \ 201 | strstr(__func__, "test_")+5); \ 202 | fprintf(stderr, "%s\n", name); \ 203 | } else { \ 204 | fprintf(stderr, "%s\n", __func__); \ 205 | } \ 206 | } 207 | 208 | #endif 209 | --------------------------------------------------------------------------------