├── .gitignore ├── .gitmodules ├── LICENSE ├── Makefile ├── README.md ├── include ├── bplus.h └── private │ ├── compressor.h │ ├── errors.h │ ├── pages.h │ ├── tree.h │ ├── utils.h │ ├── values.h │ └── writer.h ├── src ├── bplus.c ├── pages.c ├── utils.c ├── values.c └── writer.c └── test ├── bench-basic.cc ├── bench-bulk.cc ├── bench-multithread-get.cc ├── test-api.cc ├── test-bulk.cc ├── test-corruption.cc ├── test-range.cc ├── test-reopen.cc ├── test-threaded-rw.cc └── test.h /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.d 3 | *.a 4 | *.dSYM/ 5 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "external/snappy"] 2 | path = external/snappy 3 | url = https://github.com/google/snappy 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This software is licensed under the MIT License. 2 | 3 | Copyright (C) 2016 National Cheng Kung University, Taiwan. 4 | Copyright (C) 2012 Fedor Indutny. 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a 7 | copy of this software and associated documentation files (the 8 | "Software"), to deal in the Software without restriction, including 9 | without limitation the rights to use, copy, modify, merge, publish, 10 | distribute, sublicense, and/or sell copies of the Software, and to permit 11 | persons to whom the Software is furnished to do so, subject to the 12 | following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included 15 | in all copies or substantial portions of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 18 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 19 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 20 | NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 21 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 22 | OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 23 | USE OR OTHER DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Configurable options 2 | # MODE = release | debug (default: debug) 3 | # SNAPPY = 0 | 1 (default: 1) 4 | # 5 | CSTDFLAG = --std=c99 -pedantic -Wall -Wextra -Wno-unused-parameter 6 | CPPFLAGS += -fPIC -Iinclude -Iexternal/snappy 7 | CPPFLAGS += -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 8 | CPPFLAGS += -D_XOPEN_SOURCE=500 -D_DARWIN_C_SOURCE 9 | LDFLAGS += -lpthread 10 | 11 | ifeq ($(MODE),release) 12 | CPPFLAGS += -O3 13 | DEFINES += -DNDEBUG 14 | else 15 | CFLAGS += -g 16 | endif 17 | 18 | # run make with SNAPPY=0 to turn it off 19 | ifneq ($(SNAPPY),0) 20 | DEFINES += -DBP_USE_SNAPPY=1 21 | else 22 | DEFINES += -DBP_USE_SNAPPY=0 23 | endif 24 | 25 | all: external/snappy/config.status bplus.a 26 | 27 | external/snappy/config.status: 28 | (git submodule init && git submodule update && cd external/snappy) 29 | (cd external/snappy && ./autogen.sh && ./configure) 30 | 31 | OBJS = 32 | 33 | ifneq ($(SNAPPY),0) 34 | OBJS += external/snappy/snappy-sinksource.o 35 | OBJS += external/snappy/snappy.o 36 | OBJS += external/snappy/snappy-c.o 37 | endif 38 | 39 | OBJS += src/utils.o 40 | OBJS += src/writer.o 41 | OBJS += src/values.o 42 | OBJS += src/pages.o 43 | OBJS += src/bplus.o 44 | 45 | deps := $(OBJS:%.o=%.o.d) 46 | 47 | bplus.a: $(OBJS) 48 | $(AR) rcs bplus.a $(OBJS) 49 | 50 | src/%.o: src/%.c 51 | 
$(CC) $(CFLAGS) $(CSTDFLAG) $(CPPFLAGS) $(DEFINES) \ 52 | -o $@ -MMD -MF $@.d -c $< 53 | 54 | external/snappy/%.o: external/snappy/%.cc 55 | $(CC) $(CFLAGS) $(CPPFLAGS) -c $< -o $@ 56 | 57 | TESTS = 58 | TESTS += test/test-api 59 | TESTS += test/test-reopen 60 | TESTS += test/test-range 61 | TESTS += test/test-corruption 62 | TESTS += test/test-bulk 63 | TESTS += test/test-threaded-rw 64 | TESTS += test/bench-basic 65 | TESTS += test/bench-bulk 66 | TESTS += test/bench-multithread-get 67 | 68 | check: $(TESTS) 69 | @test/test-api 70 | @test/test-reopen 71 | @test/test-range 72 | @test/test-bulk 73 | @test/test-corruption 74 | @test/test-threaded-rw 75 | 76 | test/%: test/%.cc bplus.a 77 | $(CXX) $(CFLAGS) $(CPPFLAGS) $< -o $@ bplus.a $(LDFLAGS) 78 | 79 | clean: 80 | @rm -f bplus.a 81 | @rm -f $(OBJS) $(TESTS) $(deps) 82 | 83 | .PHONY: all check clean 84 | 85 | -include $(deps) 86 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # What is a B+ Tree? 2 | A B-tree is a self-balancing tree data structure that maintains sorted data and allows searches, sequential access, insertions, and deletions in logarithmic time. The B-tree generalizes the binary search tree, allowing for nodes with more than two children. A B+ tree is a B-tree variant in which values are stored only in the leaf nodes, while internal nodes hold keys that guide the search. 3 | 4 | For more information, see the [Wikipedia article on B-trees](https://en.wikipedia.org/wiki/B-tree#:~:text=In%20computer%20science%2C%20a%20B,with%20more%20than%20two%20children). 5 | 6 | # Implementation of a B+ tree in C 7 | 8 | ## Dependency 9 | The implementation depends on Google's [snappy](https://github.com/google/snappy) library for fast compression. 
10 | 11 | ## Build 12 | ```bash 13 | make MODE=release 14 | ``` 15 | Optionally, you can run the test suite: 16 | ```bash 17 | make check 18 | ``` 19 | 20 | ## Usage 21 | 22 | ```C 23 | #include <stdio.h> 24 | #include <stdlib.h> 25 | 26 | #include "bplus.h" 27 | 28 | int main(void) { 29 | bp_db_t db; 30 | 31 | /* Open database */ 32 | bp_open(&db, "/tmp/1.bp"); 33 | 34 | /* Set some value */ 35 | bp_sets(&db, "key", "value"); 36 | 37 | /* Get some value */ 38 | char *value; 39 | bp_gets(&db, "key", &value); 40 | fprintf(stdout, "%s\n", value); 41 | free(value); 42 | 43 | /* Close database */ 44 | bp_close(&db); 45 | } 46 | ``` 47 | 48 | ## Benchmarks 49 | 50 | Single-threaded read/write (in non-empty database): 51 | 52 | ``` 53 | 100000 items in db 54 | write : 9865.357599 ops/sec 55 | read : 57383.402903 ops/sec 56 | ``` 57 | 58 | Multi-threaded read (2 cores, 4 threads): 59 | 60 | ``` 61 | 100000 items in db 62 | get : 128841.821540 ops/sec 63 | ``` 64 | 65 | Compaction/Removal: 66 | 67 | ``` 68 | 500000 items in db 69 | compact: 23.143330s 70 | remove : 16827.957592 ops/sec 71 | ``` 72 | 73 | Bulk insertion runs at roughly `120000 items/sec`. 74 | 75 | ## Advanced build options 76 | 77 | ```bash 78 | make MODE=debug # build with assertions enabled 79 | make SNAPPY=0 # build without snappy (no compression will be used) 80 | ``` 81 | 82 | ## Licensing 83 | --------- 84 | `bplus-tree` is freely redistributable under the MIT License. 85 | Use of this source code is governed by an MIT license that can be found 86 | in the `LICENSE` file. 
87 | -------------------------------------------------------------------------------- /include/bplus.h: -------------------------------------------------------------------------------- 1 | #ifndef _BPLUS_H_ 2 | #define _BPLUS_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #define BP_PADDING 64 9 | /* Public fields shared by keys and values: a byte length and a data pointer */ 10 | #define BP_KEY_FIELDS \ 11 | uint64_t length; \ 12 | char *value; 13 | 14 | #include <stdint.h> /* uint64_t */ 15 | #include "private/errors.h" 16 | 17 | typedef struct bp_db_s bp_db_t; 18 | /* Keys and values are the same structure type (struct bp_key_s) */ 19 | typedef struct bp_key_s bp_key_t; 20 | typedef struct bp_key_s bp_value_t; 21 | 22 | typedef int (*bp_compare_cb)(const bp_key_t *a, const bp_key_t *b); 23 | typedef int (*bp_update_cb)(void *arg, 24 | const bp_value_t *previous, 25 | const bp_value_t *value); 26 | typedef int (*bp_remove_cb)(void *arg, 27 | const bp_value_t *value); 28 | typedef void (*bp_range_cb)(void *arg, 29 | const bp_key_t *key, 30 | const bp_value_t *value); 31 | typedef int (*bp_filter_cb)(void* arg, const bp_key_t *key); 32 | 33 | #include "private/tree.h" 34 | 35 | /* 36 | * Open and close database 37 | */ 38 | int bp_open(bp_db_t *tree, const char *filename); 39 | int bp_close(bp_db_t *tree); 40 | 41 | /* 42 | * Get one value by key 43 | */ 44 | int bp_get(bp_db_t *tree, const bp_key_t *key, bp_value_t *value); 45 | int bp_gets(bp_db_t *tree, const char *key, char **value); 46 | 47 | /* 48 | * Get previous value 49 | */ 50 | int bp_get_previous(bp_db_t *tree, 51 | const bp_value_t *value, 52 | bp_value_t *previous); 53 | 54 | /* 55 | * Set one value by key (without solving conflicts, overwrite) 56 | */ 57 | int bp_set(bp_db_t *tree, 58 | const bp_key_t *key, 59 | const bp_value_t *value); 60 | int bp_sets(bp_db_t *tree, 61 | const char *key, 62 | const char *value); 63 | 64 | /* 65 | * Update or create value by key (with solving conflicts) 66 | */ 67 | int bp_update(bp_db_t *tree, 68 | const bp_key_t *key, 69 | const bp_value_t *value, 70 | bp_update_cb update_cb, 71 | void *arg); 72 | int 
bp_updates(bp_db_t *tree, 73 | const char *key, 74 | const char *value, 75 | bp_update_cb update_cb, 76 | void *arg); 77 | 78 | /* 79 | * Set multiple values by keys (bp_bulk_set: *keys and *values each point to an array of `count` items; bp_bulk_sets: keys and values are arrays of `count` C strings) 80 | */ 81 | int bp_bulk_set(bp_db_t *tree, 82 | const uint64_t count, 83 | const bp_key_t **keys, 84 | const bp_value_t **values); 85 | int bp_bulk_sets(bp_db_t *tree, 86 | const uint64_t count, 87 | const char **keys, 88 | const char **values); 89 | 90 | /* 91 | * Update multiple values by keys (same array layout as the bulk set functions, plus an update callback) 92 | */ 93 | int bp_bulk_update(bp_db_t *tree, 94 | const uint64_t count, 95 | const bp_key_t **keys, 96 | const bp_value_t **values, 97 | bp_update_cb update_cb, 98 | void *arg); 99 | int bp_bulk_updates(bp_db_t *tree, 100 | const uint64_t count, 101 | const char **keys, 102 | const char **values, 103 | bp_update_cb update_cb, 104 | void *arg); 105 | 106 | /* 107 | * Remove one value by key 108 | */ 109 | int bp_remove(bp_db_t *tree, const bp_key_t *key); 110 | int bp_removes(bp_db_t *tree, const char *key); 111 | 112 | /* 113 | * Remove value by key only if it's equal to the specified one 114 | */ 115 | int bp_removev(bp_db_t *tree, 116 | const bp_key_t *key, 117 | bp_remove_cb remove_cb, 118 | void *arg); 119 | int bp_removevs(bp_db_t *tree, 120 | const char *key, 121 | bp_remove_cb remove_cb, 122 | void *arg); 123 | 124 | /* 125 | * Get all values in range 126 | * Note: value will be automatically freed after invocation of callback 127 | */ 128 | int bp_get_range(bp_db_t *tree, 129 | const bp_key_t *start, 130 | const bp_key_t *end, 131 | bp_range_cb cb, 132 | void *arg); 133 | int bp_get_ranges(bp_db_t *tree, 134 | const char *start, 135 | const char *end, 136 | bp_range_cb cb, 137 | void *arg); 138 | 139 | /* 140 | * Get values in range (with custom key-filter) 141 | * Note: value will be automatically freed after invocation of callback 142 | */ 143 | int bp_get_filtered_range(bp_db_t *tree, 144 | const bp_key_t *start, 145 | const bp_key_t *end, 146 | bp_filter_cb filter, 147 | bp_range_cb cb, 148 | void 
*arg); 149 | int bp_get_filtered_ranges(bp_db_t *tree, 150 | const char *start, 151 | const char *end, 152 | bp_filter_cb filter, 153 | bp_range_cb cb, 154 | void *arg); 155 | 156 | /* 157 | * Run compaction on database 158 | */ 159 | int bp_compact(bp_db_t *tree); 160 | 161 | /* 162 | * Set compare function to define order of keys in database 163 | */ 164 | void bp_set_compare_cb(bp_db_t *tree, bp_compare_cb cb); 165 | 166 | /* 167 | * Ensure that all data is written to disk 168 | */ 169 | int bp_fsync(bp_db_t *tree); 170 | 171 | struct bp_db_s { 172 | BP_TREE_PRIVATE 173 | }; 174 | 175 | struct bp_key_s { 176 | BP_KEY_FIELDS 177 | BP_KEY_PRIVATE 178 | }; 179 | 180 | #ifdef __cplusplus 181 | } // extern "C" 182 | #endif 183 | 184 | #endif /* _BPLUS_H_ */ 185 | -------------------------------------------------------------------------------- /include/private/compressor.h: -------------------------------------------------------------------------------- 1 | #ifndef _PRIVATE_COMPRESSOR_H_ 2 | #define _PRIVATE_COMPRESSOR_H_ 3 | 4 | #include "private/errors.h" 5 | 6 | #include <stddef.h> /* size_t */ 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | #if BP_USE_SNAPPY == 1 13 | #include <snappy-c.h> 14 | /* Thin wrappers around the snappy C API that map its status onto BP_OK / BP_ECOMP / BP_EDECOMP */ 15 | static inline 16 | size_t bp__max_compressed_size(size_t size) 17 | { 18 | return snappy_max_compressed_length(size); 19 | } 20 | 21 | static inline 22 | int bp__compress(const char *input, 23 | size_t input_length, 24 | char *compressed, 25 | size_t *compressed_length) 26 | { 27 | int ret = snappy_compress(input, input_length, 28 | compressed, compressed_length); 29 | return ret == SNAPPY_OK ? BP_OK : BP_ECOMP; 30 | } 31 | 32 | static inline 33 | int bp__uncompressed_length(const char *compressed, 34 | size_t compressed_length, 35 | size_t *result) 36 | { 37 | int ret = snappy_uncompressed_length(compressed, 38 | compressed_length, result); 39 | return ret == SNAPPY_OK ? 
BP_OK : BP_EDECOMP; 40 | } 41 | 42 | static inline 43 | int bp__uncompress(const char *compressed, 44 | size_t compressed_length, 45 | char *uncompressed, 46 | size_t *uncompressed_length) 47 | { 48 | int ret = snappy_uncompress(compressed, 49 | compressed_length, 50 | uncompressed, 51 | uncompressed_length); 52 | 53 | return ret == SNAPPY_OK ? BP_OK : BP_EDECOMP; 54 | } 55 | #else 56 | #include <string.h> /* memcpy */ 57 | /* Fallbacks used when BP_USE_SNAPPY != 1: data is copied verbatim, so "compressed" and plain sizes are equal */ 58 | static inline 59 | size_t bp__max_compressed_size(size_t size) 60 | { 61 | return size; 62 | } 63 | 64 | static inline 65 | int bp__compress(const char *input, 66 | size_t input_length, 67 | char *compressed, 68 | size_t *compressed_length) 69 | { 70 | memcpy(compressed, input, input_length); 71 | *compressed_length = input_length; 72 | return BP_OK; 73 | } 74 | 75 | static inline 76 | int bp__uncompressed_length(const char *compressed, 77 | size_t compressed_length, 78 | size_t *result) 79 | { 80 | *result = compressed_length; 81 | return BP_OK; 82 | } 83 | 84 | static inline 85 | int bp__uncompress(const char *compressed, 86 | size_t compressed_length, 87 | char *uncompressed, 88 | size_t *uncompressed_length) 89 | { 90 | memcpy(uncompressed, compressed, compressed_length); 91 | *uncompressed_length = compressed_length; 92 | return BP_OK; 93 | } 94 | 95 | #endif /* BP_USE_SNAPPY */ 96 | 97 | #ifdef __cplusplus 98 | } /* extern "C" */ 99 | #endif 100 | 101 | #endif /* _PRIVATE_COMPRESSOR_H_ */ 102 | -------------------------------------------------------------------------------- /include/private/errors.h: -------------------------------------------------------------------------------- 1 | #ifndef _PRIVATE_ERRORS_H_ 2 | #define _PRIVATE_ERRORS_H_ 3 | 4 | #define BP_OK 0 5 | 6 | #define BP_EFILE 0x101 7 | #define BP_EFILEREAD_OOB 0x102 8 | #define BP_EFILEREAD 0x103 9 | #define BP_EFILEWRITE 0x104 10 | #define BP_EFILEFLUSH 0x105 11 | #define BP_EFILERENAME 0x106 12 | #define BP_ECOMPACT_EXISTS 0x107 13 | 14 | #define BP_ECOMP 0x201 15 | #define 
BP_EDECOMP 0x202 16 | /* 0x3xx: BP_EALLOC is returned on allocation failure, BP_ERWLOCK when the rwlock cannot be initialised (see bp_open) */ 17 | #define BP_EALLOC 0x301 18 | #define BP_EMUTEX 0x302 19 | #define BP_ERWLOCK 0x303 20 | /* 0x4xx: tree-level results; BP_ENOTFOUND is what bp_get_previous returns when no previous value is recorded */ 21 | #define BP_ENOTFOUND 0x401 22 | #define BP_ESPLITPAGE 0x402 23 | #define BP_EEMPTYPAGE 0x403 24 | #define BP_EUPDATECONFLICT 0x404 25 | #define BP_EREMOVECONFLICT 0x405 26 | 27 | #endif /* _PRIVATE_ERRORS_H_ */ 28 | -------------------------------------------------------------------------------- /include/private/pages.h: -------------------------------------------------------------------------------- 1 | #ifndef _PRIVATE_PAGES_H_ 2 | #define _PRIVATE_PAGES_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "private/tree.h" 9 | #include "private/values.h" 10 | 11 | typedef struct bp__page_s bp__page_t; 12 | typedef struct bp__page_search_res_s bp__page_search_res_t; 13 | /* Page kind passed to bp__page_create; the initial head page is created as kLeaf (see bp__tree_write_head) */ 14 | enum page_type { 15 | kPage = 0, 16 | kLeaf = 1 17 | }; 18 | /* Mode argument of bp__page_search — NOTE(review): presumably controls whether the child page is loaded; confirm in pages.c */ 19 | enum search_type { 20 | kNotLoad = 0, 21 | kLoad = 1 22 | }; 23 | 24 | int bp__page_create(bp_db_t *t, 25 | const enum page_type type, 26 | const uint64_t offset, 27 | const uint64_t config, 28 | bp__page_t **page); 29 | void bp__page_destroy(bp_db_t *t, bp__page_t *page); 30 | int bp__page_clone(bp_db_t *t, bp__page_t *page, bp__page_t **clone); 31 | 32 | int bp__page_read(bp_db_t *t, bp__page_t *page); 33 | int bp__page_load(bp_db_t *t, 34 | const uint64_t offset, 35 | const uint64_t config, 36 | bp__page_t **page); 37 | int bp__page_save(bp_db_t *t, bp__page_t *page); 38 | 39 | int bp__page_load_value(bp_db_t *t, 40 | bp__page_t *page, 41 | const uint64_t index, 42 | bp_value_t *value); 43 | int bp__page_save_value(bp_db_t *t, 44 | bp__page_t *page, 45 | const uint64_t index, 46 | const int cmp, 47 | const bp_key_t *key, 48 | const bp_value_t *value, 49 | bp_update_cb cb, 50 | void *arg); 51 | 52 | int bp__page_search(bp_db_t *t, 53 | bp__page_t *page, 54 | const bp_key_t *key, 55 | const enum search_type type, 56 | bp__page_search_res_t *result); 57 | int bp__page_get(bp_db_t *t, 58 | 
bp__page_t *page, 59 | const bp_key_t *key, 60 | bp_value_t *value); 61 | int bp__page_get_range(bp_db_t *t, 62 | bp__page_t *page, 63 | const bp_key_t *start, 64 | const bp_key_t *end, 65 | bp_filter_cb filter, 66 | bp_range_cb cb, 67 | void *arg); 68 | int bp__page_insert(bp_db_t *t, 69 | bp__page_t *page, 70 | const bp_key_t *key, 71 | const bp_value_t *value, 72 | bp_update_cb update_cb, 73 | void *arg); 74 | int bp__page_bulk_insert(bp_db_t *t, 75 | bp__page_t *page, 76 | const bp_key_t *limit, 77 | uint64_t *count, 78 | bp_key_t **keys, 79 | bp_value_t **values, 80 | bp_update_cb update_cb, 81 | void *arg); 82 | int bp__page_remove(bp_db_t *t, 83 | bp__page_t *page, 84 | const bp_key_t *key, 85 | bp_remove_cb remove_cb, 86 | void *arg); 87 | int bp__page_copy(bp_db_t *source, bp_db_t *target, bp__page_t *page); 88 | 89 | int bp__page_remove_idx(bp_db_t *t, bp__page_t *page, const uint64_t index); 90 | int bp__page_split(bp_db_t *t, 91 | bp__page_t *parent, 92 | const uint64_t index, 93 | bp__page_t *child); 94 | int bp__page_split_head(bp_db_t *t, bp__page_t **page); 95 | 96 | void bp__page_shiftr(bp_db_t *t, bp__page_t *page, const uint64_t index); 97 | void bp__page_shiftl(bp_db_t *t, bp__page_t *page, const uint64_t index); 98 | /* In-memory page; offset/config are what the tree head records for its head page, and is_head is set to 1 on that page */ 99 | struct bp__page_s { 100 | enum page_type type; 101 | 102 | uint64_t length; 103 | uint64_t byte_size; 104 | 105 | uint64_t offset; 106 | uint64_t config; 107 | 108 | void *buff_; 109 | int is_head; 110 | /* NOTE(review): one-element trailing array — presumably over-allocated by bp__page_create to hold more entries; confirm in pages.c */ 111 | bp__kv_t keys[1]; 112 | }; 113 | /* Output of bp__page_search */ 114 | struct bp__page_search_res_s { 115 | bp__page_t *child; 116 | 117 | uint64_t index; 118 | int cmp; 119 | }; 120 | 121 | #ifdef __cplusplus 122 | } /* extern "C" */ 123 | #endif 124 | 125 | #endif /* _PRIVATE_PAGES_H_ */ 126 | -------------------------------------------------------------------------------- /include/private/tree.h: -------------------------------------------------------------------------------- 1 | #ifndef _PRIVATE_TREE_H_ 2 | #define _PRIVATE_TREE_H_ 3 | 4 | #ifdef 
__cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "private/writer.h" 9 | #include "private/pages.h" 10 | 11 | #include <pthread.h> 12 | /* Size of the head record: the four uint64_t fields offset, config, page_size and hash (parenthesized so it is safe inside larger expressions) */ 13 | #define BP__HEAD_SIZE (sizeof(uint64_t) * 4) 14 | 15 | #define BP_TREE_PRIVATE \ 16 | BP_WRITER_PRIVATE \ 17 | pthread_rwlock_t rwlock; \ 18 | bp__tree_head_t head; \ 19 | bp_compare_cb compare_cb; 20 | 21 | typedef struct bp__tree_head_s bp__tree_head_t; 22 | 23 | int bp__init(bp_db_t *tree); 24 | void bp__destroy(bp_db_t *tree); 25 | 26 | int bp__tree_read_head(bp__writer_t *w, void *data); 27 | int bp__tree_write_head(bp__writer_t *w, void *data); 28 | 29 | int bp__default_compare_cb(const bp_key_t *a, const bp_key_t *b); 30 | int bp__default_filter_cb(void *arg, const bp_key_t *key); 31 | 32 | /* Tree head: the first four fields are what bp__tree_write_head writes out; `page` is the loaded head page */ 33 | struct bp__tree_head_s { 34 | uint64_t offset; 35 | uint64_t config; 36 | uint64_t page_size; 37 | uint64_t hash; 38 | 39 | bp__page_t *page; 40 | }; 41 | 42 | #ifdef __cplusplus 43 | } /* extern "C" */ 44 | #endif 45 | 46 | #endif /* _PRIVATE_TREE_H_ */ 47 | -------------------------------------------------------------------------------- /include/private/utils.h: -------------------------------------------------------------------------------- 1 | #ifndef _PRIVATE_UTILS_H_ 2 | #define _PRIVATE_UTILS_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include <stdint.h> /* uint64_t */ 9 | 10 | uint64_t bp__compute_hashl(uint64_t key); 11 | uint64_t htonll(uint64_t value); 12 | uint64_t ntohll(uint64_t value); 13 | 14 | #ifdef __cplusplus 15 | } // extern "C" 16 | #endif 17 | 18 | #endif /* _PRIVATE_UTILS_H_ */ 19 | -------------------------------------------------------------------------------- /include/private/values.h: -------------------------------------------------------------------------------- 1 | #ifndef _PRIVATE_VALUES_H_ 2 | #define _PRIVATE_VALUES_H_ 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | #include "private/tree.h" 9 | #include <stdint.h> 10 | 11 | #define BP__KV_HEADER_SIZE 24 12 | #define BP__KV_SIZE(kv) 
BP__KV_HEADER_SIZE + kv.length 13 | #define BP__STOVAL(str, key) \ 14 | key.value = (char *) str; \ 15 | key.length = strlen(str) + 1; 16 | 17 | #define BP_KEY_PRIVATE \ 18 | uint64_t _prev_offset; \ 19 | uint64_t _prev_length; 20 | 21 | typedef struct bp__kv_s bp__kv_t; 22 | 23 | 24 | int bp__value_load(bp_db_t *t, 25 | const uint64_t offset, 26 | const uint64_t length, 27 | bp_value_t *value); 28 | int bp__value_save(bp_db_t *t, 29 | const bp_value_t *value, 30 | const bp__kv_t *previous, 31 | uint64_t *offset, 32 | uint64_t *length); 33 | 34 | int bp__kv_copy(const bp__kv_t *source, bp__kv_t *target, int alloc); 35 | 36 | struct bp__kv_s { 37 | BP_KEY_FIELDS 38 | 39 | uint64_t offset; 40 | uint64_t config; 41 | 42 | uint8_t allocated; 43 | }; 44 | 45 | #ifdef __cplusplus 46 | } /* extern "C" */ 47 | #endif 48 | 49 | #endif /* _PRIVATE_VALUES_H_ */ 50 | -------------------------------------------------------------------------------- /include/private/writer.h: -------------------------------------------------------------------------------- 1 | #ifndef _PRIVATE_WRITER_H_ 2 | #define _PRIVATE_WRITER_H_ 3 | 4 | #include 5 | 6 | #ifdef __cplusplus 7 | extern "C" { 8 | #endif 9 | 10 | #define BP_WRITER_PRIVATE \ 11 | int fd; \ 12 | char *filename; \ 13 | uint64_t filesize; \ 14 | char padding[BP_PADDING]; 15 | 16 | typedef struct bp__writer_s bp__writer_t; 17 | typedef int (*bp__writer_cb)(bp__writer_t *w, void *data); 18 | 19 | enum comp_type { 20 | kNotCompressed = 0, 21 | kCompressed = 1 22 | }; 23 | 24 | int bp__writer_create(bp__writer_t *w, const char *filename); 25 | int bp__writer_destroy(bp__writer_t *w); 26 | 27 | int bp__writer_fsync(bp__writer_t *w); 28 | 29 | int bp__writer_compact_name(bp__writer_t *w, char **compact_name); 30 | int bp__writer_compact_finalize(bp__writer_t *s, bp__writer_t *t); 31 | 32 | int bp__writer_read(bp__writer_t *w, 33 | const enum comp_type comp, 34 | const uint64_t offset, 35 | uint64_t *size, 36 | void **data); 37 | int 
bp__writer_write(bp__writer_t *w, 38 | const enum comp_type comp, 39 | const void *data, 40 | uint64_t *offset, 41 | uint64_t *size); 42 | 43 | int bp__writer_find(bp__writer_t *w, 44 | const enum comp_type comp, 45 | const uint64_t size, 46 | void *data, 47 | bp__writer_cb seek, 48 | bp__writer_cb miss); 49 | 50 | struct bp__writer_s { 51 | BP_WRITER_PRIVATE 52 | }; 53 | 54 | #ifdef __cplusplus 55 | } /* extern "C" */ 56 | #endif 57 | 58 | #endif /* _PRIVATE_WRITER_H_ */ 59 | -------------------------------------------------------------------------------- /src/bplus.c: -------------------------------------------------------------------------------- 1 | #include /* malloc */ 2 | #include /* strlen */ 3 | 4 | #include "bplus.h" 5 | #include "private/utils.h" 6 | 7 | int bp_open(bp_db_t *tree, const char* filename) 8 | { 9 | int ret; 10 | 11 | ret = pthread_rwlock_init(&tree->rwlock, NULL) ? BP_ERWLOCK : BP_OK; 12 | if (ret != BP_OK) return ret; 13 | 14 | ret = bp__writer_create((bp__writer_t*) tree, filename); 15 | if (ret != BP_OK) goto fatal; 16 | 17 | tree->head.page = NULL; 18 | 19 | ret = bp__init(tree); 20 | if (ret != BP_OK) goto fatal; 21 | 22 | return BP_OK; 23 | 24 | fatal: 25 | pthread_rwlock_destroy(&tree->rwlock); 26 | return ret; 27 | } 28 | 29 | int bp_close(bp_db_t *tree) 30 | { 31 | pthread_rwlock_wrlock(&tree->rwlock); 32 | bp__destroy(tree); 33 | pthread_rwlock_unlock(&tree->rwlock); 34 | 35 | pthread_rwlock_destroy(&tree->rwlock); 36 | return BP_OK; 37 | } 38 | 39 | int bp__init(bp_db_t *tree) 40 | { 41 | int ret; 42 | /* 43 | * Load head. 
44 | * Writer will not compress data chunk smaller than head, 45 | * that's why we're passing head size as compressed size here 46 | */ 47 | ret = bp__writer_find((bp__writer_t *) tree, 48 | kNotCompressed, 49 | BP__HEAD_SIZE, 50 | &tree->head, 51 | bp__tree_read_head, 52 | bp__tree_write_head); 53 | if (ret == BP_OK) { 54 | /* set default compare function */ 55 | bp_set_compare_cb(tree, bp__default_compare_cb); 56 | } 57 | 58 | return ret; 59 | } 60 | /* Tear down the tree: destroy the writer, then free the cached head page if one is loaded */ 61 | void bp__destroy(bp_db_t *tree) 62 | { 63 | bp__writer_destroy((bp__writer_t *) tree); 64 | if (tree->head.page != NULL) { 65 | bp__page_destroy(tree, tree->head.page); 66 | tree->head.page = NULL; 67 | } 68 | } 69 | /* Look up `key` starting at the head page while holding the read lock; returns the result of bp__page_get */ 70 | int bp_get(bp_db_t *tree, const bp_key_t* key, bp_value_t *value) 71 | { 72 | int ret; 73 | 74 | pthread_rwlock_rdlock(&tree->rwlock); 75 | 76 | ret = bp__page_get(tree, tree->head.page, key, value); 77 | 78 | pthread_rwlock_unlock(&tree->rwlock); 79 | 80 | return ret; 81 | } 82 | /* Load the value recorded at value->_prev_offset/_prev_length into `previous`; returns BP_ENOTFOUND when both are zero. */ 83 | /* NOTE(review): unlike bp_get, this does not take tree->rwlock — confirm that is intended. */ 84 | int bp_get_previous(bp_db_t *tree, 85 | const bp_value_t *value, 86 | bp_value_t *previous) 87 | { 88 | if (value->_prev_offset == 0 && value->_prev_length == 0) { 89 | return BP_ENOTFOUND; 90 | } 91 | return bp__value_load(tree, 92 | value->_prev_offset, 93 | value->_prev_length, 94 | previous); 95 | } 96 | /* Insert or update one key under the write lock; when the page insert succeeds the head is written again */ 97 | int bp_update(bp_db_t *tree, 98 | const bp_key_t *key, 99 | const bp_value_t *value, 100 | bp_update_cb update_cb, 101 | void *arg) 102 | { 103 | int ret; 104 | 105 | pthread_rwlock_wrlock(&tree->rwlock); 106 | 107 | ret = bp__page_insert(tree, tree->head.page, key, value, update_cb, arg); 108 | if (ret == BP_OK) { 109 | ret = bp__tree_write_head((bp__writer_t*) tree, NULL); 110 | } 111 | 112 | pthread_rwlock_unlock(&tree->rwlock); 113 | 114 | return ret; 115 | } 116 | /* Bulk variant of bp_update: *keys and *values are used as the start of arrays holding `count` items */ 117 | int bp_bulk_update(bp_db_t *tree, 118 | const uint64_t count, 119 | const bp_key_t **keys, 120 | const bp_value_t **values, 121 | bp_update_cb update_cb, 122 | void *arg) 123 | { 124 | int ret; 125 | bp_key_t *keys_iter = 
(bp_key_t *) *keys; 126 | bp_value_t* values_iter = (bp_value_t *) *values; 127 | uint64_t left = count; 128 | 129 | pthread_rwlock_wrlock(&tree->rwlock); 130 | 131 | ret = bp__page_bulk_insert(tree, 132 | tree->head.page, 133 | NULL, 134 | &left, 135 | &keys_iter, 136 | &values_iter, 137 | update_cb, 138 | arg); 139 | if (ret == BP_OK) { 140 | ret = bp__tree_write_head((bp__writer_t *) tree, NULL); 141 | } 142 | 143 | pthread_rwlock_unlock(&tree->rwlock); 144 | 145 | return ret; 146 | } 147 | 148 | /* Overwriting set: bp_update without an update callback */ 149 | int bp_set(bp_db_t *tree, const bp_key_t *key, const bp_value_t *value) 150 | { 151 | return bp_update(tree, key, value, NULL, NULL); 152 | } 153 | 154 | /* Overwriting bulk set: bp_bulk_update without an update callback */ 155 | int bp_bulk_set(bp_db_t *tree, 156 | const uint64_t count, 157 | const bp_key_t **keys, 158 | const bp_value_t **values) 159 | { 160 | return bp_bulk_update(tree, count, keys, values, NULL, NULL); 161 | } 162 | 163 | /* Remove `key` under the write lock (remove_cb and arg are forwarded to bp__page_remove); the head is rewritten on success */ 164 | int bp_removev(bp_db_t *tree, 165 | const bp_key_t *key, 166 | bp_remove_cb remove_cb, 167 | void *arg) 168 | { 169 | int ret; 170 | 171 | pthread_rwlock_wrlock(&tree->rwlock); 172 | 173 | ret = bp__page_remove(tree, tree->head.page, key, remove_cb, arg); 174 | if (ret == BP_OK) { 175 | ret = bp__tree_write_head((bp__writer_t *) tree, NULL); 176 | } 177 | 178 | pthread_rwlock_unlock(&tree->rwlock); 179 | 180 | return ret; 181 | } 182 | /* Unconditional remove: bp_removev without a callback */ 183 | int bp_remove(bp_db_t *tree, const bp_key_t *key) 184 | { 185 | return bp_removev(tree, key, NULL, NULL); 186 | } 187 | /* Compact the database by copying its pages into a second database file and then finalizing through the writer */ 188 | int bp_compact(bp_db_t *tree) 189 | { 190 | int ret; 191 | char *compacted_name; 192 | bp_db_t compacted; 193 | 194 | /* get name of compacted database (prefixed with .compact) */ 195 | ret = bp__writer_compact_name((bp__writer_t *) tree, &compacted_name); 196 | if (ret != BP_OK) return ret; 197 | 198 | /* open it */ 199 | ret = bp_open(&compacted, compacted_name); 200 | free(compacted_name); 201 | if (ret != BP_OK) return ret; 202 | 203 | /* destroy stub head page */ 204 | bp__page_destroy(&compacted, compacted.head.page); 205
| 206 | pthread_rwlock_rdlock(&tree->rwlock); 207 | 208 | /* clone source tree's head page */ 209 | ret = bp__page_clone(&compacted, tree->head.page, &compacted.head.page); 210 | 211 | pthread_rwlock_unlock(&tree->rwlock); 212 | if (ret != BP_OK) return ret; /* clone failed: the stub head page was destroyed above, so compacted.head.page must not be used */ 213 | /* copy all pages starting from head */ 214 | ret = bp__page_copy(tree, &compacted, compacted.head.page); 215 | if (ret != BP_OK) return ret; 216 | 217 | ret = bp__tree_write_head((bp__writer_t *) &compacted, NULL); 218 | if (ret != BP_OK) return ret; 219 | /* NOTE(review): the early returns above leave `compacted` open; confirm the intended cleanup against writer.c */ 220 | pthread_rwlock_wrlock(&tree->rwlock); 221 | 222 | ret = bp__writer_compact_finalize((bp__writer_t *) tree, 223 | (bp__writer_t *) &compacted); 224 | pthread_rwlock_unlock(&tree->rwlock); 225 | 226 | return ret; 227 | } 228 | /* Range scan with a caller-supplied key filter, run under the read lock */ 229 | int bp_get_filtered_range(bp_db_t *tree, 230 | const bp_key_t *start, 231 | const bp_key_t *end, 232 | bp_filter_cb filter, 233 | bp_range_cb cb, 234 | void *arg) 235 | { 236 | int ret; 237 | 238 | pthread_rwlock_rdlock(&tree->rwlock); 239 | 240 | ret = bp__page_get_range(tree, 241 | tree->head.page, 242 | start, 243 | end, 244 | filter, 245 | cb, 246 | arg); 247 | 248 | pthread_rwlock_unlock(&tree->rwlock); 249 | 250 | return ret; 251 | } 252 | /* Range scan that uses bp__default_filter_cb as the filter */ 253 | int bp_get_range(bp_db_t *tree, 254 | const bp_key_t *start, 255 | const bp_key_t *end, 256 | bp_range_cb cb, 257 | void *arg) 258 | { 259 | return bp_get_filtered_range(tree, 260 | start, 261 | end, 262 | bp__default_filter_cb, 263 | cb, 264 | arg); 265 | } 266 | 267 | /* Wrappers to allow string to string set/get/remove */ 268 | /* String lookup: the key length passed on includes the terminating NUL; on success *value receives the loaded value pointer */ 269 | int bp_gets(bp_db_t *tree, const char *key, char **value) 270 | { 271 | int ret; 272 | bp_key_t bkey; 273 | bp_value_t bvalue; 274 | 275 | BP__STOVAL(key, bkey); 276 | 277 | ret = bp_get(tree, &bkey, &bvalue); 278 | if (ret != BP_OK) return ret; 279 | 280 | *value = bvalue.value; 281 | 282 | return BP_OK; 283 | } 284 | /* String variant of bp_update */ 285 | int bp_updates(bp_db_t *tree, 286 | const char *key, 287 | const char *value, 288 | bp_update_cb update_cb, 289 | void *arg) 290 | { 291 | bp_key_t 
bkey; 292 | bp_value_t bvalue; 293 | 294 | BP__STOVAL(key, bkey); 295 | BP__STOVAL(value, bvalue); 296 | 297 | return bp_update(tree, &bkey, &bvalue, update_cb, arg); 298 | } 299 | 300 | /* Overwriting string set: bp_updates without an update callback */ 301 | int bp_sets(bp_db_t *tree, const char *key, const char *value) 302 | { 303 | return bp_updates(tree, key, value, NULL, NULL); 304 | } 305 | /* String variant of bp_bulk_update: wraps keys[i]/values[i] in temporary bp_key_t/bp_value_t arrays that are freed before returning. Returns BP_EALLOC if the arrays cannot be allocated. */ 306 | int bp_bulk_updates(bp_db_t *tree, 307 | const uint64_t count, 308 | const char **keys, 309 | const char **values, 310 | bp_update_cb update_cb, 311 | void *arg) 312 | { 313 | int ret; 314 | bp_key_t *bkeys; 315 | bp_value_t *bvalues; 316 | uint64_t i; 317 | if (count > SIZE_MAX / sizeof(*bkeys)) return BP_EALLOC; /* count does not fit the allocation size */ 318 | /* allocate zeroed memory for keys/values (BP__STOVAL only sets value and length) */ 319 | bkeys = calloc(count, sizeof(*bkeys)); 320 | if (bkeys == NULL) return BP_EALLOC; 321 | 322 | bvalues = calloc(count, sizeof(*bvalues)); 323 | if (bvalues == NULL) { 324 | free(bkeys); 325 | return BP_EALLOC; 326 | } 327 | 328 | /* copy keys/values to allocated memory */ 329 | for (i = 0; i < count; i++) { 330 | BP__STOVAL(keys[i], bkeys[i]); 331 | BP__STOVAL(values[i], bvalues[i]); 332 | } 333 | 334 | ret = bp_bulk_update(tree, 335 | count, 336 | (const bp_key_t **) &bkeys, 337 | (const bp_value_t **) &bvalues, 338 | update_cb, 339 | arg); 340 | 341 | free(bkeys); 342 | free(bvalues); 343 | 344 | return ret; 345 | } 346 | /* Overwriting bulk string set: bp_bulk_updates without an update callback */ 347 | int bp_bulk_sets(bp_db_t *tree, 348 | const uint64_t count, 349 | const char **keys, 350 | const char **values) 351 | { 352 | return bp_bulk_updates(tree, count, keys, values, NULL, NULL); 353 | } 354 | /* String variant of bp_removev */ 355 | int bp_removevs(bp_db_t *tree, 356 | const char *key, 357 | bp_remove_cb remove_cb, 358 | void *arg) 359 | { 360 | bp_key_t bkey; 361 | 362 | BP__STOVAL(key, bkey); 363 | 364 | return bp_removev(tree, &bkey, remove_cb, arg); 365 | } 366 | /* Unconditional string remove: bp_removevs without a callback */ 367 | int bp_removes(bp_db_t *tree, const char *key) 368 | { 369 | return bp_removevs(tree, key, NULL, NULL); 370 | } 371 | 372 | int bp_get_filtered_ranges(bp_db_t *tree, 373 | const char *start, 374 | const char *end, 375 | bp_filter_cb filter, 376 | 
bp_range_cb cb, 377 | void *arg) 378 | { 379 | bp_key_t bstart; 380 | bp_key_t bend; 381 | 382 | BP__STOVAL(start, bstart); 383 | BP__STOVAL(end, bend); 384 | 385 | return bp_get_filtered_range(tree, &bstart, &bend, filter, cb, arg); 386 | } 387 | 388 | int bp_get_ranges(bp_db_t *tree, 389 | const char *start, 390 | const char *end, 391 | bp_range_cb cb, 392 | void *arg) 393 | { 394 | return bp_get_filtered_ranges(tree, 395 | start, 396 | end, 397 | bp__default_filter_cb, 398 | cb, 399 | arg); 400 | } 401 | 402 | /* various functions */ 403 | 404 | void bp_set_compare_cb(bp_db_t *tree, bp_compare_cb cb) 405 | { 406 | tree->compare_cb = cb; 407 | } 408 | 409 | 410 | int bp_fsync(bp_db_t *tree) 411 | { 412 | int ret; 413 | 414 | pthread_rwlock_wrlock(&tree->rwlock); 415 | ret = bp__writer_fsync((bp__writer_t *) tree); 416 | pthread_rwlock_unlock(&tree->rwlock); 417 | 418 | return ret; 419 | } 420 | 421 | /* internal utils */ 422 | 423 | int bp__tree_read_head(bp__writer_t *w, void *data) 424 | { 425 | int ret; 426 | bp_db_t *t = (bp_db_t *) w; 427 | bp__tree_head_t* head = (bp__tree_head_t *) data; 428 | 429 | t->head.offset = ntohll(head->offset); 430 | t->head.config = ntohll(head->config); 431 | t->head.page_size = ntohll(head->page_size); 432 | t->head.hash = ntohll(head->hash); 433 | 434 | /* we've copied all data - free it */ 435 | free(data); 436 | 437 | /* Check hash first */ 438 | if (bp__compute_hashl(t->head.offset) != t->head.hash) return 1; 439 | 440 | ret = bp__page_load(t, t->head.offset, t->head.config, &t->head.page); 441 | if (ret != BP_OK) return ret; 442 | 443 | t->head.page->is_head = 1; 444 | 445 | return ret; 446 | } 447 | 448 | int bp__tree_write_head(bp__writer_t *w, void *data) 449 | { 450 | int ret; 451 | bp_db_t* t = (bp_db_t*) w; 452 | bp__tree_head_t nhead; 453 | uint64_t offset; 454 | uint64_t size; 455 | 456 | if (t->head.page == NULL) { 457 | /* TODO: page size should be configurable */ 458 | t->head.page_size = 64; 459 | 460 | /* 
Create empty leaf page */ 461 | ret = bp__page_create(t, kLeaf, 0, 1, &t->head.page); 462 | if (ret != BP_OK) return ret; 463 | 464 | t->head.page->is_head = 1; 465 | } 466 | 467 | /* Update head's position */ 468 | t->head.offset = t->head.page->offset; 469 | t->head.config = t->head.page->config; 470 | 471 | t->head.hash = bp__compute_hashl(t->head.offset); 472 | 473 | /* Create temporary head with fields in network byte order */ 474 | nhead.offset = htonll(t->head.offset); 475 | nhead.config = htonll(t->head.config); 476 | nhead.page_size = htonll(t->head.page_size); 477 | nhead.hash = htonll(t->head.hash); 478 | 479 | size = BP__HEAD_SIZE; 480 | ret = bp__writer_write(w, 481 | kNotCompressed, 482 | &nhead, 483 | &offset, 484 | &size); 485 | 486 | return ret; 487 | } 488 | 489 | int bp__default_compare_cb(const bp_key_t *a, const bp_key_t *b) 490 | { 491 | uint32_t len = a->length < b->length ? a->length : b->length; 492 | 493 | for (uint32_t i = 0; i < len; i++) { 494 | if (a->value[i] != b->value[i]) 495 | return (uint8_t) a->value[i] > (uint8_t) b->value[i] ? 
1 : -1; 496 | } 497 | 498 | return a->length - b->length; 499 | } 500 | 501 | 502 | int bp__default_filter_cb(void *arg, const bp_key_t *key) 503 | { 504 | /* default filter accepts all keys */ 505 | return 1; 506 | } 507 | -------------------------------------------------------------------------------- /src/pages.c: -------------------------------------------------------------------------------- 1 | #include /* malloc, free */ 2 | #include /* memcpy */ 3 | #include /* assert */ 4 | 5 | #include "bplus.h" 6 | #include "private/pages.h" 7 | #include "private/utils.h" 8 | 9 | int bp__page_create(bp_db_t *t, 10 | const enum page_type type, 11 | const uint64_t offset, 12 | const uint64_t config, 13 | bp__page_t **page) 14 | { 15 | /* Allocate space for page + keys */ 16 | bp__page_t *p; 17 | 18 | p = malloc(sizeof(*p) + sizeof(p->keys[0]) * (t->head.page_size - 1)); 19 | if (p == NULL) return BP_EALLOC; 20 | 21 | p->type = type; 22 | if (type == kLeaf) { 23 | p->length = 0; 24 | p->byte_size = 0; 25 | } else { 26 | /* non-leaf pages always have left element */ 27 | p->length = 1; 28 | p->keys[0].value = NULL; 29 | p->keys[0].length = 0; 30 | p->keys[0].offset = 0; 31 | p->keys[0].config = 0; 32 | p->keys[0].allocated = 0; 33 | p->byte_size = BP__KV_SIZE(p->keys[0]); 34 | } 35 | 36 | /* this two fields will be changed on page_write */ 37 | p->offset = offset; 38 | p->config = config; 39 | 40 | p->buff_ = NULL; 41 | p->is_head = 0; 42 | 43 | *page = p; 44 | return BP_OK; 45 | } 46 | 47 | void bp__page_destroy(bp_db_t *t, bp__page_t *page) 48 | { 49 | /* Free all keys */ 50 | for (uint64_t i = 0; i < page->length; i++) { 51 | if (page->keys[i].allocated) { 52 | free(page->keys[i].value); 53 | page->keys[i].value = NULL; 54 | } 55 | } 56 | 57 | if (page->buff_ != NULL) { 58 | free(page->buff_); 59 | page->buff_ = NULL; 60 | } 61 | 62 | /* Free page itself */ 63 | free(page); 64 | } 65 | 66 | int bp__page_clone(bp_db_t *t, bp__page_t *page, bp__page_t **clone) 67 | { 68 | 
int ret = BP_OK; 69 | ret = bp__page_create(t, page->type, page->offset, page->config, clone); 70 | if (ret != BP_OK) return ret; 71 | 72 | (*clone)->is_head = page->is_head; 73 | 74 | (*clone)->length = 0; 75 | for (uint64_t i = 0; i < page->length; i++) { 76 | ret = bp__kv_copy(&page->keys[i], &(*clone)->keys[i], 1); 77 | (*clone)->length++; 78 | if (ret != BP_OK) break; 79 | } 80 | (*clone)->byte_size = page->byte_size; 81 | 82 | /* if failed - free memory for all allocated keys */ 83 | if (ret != BP_OK) bp__page_destroy(t, *clone); 84 | 85 | return ret; 86 | } 87 | 88 | int bp__page_read(bp_db_t *t, bp__page_t *page) 89 | { 90 | int ret; 91 | uint64_t size, o; 92 | uint64_t i; 93 | bp__writer_t *w = (bp__writer_t *) t; 94 | 95 | char *buff = NULL; 96 | 97 | /* Read page size and leaf flag */ 98 | size = page->config >> 1; 99 | page->type = page->config & 1 ? kLeaf : kPage; 100 | 101 | /* Read page data */ 102 | ret = bp__writer_read(w, kCompressed, page->offset, &size, (void**) &buff); 103 | if (ret != BP_OK) return ret; 104 | 105 | /* Parse data */ 106 | i = 0; 107 | o = 0; 108 | while (o < size) { 109 | page->keys[i].length = ntohll(*(uint64_t *) (buff + o)); 110 | page->keys[i].offset = ntohll(*(uint64_t *) (buff + o + 8)); 111 | page->keys[i].config = ntohll(*(uint64_t *) (buff + o + 16)); 112 | page->keys[i].value = buff + o + 24; 113 | page->keys[i].allocated = 0; 114 | 115 | o += BP__KV_SIZE(page->keys[i]); 116 | i++; 117 | } 118 | page->length = i; 119 | page->byte_size = size; 120 | 121 | if (page->buff_ != NULL) { 122 | free(page->buff_); 123 | } 124 | page->buff_ = buff; 125 | 126 | return BP_OK; 127 | } 128 | 129 | int bp__page_load(bp_db_t *t, 130 | const uint64_t offset, 131 | const uint64_t config, 132 | bp__page_t **page) 133 | { 134 | int ret; 135 | 136 | bp__page_t *new_page; 137 | ret = bp__page_create(t, 0, offset, config, &new_page); 138 | if (ret != BP_OK) return ret; 139 | 140 | ret = bp__page_read(t, new_page); 141 | if (ret != BP_OK) { 
142 | bp__page_destroy(t, new_page); 143 | return ret; 144 | } 145 | 146 | /* bp__page_load should be atomic */ 147 | *page = new_page; 148 | 149 | return BP_OK; 150 | } 151 | 152 | int bp__page_save(bp_db_t *t, bp__page_t *page) 153 | { 154 | int ret; 155 | bp__writer_t *w = (bp__writer_t *) t; 156 | uint64_t i; 157 | uint64_t o; 158 | char *buff; 159 | 160 | assert(page->type == kLeaf || page->length != 0); 161 | 162 | /* Allocate space for serialization (header + keys); */ 163 | buff = malloc(page->byte_size); 164 | if (buff == NULL) return BP_EALLOC; 165 | 166 | o = 0; 167 | for (i = 0; i < page->length; i++) { 168 | assert(o + BP__KV_SIZE(page->keys[i]) <= page->byte_size); 169 | 170 | *(uint64_t *) (buff + o) = htonll(page->keys[i].length); 171 | *(uint64_t *) (buff + o + 8) = htonll(page->keys[i].offset); 172 | *(uint64_t *) (buff + o + 16) = htonll(page->keys[i].config); 173 | 174 | memcpy(buff + o + 24, page->keys[i].value, page->keys[i].length); 175 | 176 | o += BP__KV_SIZE(page->keys[i]); 177 | } 178 | assert(o == page->byte_size); 179 | 180 | page->config = page->byte_size; 181 | ret = bp__writer_write(w, 182 | kCompressed, 183 | buff, 184 | &page->offset, 185 | &page->config); 186 | page->config = (page->config << 1) | (page->type == kLeaf); 187 | 188 | free(buff); 189 | return ret; 190 | } 191 | 192 | 193 | int bp__page_load_value(bp_db_t *t, 194 | bp__page_t *page, 195 | const uint64_t index, 196 | bp_value_t *value) 197 | { 198 | return bp__value_load(t, 199 | page->keys[index].offset, 200 | page->keys[index].config, 201 | value); 202 | } 203 | 204 | 205 | int bp__page_save_value(bp_db_t *t, 206 | bp__page_t *page, 207 | const uint64_t index, 208 | const int cmp, 209 | const bp_key_t *key, 210 | const bp_value_t *value, 211 | bp_update_cb update_cb, 212 | void *arg) 213 | { 214 | int ret; 215 | bp__kv_t previous, tmp; 216 | 217 | /* replace item with same key from page */ 218 | if (cmp == 0) { 219 | /* solve conflicts if callback was provided */ 220 
| if (update_cb != NULL) { 221 | bp_value_t prev_value; 222 | 223 | ret = bp__page_load_value(t, page, index, &prev_value); 224 | if (ret != BP_OK) return ret; 225 | 226 | ret = update_cb(arg, &prev_value, value); 227 | free(prev_value.value); 228 | 229 | if (!ret) return BP_EUPDATECONFLICT; 230 | } 231 | previous.offset = page->keys[index].offset; 232 | previous.length = page->keys[index].config; 233 | bp__page_remove_idx(t, page, index); 234 | } 235 | 236 | /* store key */ 237 | tmp.value = key->value; 238 | tmp.length = key->length; 239 | 240 | /* store value */ 241 | ret = bp__value_save(t, 242 | value, 243 | cmp == 0 ? &previous : NULL, 244 | &tmp.offset, 245 | &tmp.config); 246 | if (ret != BP_OK) return ret; 247 | 248 | /* Shift all keys right */ 249 | bp__page_shiftr(t, page, index); 250 | 251 | /* Insert key in the middle */ 252 | ret = bp__kv_copy(&tmp, &page->keys[index], 1); 253 | if (ret != BP_OK) { 254 | /* shift keys back */ 255 | bp__page_shiftl(t, page, index); 256 | return ret; 257 | } 258 | 259 | page->byte_size += BP__KV_SIZE(tmp); 260 | page->length++; 261 | 262 | return BP_OK; 263 | } 264 | 265 | int bp__page_search(bp_db_t *t, 266 | bp__page_t *page, 267 | const bp_key_t *key, 268 | const enum search_type type, 269 | bp__page_search_res_t *result) 270 | { 271 | int ret; 272 | uint64_t i = page->type == kPage; 273 | int cmp = -1; 274 | bp__page_t *child; 275 | 276 | /* assert infinite recursion */ 277 | assert(page->type == kLeaf || page->length > 0); 278 | 279 | while (i < page->length) { 280 | /* left key is always lower in non-leaf nodes */ 281 | cmp = t->compare_cb((bp_key_t*) &page->keys[i], key); 282 | 283 | if (cmp >= 0) break; 284 | i++; 285 | } 286 | 287 | result->cmp = cmp; 288 | 289 | if (page->type == kLeaf) { 290 | result->index = i; 291 | result->child = NULL; 292 | 293 | return BP_OK; 294 | } else { 295 | assert(i > 0); 296 | if (cmp != 0) i--; 297 | 298 | if (type == kLoad) { 299 | ret = bp__page_load(t, 300 | 
page->keys[i].offset, 301 | page->keys[i].config, 302 | &child); 303 | if (ret != BP_OK) return ret; 304 | 305 | result->child = child; 306 | } else { 307 | result->child = NULL; 308 | } 309 | 310 | result->index = i; 311 | 312 | return BP_OK; 313 | } 314 | } 315 | 316 | int bp__page_get(bp_db_t *t, 317 | bp__page_t *page, 318 | const bp_key_t *key, 319 | bp_value_t *value) 320 | { 321 | int ret; 322 | bp__page_search_res_t res; 323 | ret = bp__page_search(t, page, key, kLoad, &res); 324 | if (ret != BP_OK) return ret; 325 | 326 | if (res.child == NULL) { 327 | if (res.cmp != 0) return BP_ENOTFOUND; 328 | 329 | return bp__page_load_value(t, page, res.index, value); 330 | } else { 331 | ret = bp__page_get(t, res.child, key, value); 332 | bp__page_destroy(t, res.child); 333 | res.child = NULL; 334 | return ret; 335 | } 336 | } 337 | 338 | int bp__page_get_range(bp_db_t *t, 339 | bp__page_t *page, 340 | const bp_key_t *start, 341 | const bp_key_t *end, 342 | bp_filter_cb filter, 343 | bp_range_cb cb, 344 | void *arg) 345 | { 346 | int ret; 347 | uint64_t i; 348 | bp__page_search_res_t start_res, end_res; 349 | 350 | /* find start and end indexes */ 351 | ret = bp__page_search(t, page, start, kNotLoad, &start_res); 352 | if (ret != BP_OK) return ret; 353 | ret = bp__page_search(t, page, end, kNotLoad, &end_res); 354 | if (ret != BP_OK) return ret; 355 | 356 | if (page->type == kLeaf) { 357 | /* on leaf pages end-key should always be greater or equal than first key */ 358 | if (end_res.cmp > 0 && end_res.index == 0) return BP_OK; 359 | 360 | if (end_res.cmp < 0) end_res.index--; 361 | } 362 | 363 | /* go through each page item */ 364 | for (i = start_res.index; i <= end_res.index; i++) { 365 | /* run filter */ 366 | if (!filter(arg, (bp_key_t *) &page->keys[i])) continue; 367 | 368 | if (page->type == kPage) { 369 | /* load child page and apply range get to it */ 370 | bp__page_t* child; 371 | 372 | ret = bp__page_load(t, 373 | page->keys[i].offset, 374 | 
page->keys[i].config, 375 | &child); 376 | if (ret != BP_OK) return ret; 377 | 378 | ret = bp__page_get_range(t, child, start, end, filter, cb, arg); 379 | 380 | /* destroy child regardless of error */ 381 | bp__page_destroy(t, child); 382 | 383 | if (ret != BP_OK) return ret; 384 | } else { 385 | /* load value and pass it to callback */ 386 | bp_value_t value; 387 | ret = bp__page_load_value(t, page, i, &value); 388 | if (ret != BP_OK) return ret; 389 | 390 | cb(arg, (bp_key_t *) &page->keys[i], &value); 391 | 392 | free(value.value); 393 | } 394 | } 395 | 396 | return BP_OK; 397 | } 398 | 399 | int bp__page_insert(bp_db_t *t, 400 | bp__page_t *page, 401 | const bp_key_t *key, 402 | const bp_value_t *value, 403 | bp_update_cb update_cb, 404 | void *arg) 405 | { 406 | int ret; 407 | bp__page_search_res_t res; 408 | ret = bp__page_search(t, page, key, kLoad, &res); 409 | if (ret != BP_OK) return ret; 410 | 411 | if (res.child == NULL) { 412 | /* store value in db file to get offset and config */ 413 | ret = bp__page_save_value(t, 414 | page, 415 | res.index, 416 | res.cmp, 417 | key, 418 | value, 419 | update_cb, 420 | arg); 421 | if (ret != BP_OK) return ret; 422 | } else { 423 | /* Insert kv in child page */ 424 | ret = bp__page_insert(t, res.child, key, value, update_cb, arg); 425 | 426 | /* kv was inserted but page is full now */ 427 | if (ret == BP_ESPLITPAGE) { 428 | ret = bp__page_split(t, page, res.index, res.child); 429 | } else if (ret == BP_OK) { 430 | /* Update offsets in page */ 431 | page->keys[res.index].offset = res.child->offset; 432 | page->keys[res.index].config = res.child->config; 433 | } 434 | 435 | bp__page_destroy(t, res.child); 436 | res.child = NULL; 437 | 438 | if (ret != BP_OK) { 439 | return ret; 440 | } 441 | } 442 | 443 | if (page->length == t->head.page_size) { 444 | if (page->is_head) { 445 | ret = bp__page_split_head(t, &page); 446 | if (ret != BP_OK) return ret; 447 | } else { 448 | /* Notify caller that it should split page */ 449 
| return BP_ESPLITPAGE; 450 | } 451 | } 452 | 453 | assert(page->length < t->head.page_size); 454 | 455 | ret = bp__page_save(t, page); 456 | if (ret != BP_OK) return ret; 457 | 458 | return BP_OK; 459 | } 460 | 461 | int bp__page_bulk_insert(bp_db_t *t, 462 | bp__page_t *page, 463 | const bp_key_t *limit, 464 | uint64_t *count, 465 | bp_key_t **keys, 466 | bp_value_t **values, 467 | bp_update_cb update_cb, 468 | void *arg) 469 | { 470 | int ret; 471 | bp__page_search_res_t res; 472 | 473 | while (*count > 0 && 474 | (limit == NULL || t->compare_cb(limit, *keys) > 0)) { 475 | 476 | ret = bp__page_search(t, page, *keys, kLoad, &res); 477 | if (ret != BP_OK) return ret; 478 | 479 | if (res.child == NULL) { 480 | /* store value in db file to get offset and config */ 481 | ret = bp__page_save_value(t, 482 | page, 483 | res.index, 484 | res.cmp, 485 | *keys, 486 | *values, 487 | update_cb, 488 | arg); 489 | /* 490 | * ignore update conflicts, to handle situations where 491 | * only one kv failed in a bulk 492 | */ 493 | if (ret != BP_OK && ret != BP_EUPDATECONFLICT) return ret; 494 | 495 | *keys = *keys + 1; 496 | *values = *values + 1; 497 | *count = *count - 1; 498 | } else { 499 | /* we're in regular page */ 500 | bp_key_t* new_limit = NULL; 501 | 502 | if (res.index + 1 < page->length) { 503 | new_limit = (bp_key_t*) &page->keys[res.index + 1]; 504 | } 505 | 506 | ret = bp__page_bulk_insert(t, 507 | res.child, 508 | new_limit, 509 | count, 510 | keys, 511 | values, 512 | update_cb, 513 | arg); 514 | if (ret == BP_ESPLITPAGE) { 515 | ret = bp__page_split(t, page, res.index, res.child); 516 | } else if (ret == BP_OK) { 517 | /* Update offsets in page */ 518 | page->keys[res.index].offset = res.child->offset; 519 | page->keys[res.index].config = res.child->config; 520 | } 521 | 522 | bp__page_destroy(t, res.child); 523 | res.child = NULL; 524 | 525 | if (ret != BP_OK) return ret; 526 | } 527 | 528 | if (page->length == t->head.page_size) { 529 | if (page->is_head) { 
530 | ret = bp__page_split_head(t, &page); 531 | if (ret != BP_OK) return ret; 532 | } else { 533 | /* Notify caller that it should split page */ 534 | return BP_ESPLITPAGE; 535 | } 536 | } 537 | 538 | assert(page->length < t->head.page_size); 539 | } 540 | 541 | return bp__page_save(t, page); 542 | } 543 | 544 | int bp__page_remove(bp_db_t *t, 545 | bp__page_t *page, 546 | const bp_key_t *key, 547 | bp_remove_cb remove_cb, 548 | void *arg) 549 | { 550 | int ret; 551 | bp__page_search_res_t res; 552 | ret = bp__page_search(t, page, key, kLoad, &res); 553 | if (ret != BP_OK) return ret; 554 | 555 | if (res.child == NULL) { 556 | if (res.cmp != 0) return BP_ENOTFOUND; 557 | 558 | /* remove only if remove_cb returns BP_OK */ 559 | if (remove_cb != NULL) { 560 | bp_value_t prev_val; 561 | 562 | ret = bp__page_load_value(t, page, res.index, &prev_val); 563 | if (ret != BP_OK) return ret; 564 | 565 | ret = remove_cb(arg, &prev_val); 566 | free(prev_val.value); 567 | 568 | if (!ret) return BP_EREMOVECONFLICT; 569 | } 570 | bp__page_remove_idx(t, page, res.index); 571 | 572 | if (page->length == 0 && !page->is_head) return BP_EEMPTYPAGE; 573 | } else { 574 | /* Insert kv in child page */ 575 | ret = bp__page_remove(t, res.child, key, remove_cb, arg); 576 | 577 | if (ret != BP_OK && ret != BP_EEMPTYPAGE) { 578 | return ret; 579 | } 580 | 581 | /* kv was inserted but page is full now */ 582 | if (ret == BP_EEMPTYPAGE) { 583 | bp__page_remove_idx(t, page, res.index); 584 | 585 | /* we don't need child now */ 586 | bp__page_destroy(t, res.child); 587 | res.child = NULL; 588 | 589 | /* only one item left - lift kv from last child to current page */ 590 | if (page->length == 1) { 591 | page->offset = page->keys[0].offset; 592 | page->config = page->keys[0].config; 593 | 594 | /* remove child to free memory */ 595 | bp__page_remove_idx(t, page, 0); 596 | 597 | /* and load child as current page */ 598 | ret = bp__page_read(t, page); 599 | if (ret != BP_OK) return ret; 600 | } 601 
| } else { 602 | /* Update offsets in page */ 603 | page->keys[res.index].offset = res.child->offset; 604 | page->keys[res.index].config = res.child->config; 605 | 606 | /* we don't need child now */ 607 | bp__page_destroy(t, res.child); 608 | res.child = NULL; 609 | } 610 | } 611 | 612 | return bp__page_save(t, page); 613 | } 614 | 615 | int bp__page_copy(bp_db_t *source, bp_db_t *target, bp__page_t *page) 616 | { 617 | int ret; 618 | for (uint64_t i = 0; i < page->length; i++) { 619 | if (page->type == kPage) { 620 | /* copy child page */ 621 | bp__page_t *child; 622 | ret = bp__page_load(source, 623 | page->keys[i].offset, 624 | page->keys[i].config, 625 | &child); 626 | if (ret != BP_OK) return ret; 627 | 628 | ret = bp__page_copy(source, target, child); 629 | if (ret != BP_OK) return ret; 630 | 631 | /* update child position */ 632 | page->keys[i].offset = child->offset; 633 | page->keys[i].config = child->config; 634 | 635 | bp__page_destroy(source, child); 636 | } else { 637 | /* copy value */ 638 | bp_value_t value; 639 | 640 | ret = bp__page_load_value(source, page, i, &value); 641 | if (ret != BP_OK) return ret; 642 | 643 | page->keys[i].config = value.length; 644 | ret = bp__value_save(target, 645 | &value, 646 | NULL, 647 | &page->keys[i].offset, 648 | &page->keys[i].config); 649 | 650 | /* value is not needed anymore */ 651 | free(value.value); 652 | if (ret != BP_OK) return ret; 653 | } 654 | } 655 | 656 | return bp__page_save(target, page); 657 | } 658 | 659 | 660 | int bp__page_remove_idx(bp_db_t *t, bp__page_t *page, const uint64_t index) 661 | { 662 | assert(index < page->length); 663 | 664 | /* Free memory allocated for kv and reduce byte_size of page */ 665 | page->byte_size -= BP__KV_SIZE(page->keys[index]); 666 | if (page->keys[index].allocated) { 667 | free(page->keys[index].value); 668 | page->keys[index].value = NULL; 669 | } 670 | 671 | /* Shift all keys left */ 672 | bp__page_shiftl(t, page, index); 673 | 674 | page->length--; 675 | 676 | 
return BP_OK; 677 | } 678 | 679 | 680 | int bp__page_split(bp_db_t *t, 681 | bp__page_t *parent, 682 | const uint64_t index, 683 | bp__page_t *child) 684 | { 685 | int ret; 686 | uint64_t i, middle; 687 | bp__page_t *left = NULL, *right = NULL; 688 | bp__kv_t middle_key; 689 | 690 | bp__page_create(t, child->type, 0, 0, &left); 691 | bp__page_create(t, child->type, 0, 0, &right); 692 | 693 | middle = t->head.page_size >> 1; 694 | ret = bp__kv_copy(&child->keys[middle], &middle_key, 1); 695 | if (ret != BP_OK) goto fatal; 696 | 697 | /* non-leaf nodes has byte_size > 0 nullify it first */ 698 | left->byte_size = 0; 699 | left->length = 0; 700 | for (i = 0; i < middle; i++) { 701 | ret = bp__kv_copy(&child->keys[i], &left->keys[left->length++], 1); 702 | if (ret != BP_OK) goto fatal; 703 | left->byte_size += BP__KV_SIZE(child->keys[i]); 704 | } 705 | 706 | right->byte_size = 0; 707 | right->length = 0; 708 | for (; i < t->head.page_size; i++) { 709 | ret = bp__kv_copy(&child->keys[i], &right->keys[right->length++], 1); 710 | if (ret != BP_OK) goto fatal; 711 | right->byte_size += BP__KV_SIZE(child->keys[i]); 712 | } 713 | 714 | /* save left and right parts to get offsets */ 715 | ret = bp__page_save(t, left); 716 | if (ret != BP_OK) goto fatal; 717 | 718 | ret = bp__page_save(t, right); 719 | if (ret != BP_OK) goto fatal; 720 | 721 | /* store offsets with middle key */ 722 | middle_key.offset = right->offset; 723 | middle_key.config = right->config; 724 | 725 | /* insert middle key into parent page */ 726 | bp__page_shiftr(t, parent, index + 1); 727 | bp__kv_copy(&middle_key, &parent->keys[index + 1], 0); 728 | 729 | parent->byte_size += BP__KV_SIZE(middle_key); 730 | parent->length++; 731 | 732 | /* change left element */ 733 | parent->keys[index].offset = left->offset; 734 | parent->keys[index].config = left->config; 735 | 736 | ret = BP_OK; 737 | fatal: 738 | /* cleanup */ 739 | bp__page_destroy(t, left); 740 | bp__page_destroy(t, right); 741 | return ret; 742 | } 
743 | 744 | int bp__page_split_head(bp_db_t *t, bp__page_t **page) 745 | { 746 | int ret; 747 | bp__page_t *new_head = NULL; 748 | bp__page_create(t, 0, 0, 0, &new_head); 749 | new_head->is_head = 1; 750 | 751 | ret = bp__page_split(t, new_head, 0, *page); 752 | if (ret != BP_OK) { 753 | bp__page_destroy(t, new_head); 754 | return ret; 755 | } 756 | 757 | t->head.page = new_head; 758 | bp__page_destroy(t, *page); 759 | *page = new_head; 760 | 761 | return BP_OK; 762 | } 763 | 764 | void bp__page_shiftr(bp_db_t *t, bp__page_t *p, const uint64_t index) 765 | { 766 | if (p->length != 0) { 767 | for (uint64_t i = p->length - 1; i >= index; i--) { 768 | bp__kv_copy(&p->keys[i], &p->keys[i + 1], 0); 769 | 770 | if (i == 0) break; 771 | } 772 | } 773 | } 774 | 775 | void bp__page_shiftl(bp_db_t *t, bp__page_t *p, const uint64_t index) 776 | { 777 | for (uint64_t i = index + 1; i < p->length; i++) { 778 | bp__kv_copy(&p->keys[i], &p->keys[i - 1], 0); 779 | } 780 | } 781 | -------------------------------------------------------------------------------- /src/utils.c: -------------------------------------------------------------------------------- 1 | #include "private/utils.h" 2 | 3 | #include /* uint64_t */ 4 | #include /* nothl, htonl */ 5 | 6 | /* Thomas Wang, Integer Hash Functions. 
*/ 7 | /* http://www.concentric.net/~Ttwang/tech/inthash.htm */ 8 | uint32_t bp__compute_hash(uint32_t key) 9 | { 10 | uint32_t hash = key; 11 | hash = ~hash + (hash << 15); /* hash = (hash << 15) - hash - 1; */ 12 | hash = hash ^ (hash >> 12); 13 | hash = hash + (hash << 2); 14 | hash = hash ^ (hash >> 4); 15 | hash = hash * 2057; /* hash = (hash + (hash << 3)) + (hash << 11); */ 16 | hash = hash ^ (hash >> 16); 17 | return hash; 18 | } 19 | 20 | uint64_t bp__compute_hashl(uint64_t key) 21 | { 22 | uint32_t keyh = key >> 32; 23 | uint32_t keyl = key & 0xffffffffLL; 24 | 25 | return ((uint64_t) bp__compute_hash(keyh) << 32) | 26 | bp__compute_hash(keyl); 27 | } 28 | 29 | uint64_t htonll(uint64_t value) 30 | { 31 | static const int num = 23; 32 | 33 | if (*(const char *) (&num) != num) return value; 34 | 35 | uint32_t high_part = htonl((uint32_t) (value >> 32)); 36 | uint32_t low_part = htonl((uint32_t) (value & 0xffffffffLL)); 37 | 38 | return ((uint64_t) low_part << 32) | high_part; 39 | } 40 | 41 | uint64_t ntohll(uint64_t value) 42 | { 43 | static const int num = 23; 44 | 45 | if (*(const char *) (&num) != num) return value; 46 | 47 | uint32_t high_part = ntohl((uint32_t) (value >> 32)); 48 | uint32_t low_part = ntohl((uint32_t) (value & 0xffffffffLL)); 49 | 50 | return ((uint64_t) low_part << 32) | high_part; 51 | } 52 | -------------------------------------------------------------------------------- /src/values.c: -------------------------------------------------------------------------------- 1 | #include "bplus.h" 2 | #include "private/values.h" 3 | #include "private/writer.h" 4 | #include "private/utils.h" 5 | 6 | #include /* malloc, free */ 7 | #include /* memcpy */ 8 | 9 | 10 | int bp__value_load(bp_db_t *t, 11 | const uint64_t offset, 12 | const uint64_t length, 13 | bp_value_t *value) 14 | { 15 | int ret; 16 | char* buff; 17 | uint64_t buff_len = length; 18 | 19 | /* read data from disk first */ 20 | ret = bp__writer_read((bp__writer_t*) t, 21 | 
kCompressed, 22 | offset, 23 | &buff_len, 24 | (void **) &buff); 25 | if (ret != BP_OK) return ret; 26 | 27 | value->value = malloc(buff_len - 16); 28 | if (value->value == NULL) { 29 | free(buff); 30 | return BP_EALLOC; 31 | } 32 | 33 | /* first 16 bytes are representing previous value */ 34 | value->_prev_offset = ntohll(*(uint64_t *) (buff)); 35 | value->_prev_length = ntohll(*(uint64_t *) (buff + 8)); 36 | 37 | /* copy the rest into result buffer */ 38 | memcpy(value->value, buff + 16, buff_len - 16); 39 | value->length = buff_len - 16; 40 | 41 | free(buff); 42 | 43 | return BP_OK; 44 | } 45 | 46 | 47 | int bp__value_save(bp_db_t *t, 48 | const bp_value_t *value, 49 | const bp__kv_t *previous, 50 | uint64_t *offset, 51 | uint64_t *length) 52 | { 53 | int ret; 54 | char* buff; 55 | 56 | buff = malloc(value->length + 16); 57 | if (buff == NULL) return BP_EALLOC; 58 | 59 | /* insert offset, length of previous value */ 60 | if (previous != NULL) { 61 | *(uint64_t *) (buff) = htonll(previous->offset); 62 | *(uint64_t *) (buff + 8) = htonll(previous->length); 63 | } else { 64 | *(uint64_t *) (buff) = 0; 65 | *(uint64_t *) (buff + 8) = 0; 66 | } 67 | 68 | /* insert current value itself */ 69 | memcpy(buff + 16, value->value, value->length); 70 | 71 | *length = value->length + 16; 72 | ret = bp__writer_write((bp__writer_t *) t, 73 | kCompressed, 74 | buff, 75 | offset, 76 | length); 77 | free(buff); 78 | 79 | return ret; 80 | } 81 | 82 | 83 | int bp__kv_copy(const bp__kv_t *source, bp__kv_t *target, int alloc) 84 | { 85 | /* copy key fields */ 86 | if (alloc) { 87 | target->value = malloc(source->length); 88 | if (target->value == NULL) return BP_EALLOC; 89 | 90 | memcpy(target->value, source->value, source->length); 91 | target->allocated = 1; 92 | } else { 93 | target->value = source->value; 94 | target->allocated = source->allocated; 95 | } 96 | 97 | target->length = source->length; 98 | 99 | /* copy rest */ 100 | target->offset = source->offset; 101 | 
target->config = source->config; 102 | 103 | return BP_OK; 104 | } 105 | -------------------------------------------------------------------------------- /src/writer.c: -------------------------------------------------------------------------------- 1 | #include "bplus.h" 2 | #include "private/writer.h" 3 | #include "private/compressor.h" 4 | 5 | #include /* open */ 6 | #include /* close, write, read */ 7 | #include /* S_IWUSR, S_IRUSR */ 8 | #include /* malloc, free */ 9 | #include /* sprintf */ 10 | #include /* memset */ 11 | #include /* errno */ 12 | 13 | int bp__writer_create(bp__writer_t *w, const char *filename) 14 | { 15 | off_t filesize; 16 | size_t filename_length; 17 | 18 | /* copy filename + '\0' char */ 19 | filename_length = strlen(filename) + 1; 20 | w->filename = malloc(filename_length); 21 | if (w->filename == NULL) return BP_EALLOC; 22 | memcpy(w->filename, filename, filename_length); 23 | 24 | w->fd = open(filename, 25 | O_RDWR | O_APPEND | O_CREAT, 26 | S_IRUSR | S_IRGRP | S_IWGRP | S_IWUSR); 27 | if (w->fd == -1) goto error; 28 | 29 | /* Determine filesize */ 30 | filesize = lseek(w->fd, 0, SEEK_END); 31 | if (filesize == -1) goto error; 32 | 33 | w->filesize = (uint64_t) filesize; 34 | 35 | /* Nullify padding to shut up valgrind */ 36 | memset(&w->padding, 0, sizeof(w->padding)); 37 | 38 | return BP_OK; 39 | 40 | error: 41 | free(w->filename); 42 | return BP_EFILE; 43 | } 44 | 45 | int bp__writer_destroy(bp__writer_t *w) 46 | { 47 | free(w->filename); 48 | w->filename = NULL; 49 | if (close(w->fd)) return BP_EFILE; 50 | return BP_OK; 51 | } 52 | 53 | int bp__writer_fsync(bp__writer_t *w) 54 | { 55 | #ifdef F_FULLFSYNC 56 | /* OSX support */ 57 | return fcntl(w->fd, F_FULLFSYNC); 58 | #else 59 | return fdatasync(w->fd) == 0 ? 
BP_OK : BP_EFILEFLUSH; 60 | #endif 61 | } 62 | 63 | int bp__writer_compact_name(bp__writer_t *w, char **compact_name) 64 | { 65 | char *filename = malloc(strlen(w->filename) + sizeof(".compact") + 1); 66 | if (filename == NULL) return BP_EALLOC; 67 | 68 | sprintf(filename, "%s.compact", w->filename); 69 | if (access(filename, F_OK) != -1 || errno != ENOENT) { 70 | free(filename); 71 | return BP_ECOMPACT_EXISTS; 72 | } 73 | 74 | *compact_name = filename; 75 | return BP_OK; 76 | } 77 | 78 | int bp__writer_compact_finalize(bp__writer_t *s, bp__writer_t *t) 79 | { 80 | int ret; 81 | char *name, *compacted_name; 82 | 83 | /* save filename and prevent freeing it */ 84 | name = s->filename; 85 | compacted_name = t->filename; 86 | s->filename = NULL; 87 | t->filename = NULL; 88 | 89 | /* close both trees */ 90 | bp__destroy((bp_db_t *) s); 91 | ret = bp_close((bp_db_t *) t); 92 | if (ret != BP_OK) goto fatal; 93 | 94 | if (rename(compacted_name, name) != 0) return BP_EFILERENAME; 95 | 96 | /* reopen source tree */ 97 | ret = bp__writer_create(s, name); 98 | if (ret != BP_OK) goto fatal; 99 | ret = bp__init((bp_db_t *) s); 100 | 101 | fatal: 102 | free(compacted_name); 103 | free(name); 104 | 105 | return ret; 106 | } 107 | 108 | int bp__writer_read(bp__writer_t *w, 109 | const enum comp_type comp, 110 | const uint64_t offset, 111 | uint64_t *size, 112 | void **data) 113 | { 114 | ssize_t bytes_read; 115 | char *cdata; 116 | 117 | if (w->filesize < offset + *size) return BP_EFILEREAD_OOB; 118 | 119 | /* Ignore empty reads */ 120 | if (*size == 0) { 121 | *data = NULL; 122 | return BP_OK; 123 | } 124 | 125 | cdata = malloc(*size); 126 | if (cdata == NULL) return BP_EALLOC; 127 | 128 | bytes_read = pread(w->fd, cdata, (size_t) *size, (off_t) offset); 129 | if ((uint64_t) bytes_read != *size) { 130 | free(cdata); 131 | return BP_EFILEREAD; 132 | } 133 | 134 | /* no compression for head */ 135 | if (comp == kNotCompressed) { 136 | *data = cdata; 137 | } else { 138 | int ret = 
0; 139 | 140 | char *uncompressed = NULL; 141 | size_t usize; 142 | 143 | if (bp__uncompressed_length(cdata, *size, &usize) != BP_OK) { 144 | ret = BP_EDECOMP; 145 | } else { 146 | uncompressed = malloc(usize); 147 | if (uncompressed == NULL) { 148 | ret = BP_EALLOC; 149 | } else if (bp__uncompress(cdata, *size, uncompressed, &usize) != BP_OK) { 150 | ret = BP_EDECOMP; 151 | } else { 152 | *data = uncompressed; 153 | *size = usize; 154 | } 155 | } 156 | 157 | free(cdata); 158 | 159 | if (ret != BP_OK) { 160 | free(uncompressed); 161 | return ret; 162 | } 163 | } 164 | 165 | return BP_OK; 166 | } 167 | 168 | int bp__writer_write(bp__writer_t *w, 169 | const enum comp_type comp, 170 | const void *data, 171 | uint64_t *offset, 172 | uint64_t *size) 173 | { 174 | ssize_t written; 175 | uint32_t padding = sizeof(w->padding) - (w->filesize % sizeof(w->padding)); 176 | 177 | /* Write padding */ 178 | if (padding != sizeof(w->padding)) { 179 | written = write(w->fd, &w->padding, (size_t) padding); 180 | if ((uint32_t) written != padding) return BP_EFILEWRITE; 181 | w->filesize += padding; 182 | } 183 | 184 | /* Ignore empty writes */ 185 | if (size == NULL || *size == 0) { 186 | if (offset != NULL) *offset = w->filesize; 187 | return BP_OK; 188 | } 189 | 190 | /* head shouldn't be compressed */ 191 | if (comp == kNotCompressed) { 192 | written = write(w->fd, data, *size); 193 | } else { 194 | int ret; 195 | size_t max_csize = bp__max_compressed_size(*size); 196 | size_t result_size; 197 | char *compressed = malloc(max_csize); 198 | if (compressed == NULL) return BP_EALLOC; 199 | 200 | result_size = max_csize; 201 | ret = bp__compress(data, *size, compressed, &result_size); 202 | if (ret != BP_OK) { 203 | free(compressed); 204 | return BP_ECOMP; 205 | } 206 | 207 | *size = result_size; 208 | written = write(w->fd, compressed, result_size); 209 | free(compressed); 210 | } 211 | 212 | if ((uint64_t) written != *size) return BP_EFILEWRITE; 213 | 214 | /* change offset */ 215 | 
*offset = w->filesize; 216 | w->filesize += written; 217 | 218 | return BP_OK; 219 | } 220 | 221 | int bp__writer_find(bp__writer_t*w, 222 | const enum comp_type comp, 223 | const uint64_t size, 224 | void *data, 225 | bp__writer_cb seek, 226 | bp__writer_cb miss) 227 | { 228 | int ret = 0; 229 | int match = 0; 230 | uint64_t offset, size_tmp; 231 | 232 | /* Write padding first */ 233 | ret = bp__writer_write(w, kNotCompressed, NULL, NULL, NULL); 234 | if (ret != BP_OK) return ret; 235 | 236 | offset = w->filesize; 237 | size_tmp = size; 238 | 239 | /* Start seeking from bottom of file */ 240 | while (offset >= size) { 241 | ret = bp__writer_read(w, comp, offset - size, &size_tmp, &data); 242 | if (ret != BP_OK) break; 243 | 244 | /* Break if matched */ 245 | if (seek(w, data) == 0) { 246 | match = 1; 247 | break; 248 | } 249 | 250 | offset -= size; 251 | } 252 | 253 | /* Not found - invoke miss */ 254 | if (!match) 255 | ret = miss(w, data); 256 | 257 | return ret; 258 | } 259 | -------------------------------------------------------------------------------- /test/bench-basic.cc: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | 3 | TEST_START("basic benchmark", "basic-bench") 4 | 5 | const int num = 500000; 6 | const int value_len = 1000; 7 | const int delta = 20000; 8 | int i, start; 9 | 10 | char keys[num][10]; 11 | char value[value_len]; 12 | 13 | /* init keys */ 14 | for (int i = 0; i < num; i++) { 15 | sprintf(keys[i], "%d", i); 16 | } 17 | /* init value */ 18 | for (int i = 0; i < value_len; i++) { 19 | value[i] = 'a' + ((i << 3) | i) % 52; 20 | } 21 | value[value_len - 1] = 0; 22 | 23 | for (start = 0; start < num; start += delta) { 24 | fprintf(stdout, "%d items in db\n", start); 25 | 26 | BENCH_START(write, delta) 27 | for (i = start; i < start + delta; i++) { 28 | bp_sets(&db, keys[i], value); 29 | } 30 | BENCH_END(write, delta) 31 | 32 | BENCH_START(read, start + delta) 33 | for (i = 0; i < start 
+ delta; i++) { 34 | char* value1; 35 | bp_gets(&db, keys[i], &value1); 36 | free(value1); 37 | } 38 | BENCH_END(read, start + delta) 39 | } 40 | 41 | BENCH_START(compact, 0) 42 | bp_compact(&db); 43 | BENCH_END(compact, 0) 44 | 45 | BENCH_START(read_after_compact, num) 46 | for (i = 0; i < num; i++) { 47 | char* value; 48 | bp_gets(&db, keys[i], &value); 49 | free(value); 50 | } 51 | BENCH_END(read_after_compact, num) 52 | 53 | BENCH_START(read_after_compact_with_os_cache, num) 54 | for (i = 0; i < num; i++) { 55 | char* value; 56 | bp_gets(&db, keys[i], &value); 57 | free(value); 58 | } 59 | BENCH_END(read_after_compact_with_os_cache, num) 60 | 61 | BENCH_START(remove, num) 62 | for (i = 0; i < num; i++) { 63 | bp_removes(&db, keys[i]); 64 | } 65 | BENCH_END(remove, num) 66 | 67 | TEST_END("basic benchmark", "basic-bench") 68 | -------------------------------------------------------------------------------- /test/bench-bulk.cc: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | 3 | TEST_START("bulk set benchmark", "bulk-bench") 4 | 5 | const int num = 500000; 6 | const int delta = 20000; 7 | int i, start; 8 | 9 | char* keys[num]; 10 | 11 | /* init keys */ 12 | 13 | for (start = 0; start < num; start += delta) { 14 | for (i = start; i < start + delta; i++) { 15 | keys[i] = (char*) malloc(20); 16 | sprintf(keys[i], "%0*d", 20, i); 17 | } 18 | 19 | fprintf(stdout, "%d items in db\n", start); 20 | 21 | BENCH_START(bulk, delta) 22 | bp_bulk_sets(&db, 23 | delta, 24 | (const char**) keys + start, 25 | (const char**) keys + start); 26 | BENCH_END(bulk, delta) 27 | 28 | for (i = start; i < start + delta; i++) { 29 | free(keys[i]); 30 | } 31 | } 32 | 33 | /* ensure that results are correct */ 34 | for (i = 0; i < num; i++) { 35 | char* key; 36 | char* value; 37 | key = (char*) malloc(20); 38 | sprintf(key, "%0*d", 20, i); 39 | 40 | assert(bp_gets(&db, key, &value) == BP_OK); 41 | assert(strcmp(value, key) == 0); 42 | 
43 | free(key); 44 | free(value); 45 | } 46 | 47 | TEST_END("bulk set benchmark", "bulk-bench") 48 | -------------------------------------------------------------------------------- /test/bench-multithread-get.cc: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | 3 | const int num = 100000; 4 | const int rnum = 4; 5 | static char* keys[num]; 6 | 7 | void* reader_thread(void* db_) { 8 | bp_db_t* db = (bp_db_t*) db_; 9 | 10 | for (int i = 0; i < num; i++) { 11 | char* value; 12 | bp_gets(db, keys[i], &value); 13 | free(value); 14 | } 15 | 16 | return NULL; 17 | } 18 | 19 | TEST_START("multi-threaded get benchmark", "mt-get-bench") 20 | int i; 21 | pthread_t readers[rnum]; 22 | 23 | for (i = 0; i < num; i++) { 24 | keys[i] = (char*) malloc(20); 25 | sprintf(keys[i], "%0*d", 20, i); 26 | } 27 | 28 | bp_bulk_sets(&db, 29 | num, 30 | (const char**) keys, 31 | (const char**) keys); 32 | 33 | BENCH_START(get, rnum * num) 34 | for (i = 0; i < rnum; i++) { 35 | pthread_create(&readers[i], NULL, reader_thread, (void*) &db); 36 | } 37 | 38 | for (i = 0; i < rnum; i++) { 39 | pthread_join(readers[i], NULL); 40 | } 41 | BENCH_END(get, rnum * num) 42 | TEST_END("multi-threaded get benchmark", "mt-get-bench") 43 | -------------------------------------------------------------------------------- /test/test-api.cc: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | 3 | int update_cb(void* arg, const bp_value_t* previous, const bp_value_t* curr) { 4 | char* expected = (char*) arg; 5 | assert(strcmp(previous->value, expected) == 0); 6 | 7 | return 1; 8 | } 9 | 10 | int remove_cb(void* arg, const bp_value_t* value) { 11 | char* expected = (char*) arg; 12 | return strcmp(value->value, expected) == 0; 13 | } 14 | 15 | TEST_START("API test", "api") 16 | 17 | const int n = 1000; 18 | char key[100]; 19 | char val[100]; 20 | char expected[100]; 21 | int i; 22 | 23 | for (i = 0; i < 
n; i++) { 24 | sprintf(key, "some key %d", i); 25 | sprintf(val, "some long long long long long value %d", i); 26 | assert(bp_sets(&db, key, val) == BP_OK); 27 | } 28 | 29 | assert(bp_compact(&db) == BP_OK); 30 | 31 | for (i = 0; i < n; i++) { 32 | sprintf(key, "some key %d", i); 33 | sprintf(val, "some updated long long long long long value %d", i); 34 | sprintf(expected, "some long long long long long value %d", i); 35 | assert(bp_updates(&db, key, val, update_cb, (void*) expected) == BP_OK); 36 | } 37 | 38 | assert(bp_compact(&db) == BP_OK); 39 | 40 | for (i = 0; i < n; i++) { 41 | char* result = NULL; 42 | 43 | sprintf(key, "some key %d", i); 44 | sprintf(expected, "some updated long long long long long value %d", i); 45 | 46 | assert(bp_gets(&db, key, &result) == BP_OK); 47 | assert(strcmp(result, expected) == 0); 48 | 49 | free(result); 50 | } 51 | 52 | /* overwrite every key */ 53 | for (i = 0; i < n; i++) { 54 | sprintf(key, "some key %d", i); 55 | sprintf(val, "some another value %d", i); 56 | assert(bp_sets(&db, key, val) == BP_OK); 57 | } 58 | 59 | for (i = 0; i < n; i++) { 60 | bp_key_t kkey; 61 | bp_value_t result; 62 | bp_value_t previous; 63 | 64 | sprintf(key, "some key %d", i); 65 | 66 | kkey.length = strlen(key) + 1; 67 | kkey.value = key; 68 | 69 | /* new values should be in place */ 70 | sprintf(expected, "some another value %d", i); 71 | assert(bp_get(&db, &kkey, &result) == BP_OK); 72 | assert(strcmp(result.value, expected) == 0); 73 | 74 | /* previous values should be available before compaction */ 75 | sprintf(expected, "some updated long long long long long value %d", i); 76 | assert(bp_get_previous(&db, &result, &previous) == BP_OK); 77 | assert(strcmp(previous.value, expected) == 0); 78 | 79 | free(result.value); 80 | 81 | /* previous of previous ain't exist */ 82 | assert(bp_get_previous(&db, &previous, &result) == BP_ENOTFOUND); 83 | 84 | free(previous.value); 85 | } 86 | 87 | assert(bp_compact(&db) == BP_OK); 88 | 89 | for (i = 0; i < 
n; i++) { 90 | bp_key_t kkey; 91 | bp_value_t result; 92 | bp_value_t previous; 93 | 94 | sprintf(key, "some key %d", i); 95 | 96 | kkey.length = strlen(key) + 1; 97 | kkey.value = key; 98 | 99 | /* new values should be in place */ 100 | sprintf(expected, "some another value %d", i); 101 | assert(bp_get(&db, &kkey, &result) == BP_OK); 102 | assert(strcmp(result.value, expected) == 0); 103 | 104 | /* previous should be not available after compaction */ 105 | assert(bp_get_previous(&db, &result, &previous) == BP_ENOTFOUND); 106 | 107 | free(result.value); 108 | } 109 | 110 | for (i = 0; i < n; i++) { 111 | sprintf(key, "some key %d", i); 112 | sprintf(expected, "some another value %d", i); 113 | assert(bp_removevs(&db, key, remove_cb, (void*) expected) == BP_OK); 114 | } 115 | 116 | assert(bp_compact(&db) == BP_OK); 117 | 118 | TEST_END("API test", "api") 119 | -------------------------------------------------------------------------------- /test/test-bulk.cc: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | 3 | int update_cb(void* arg, const bp_value_t* previous, const bp_value_t* curr) { 4 | int i = (unsigned char) previous->value[5]; 5 | return i % 2 == 0 ? 
1 : 0; 6 | } 7 | 8 | TEST_START("bulk set test", "bulk-set") 9 | /* write some stuff */ 10 | const int n = 128; 11 | int i = 0; 12 | char key[100]; 13 | char* keys[n]; 14 | 15 | sprintf(key, "key: x"); 16 | for (i = 0; i < n; i++) { 17 | key[5] = i << 1; 18 | assert(bp_sets(&db, key, key) == BP_OK); 19 | } 20 | 21 | for (i = 0; i < n; i++) { 22 | keys[i] = (char*) malloc(100); 23 | assert(keys[i] != NULL); 24 | 25 | sprintf(keys[i], "key: x"); 26 | keys[i][5] = (i << 1) + 1; 27 | } 28 | 29 | assert(bp_bulk_sets(&db, n, (const char**) keys, (const char**) keys) == 30 | BP_OK); 31 | 32 | /* just for sanity_check */ 33 | assert(bp_bulk_updates(&db, 34 | n, 35 | (const char**) keys, 36 | (const char**) keys, 37 | update_cb, 38 | NULL) == BP_OK); 39 | 40 | for (i = 0; i < n; i++) { 41 | free(keys[i]); 42 | } 43 | 44 | for (i = 0; i < n; i++) { 45 | char* value; 46 | 47 | key[5] = i << 1; 48 | assert(bp_gets(&db, key, &value) == BP_OK); 49 | assert(strcmp(key, value) == 0); 50 | free(value); 51 | 52 | key[5] = (i << 1) + 1; 53 | assert(bp_gets(&db, key, &value) == BP_OK); 54 | assert(strcmp(key, value) == 0); 55 | free(value); 56 | } 57 | 58 | TEST_END("range get test", "range") 59 | -------------------------------------------------------------------------------- /test/test-corruption.cc: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | 3 | TEST_START("database corruption test", "db-corrupt") 4 | const int n = 1024; 5 | char key[100]; 6 | char val[100]; 7 | char expected[100]; 8 | int i, fd, filesize, matched; 9 | 10 | for (i = 0; i < n; i++) { 11 | sprintf(key, "some key %d", i); 12 | sprintf(val, "some long long long long long value %d", i); 13 | assert(bp_sets(&db, key, val) == BP_OK); 14 | } 15 | 16 | assert(bp_close(&db) == BP_OK); 17 | 18 | /* corrupt file by zeroing last 2k bytes of it */ 19 | fd = open(__db_file, O_RDWR, S_IWUSR | S_IRUSR); 20 | assert(fd != -1); 21 | 22 | char buff[13589]; 23 | 
memset((void*) buff, 0xff, sizeof(buff)); 24 | 25 | filesize = lseek(fd, 0, SEEK_END); 26 | assert(filesize != -1); 27 | assert(pwrite(fd, 28 | (void*) buff, 29 | sizeof(buff), 30 | filesize - sizeof(buff)) == sizeof(buff)); 31 | assert(close(fd) == 0); 32 | 33 | assert(bp_open(&db, __db_file) == BP_OK); 34 | 35 | matched = 0; 36 | for (i = 0; i < n; i++) { 37 | sprintf(key, "some key %d", i); 38 | sprintf(expected, "some long long long long long value %d", i); 39 | char* actual; 40 | if (bp_gets(&db, key, &actual) != BP_OK) continue; 41 | 42 | assert(strcmp(expected, actual) == 0); 43 | free(actual); 44 | 45 | matched++; 46 | } 47 | assert(matched > 3 * (n >> 2)); 48 | TEST_END("database corruption test", "db-corrupt") 49 | -------------------------------------------------------------------------------- /test/test-range.cc: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | 3 | void range_cb(void* matched, const bp_key_t* key, const bp_value_t* value) { 4 | /* add '\0' to values */ 5 | (*(int*) matched)++; 6 | } 7 | 8 | TEST_START("range get test", "range") 9 | /* write some stuff */ 10 | const int n = 250; 11 | int i = 0; 12 | char key[100]; 13 | char value[100]; 14 | int matched; 15 | 16 | sprintf(key, "key: x"); 17 | sprintf(value, "val: x"); 18 | for (i = 1; i < n; i++) { 19 | key[5] = i; 20 | value[5] = i; 21 | assert(bp_sets(&db, key, value) == BP_OK); 22 | } 23 | 24 | matched = 0; 25 | 26 | /* try getting 1 key-value */ 27 | bp_get_ranges(&db, "key: \x15", "key: \x15", range_cb, &matched); 28 | 29 | assert(matched == 1); 30 | 31 | matched = 0; 32 | 33 | /* try getting < 32 key-value (in one leaf-page) */ 34 | bp_get_ranges(&db, "key: \x05", "key: \x1f", range_cb, &matched); 35 | 36 | assert(matched == (0x1f - 0x05 + 1)); 37 | 38 | matched = 0; 39 | 40 | /* try getting > 32 key-value (in multiple leaf-pages) */ 41 | bp_get_ranges(&db, "key: \x12", "key: \x5f", range_cb, &matched); 42 | 43 | 
assert(matched == (0x5f - 0x12 + 1)); 44 | 45 | matched = 0; 46 | 47 | /* try getting all key-values */ 48 | bp_get_ranges(&db, "key: \x01", "key: \xfa", range_cb, &matched); 49 | 50 | assert(matched == 249); 51 | TEST_END("range get test", "range") 52 | -------------------------------------------------------------------------------- /test/test-reopen.cc: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | 3 | TEST_START("reopen test", "reopen") 4 | char* result = NULL; 5 | 6 | /* reopen empty database */ 7 | assert(bp_close(&db) == BP_OK); 8 | assert(bp_open(&db, __db_file) == BP_OK); 9 | assert(bp_gets(&db, "key-1", &result) == BP_ENOTFOUND); 10 | 11 | /* write some stuff */ 12 | assert(bp_sets(&db, "key-1", "val-1") == BP_OK); 13 | assert(bp_gets(&db, "key-1", &result) == BP_OK); 14 | assert(strncmp(result, "val-1", 5) == 0); 15 | free(result); 16 | result = NULL; 17 | 18 | /* reopen database */ 19 | assert(bp_close(&db) == BP_OK); 20 | assert(bp_open(&db, __db_file) == BP_OK); 21 | 22 | /* ensure that stuff is still in */ 23 | assert(bp_gets(&db, "key-1", &result) == BP_OK); 24 | assert(strncmp(result, "val-1", 5) == 0); 25 | free(result); 26 | result = NULL; 27 | 28 | /* add another stuff */ 29 | assert(bp_sets(&db, "key-2", "val-2") == BP_OK); 30 | assert(bp_gets(&db, "key-2", &result) == BP_OK); 31 | assert(strncmp(result, "val-2", 5) == 0); 32 | free(result); 33 | result = NULL; 34 | TEST_END("reopen test", "reopen") 35 | -------------------------------------------------------------------------------- /test/test-threaded-rw.cc: -------------------------------------------------------------------------------- 1 | #include "test.h" 2 | 3 | const int items = 1000; 4 | const int times = 100; 5 | 6 | void* test_reader(void* db_) { 7 | bp_db_t* db = (bp_db_t*) db_; 8 | 9 | char key[20]; 10 | char* value; 11 | 12 | for (int j = 0; j < times; j++) { 13 | for (int i = 0; i < items; i++) { 14 | sprintf(key, "%d", 
i); 15 | if (bp_gets(db, key, &value) == BP_OK) { 16 | assert(strcmp(key, value) == 0); 17 | free(value); 18 | } 19 | } 20 | } 21 | 22 | return NULL; 23 | } 24 | 25 | void* test_writer(void* db_) { 26 | bp_db_t* db = (bp_db_t*) db_; 27 | 28 | char key[20]; 29 | int ret; 30 | 31 | for (int j = 0; j < times; j++) { 32 | for (int i = 0; i < items; i++) { 33 | sprintf(key, "%d", i); 34 | ret = bp_sets(db, key, key); 35 | assert(ret == BP_OK); 36 | usleep(30); 37 | } 38 | } 39 | 40 | return NULL; 41 | } 42 | 43 | void* test_remover(void* db_) { 44 | bp_db_t* db = (bp_db_t*) db_; 45 | 46 | char key[20]; 47 | int ret; 48 | 49 | for (int i = 0; i < items; i++) { 50 | sprintf(key, "%d", i); 51 | bp_removes(db, key); 52 | usleep(100); 53 | } 54 | 55 | return NULL; 56 | } 57 | 58 | void* test_compact(void* db_) { 59 | bp_db_t* db = (bp_db_t*) db_; 60 | int ret; 61 | 62 | for (int i = 0; i < times; i++) { 63 | usleep(3300); 64 | ret = bp_compact(db); 65 | assert(ret == BP_OK); 66 | } 67 | 68 | return NULL; 69 | } 70 | 71 | TEST_START("threaded read/write test", "threaded-rw") 72 | 73 | const int n = 2; 74 | pthread_t readers[n]; 75 | pthread_t writers[n]; 76 | pthread_t removers[n]; 77 | pthread_t compact; 78 | 79 | for (int i = 0; i < n; i++) { 80 | assert(pthread_create(&readers[i], NULL, test_reader, (void*) &db) == 0); 81 | assert(pthread_create(&writers[i], NULL, test_writer, (void*) &db) == 0); 82 | assert(pthread_create(&removers[i], NULL, test_remover, (void*) &db) == 0); 83 | } 84 | assert(pthread_create(&compact, NULL, test_compact, (void*) &db) == 0); 85 | 86 | for (int i = 0; i < n; i++) { 87 | assert(pthread_join(readers[i], NULL) == 0); 88 | assert(pthread_join(writers[i], NULL) == 0); 89 | assert(pthread_join(removers[i], NULL) == 0); 90 | } 91 | assert(pthread_join(compact, NULL) == 0); 92 | 93 | TEST_END("threaded read/write test", "threaded-rw") 94 | -------------------------------------------------------------------------------- /test/test.h: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #define TRY_REMOVE(db_file) \ 14 | if (access("/tmp/" db_file ".bp", F_OK) == 0) { \ 15 | assert(unlink("/tmp/" db_file ".bp") == 0); \ 16 | } \ 17 | if (access("/tmp/" db_file ".bp.compact", F_OK) == 0) { \ 18 | assert(unlink("/tmp/" db_file ".bp.compact") == 0); \ 19 | } 20 | 21 | #define TEST_START(name, db_file) \ 22 | int main(void) { \ 23 | fprintf(stdout, "-- %s --\n", name); \ 24 | TRY_REMOVE(db_file) \ 25 | bp_db_t db; \ 26 | assert(bp_open(&db, "/tmp/" db_file ".bp") == 0); \ 27 | const char *__db_file = "/tmp/" db_file ".bp"; 28 | 29 | #define TEST_END(name, db_file) \ 30 | assert(bp_close(&db) == 0); \ 31 | /* cleanup */ \ 32 | TRY_REMOVE(db_file) \ 33 | fclose(stdout); \ 34 | return 0;\ 35 | } 36 | 37 | #define BENCH_START(name, num) \ 38 | timeval __bench_##name##_start; \ 39 | gettimeofday(&__bench_##name##_start, NULL); 40 | 41 | #define BENCH_END(name, num) \ 42 | timeval __bench_##name##_end; \ 43 | gettimeofday(&__bench_##name##_end, NULL); \ 44 | double __bench_##name##_total = \ 45 | __bench_##name##_end.tv_sec - \ 46 | __bench_##name##_start.tv_sec + \ 47 | __bench_##name##_end.tv_usec * 1e-6 - \ 48 | __bench_##name##_start.tv_usec * 1e-6; \ 49 | if ((num) != 0) { \ 50 | fprintf(stdout, #name " : %f ops/sec\n", \ 51 | (num) / __bench_##name##_total); \ 52 | } else { \ 53 | fprintf(stdout, #name " : %fs\n", \ 54 | __bench_##name##_total); \ 55 | } 56 | --------------------------------------------------------------------------------