├── README.md ├── shift_or.py ├── shift_and.py ├── gcd.py ├── shift_and.c ├── shift_and.cpp ├── shift_or.cpp ├── bitmap.py ├── kmp_search.py ├── qsort.c ├── bptree.c ├── bptree_v1.5.c └── bptree_v2.c /README.md: -------------------------------------------------------------------------------- 1 | algorithm 2 | ========= 3 | 4 | Data Structure and Algorithms 5 | 6 | Some useful data structure & algorithms. 7 | 8 | e.g. , greatest common divisor, bitmap, KMP-search, and so on. 9 | -------------------------------------------------------------------------------- /shift_or.py: -------------------------------------------------------------------------------- 1 | def pre(t): 2 | B = {} 3 | for i in xrange(len(t)): 4 | c = t[i] 5 | if c in B: 6 | B[c] ^= (1 << i) 7 | else: 8 | B[c] = ~(1 << i) 9 | return B 10 | 11 | def shift_or_match(s, t): 12 | B = pre(t) 13 | D = -1 14 | m = len(t) 15 | mask = ~(1 << (m - 1)) 16 | res = [] 17 | for i in xrange(len(s)): 18 | c = s[i] 19 | if c in B: 20 | D = (D << 1) | B[c] 21 | else: 22 | D = -1 23 | if (~(mask | D)): 24 | res.append(i - m + 1) 25 | return res 26 | 27 | if __name__ == '__main__': 28 | s = raw_input("Source string:\n") 29 | t = raw_input("Pattern string:\n") 30 | res = shift_or_match(s, t) 31 | print 'Find "%s" in "%s" by positions: ' % (t, s), res 32 | -------------------------------------------------------------------------------- /shift_and.py: -------------------------------------------------------------------------------- 1 | #coding=utf8 2 | 3 | def preprocess(t): 4 | B = {} 5 | for i in xrange(len(t)): 6 | c = t[i] 7 | if c in B: 8 | B[c] |= (1 << i) 9 | else: 10 | B[c] = 1 << i 11 | return B 12 | 13 | def shift_and_match(s, t): 14 | B = preprocess(t) 15 | D = 0 16 | m = len(t) 17 | mask = 1 << (m - 1) 18 | res = [] 19 | for i in xrange(len(s)): 20 | c = s[i] 21 | if c in B: 22 | D = ((D << 1) | 1) & B[c] 23 | else: 24 | D = 0 25 | if (D & mask): 26 | res.append(i - m + 1) 27 | return res 28 | 29 | if __name__ == '__main__': 30 | s = raw_input("Source string:\n") 31 | t = raw_input("Pattern string:\n") 32 | res = shift_and_match(s, t) 33 | print 'Find "%s" in "%s" by positions: ' % (t, s), res 34 | -------------------------------------------------------------------------------- /gcd.py: -------------------------------------------------------------------------------- 1 | #coding=utf8 2 | 3 | """ 4 | Greatest Common Divisor by Euclid's algorithm. 5 | gcd(n, m) = gcd(m, n mod m) 6 | 7 | n = km+b => n%m = b = n-km 8 | 1, if gcd(n,m) = r 9 | then n=pr and m=qr => n%m = pr-kqr = (p-kq)r 10 | So, r is common divisor of (m,n%m) 11 | 12 | 2, if gcd(m,n%m) = r 13 | then m=pr and n%m=qr => n-km=qr => n=qr+km=qr+kpr=(q+kp)r 14 | So, r is common divisor of (n,m) 15 | 16 | Since 1 and 2, we know gcd(n,m) = gcd(m,n%m) 17 | """ 18 | 19 | def gcd(n, m): 20 | if n < m: 21 | t = m 22 | m = n 23 | n = t 24 | 25 | while m: 26 | r = n%m 27 | n = m 28 | m = r 29 | 30 | return n 31 | 32 | 33 | 34 | def lcm(n, m): 35 | """ 36 | Least Common Multiple 37 | lcm(n,m) = n*m/gcd(n,m) 38 | """ 39 | return n*(m/gcd(n,m)) 40 | 41 | 42 | if __name__ == '__main__': 43 | print gcd(25,30) 44 | print lcm(30,25) 45 | 46 | -------------------------------------------------------------------------------- /shift_and.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Peking University. 2 | // Author: Xiaosong Rong (rongxiaosong@gmail.com) 3 | 4 | #include 5 | #include 6 | #define MAXSIZE 200 7 | 8 | void pre(const char *t, long B[]) 9 | { 10 | int i, c; 11 | for (i = 0; (c = t[i]) != '\0'; i++) { 12 | B[c] |= (1 << i); 13 | } 14 | } 15 | 16 | int shift_and_match(const char *s, const char *t, int res[]) 17 | { 18 | int i, c, k, m; 19 | long B[256] = {0}, D = 0; 20 | long mask; 21 | m = strlen(t); 22 | mask = 1 << (strlen(t) - 1); 23 | pre(t, B); 24 | for (i = 0, k = 0; (c = s[i]) != '\0'; i++) { 25 | D = ((D << 1) | 1) & B[c]; 26 | if (D & mask) 27 | res[k++] = i - m + 1; 28 | } 29 | return k; 30 | } 31 | 32 | main() 33 | { 34 | char s[MAXSIZE], t[MAXSIZE]; 35 | int res[MAXSIZE], i, n; 36 | scanf("%s%s", s, t); 37 | n = shift_and_match(s, t, res); 38 | printf("Find %d positions:\n", n); 39 | for (i = 0; i < n; i++) 40 | printf("%d\n", res[i]); 41 | } 42 | -------------------------------------------------------------------------------- /shift_and.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Peking University. 2 | // Author: Xiaosong Rong (rongxiaosong@gmail.com) 3 | 4 | #include 5 | #include 6 | #include 7 | using namespace std; 8 | 9 | void preprocess(unsigned int B[], string T, int n) 10 | { 11 | unsigned int shift=1; 12 | for (int i=0; i and_match(string S, string T) 19 | { 20 | int m=S.length(), n=T.length(); 21 | unsigned int B[256], D=0, mask; 22 | for (int i=0; i<256; i++) 23 | B[i] = 0; 24 | preprocess(B, T, n); 25 | vector res; 26 | 27 | mask = 1 << (n - 1); 28 | for (int i=0; i> S >> T; 41 | vector res=and_match(S,T); 42 | for (vector::iterator it=res.begin(); it!=res.end(); ++it) 43 | cout << *it << endl; 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /shift_or.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Peking University. 2 | // Author: Xiaosong Rong (rongxiaosong@gmail.com) 3 | 4 | #include 5 | #include 6 | #include 7 | using namespace std; 8 | 9 | void preprocess(unsigned int B[], string T, int n) 10 | { 11 | unsigned int shift=1; 12 | for (int i=0; i or_match(string S, string T) 19 | { 20 | int m=S.length(), n=T.length(); 21 | unsigned int B[256], D=~0, mask; 22 | for (int i=0; i<256; i++) 23 | B[i] = ~0; // every bit is set to "1" 24 | preprocess(B, T, n); 25 | vector res; 26 | 27 | mask = ~(1 << (n - 1)); 28 | for (int i=0; i> S >> T; 41 | vector res=or_match(S,T); 42 | for (vector::iterator it=res.begin(); it!=res.end(); ++it) 43 | cout << *it << endl; 44 | return 0; 45 | } 46 | -------------------------------------------------------------------------------- /bitmap.py: -------------------------------------------------------------------------------- 1 | #coding=utf8 2 | 3 | """ 4 | Bitmap by python array. 5 | 6 | python array: http://docs.python.org/2/library/array.html 7 | """ 8 | import array 9 | import hashlib 10 | 11 | 12 | class BitMap(): 13 | ''' 14 | array(0b00000000, 0b00000000, 0b00000000, ... ) 15 | ''' 16 | 17 | def __init__(self, n): 18 | self.arr = array.array('B', [0]*n) 19 | self.length = n 20 | 21 | def set_bit(self, i): 22 | i, r = divmod(i, 8) 23 | self.arr[i] |= (1< m, so overall complexity of KMP algorithm is O(n) 9 | """ 10 | 11 | def preprocess(t): 12 | m = len(t) 13 | next = [0]*(m) 14 | i = 1 15 | j = 0 16 | """ 17 | In this loop, i+=1 is excuted exactly m times. 18 | j=next[j-1], it will decrease the value of j by at least 1. 19 | j can only be increased at most m times, 20 | so j=next[j-1] will be excuted at most m times. 21 | So, overall loop requires at most 2m=O(m) steps. 22 | """ 23 | while in-1, remain of t cannot match any substring of remain of s 48 | while j 5 | #include 6 | #include 7 | 8 | // typedef unsigned int size_t; 9 | #define MAX_SIZE 500 10 | #define swap(p, q, size) { \ 11 | char *t; \ 12 | t = malloc(size); \ 13 | memcpy(t, p, size); \ 14 | memcpy(p, q, size); \ 15 | memcpy(q, t, size); \ 16 | free(t); \ 17 | } 18 | 19 | /* 20 | void swap(const void *p, const void *q, size_t size) 21 | { 22 | char *t; 23 | t = malloc(size); 24 | memcpy(t, p, size); 25 | memcpy((char *)p, q, size); 26 | memcpy((char *)q, t, size); 27 | free(t); 28 | } 29 | */ 30 | 31 | void _qsort(void *base, size_t n, size_t size, int (*cmp)(const void *, const void *)) 32 | { 33 | char *p, *q, *hi, *lo; 34 | size_t i; 35 | if (n < 2) 36 | return ; 37 | if (n == 2) { 38 | p = (char *)base + size; 39 | if ((*cmp)(base, p) > 0) 40 | swap(base, p, size) 41 | return ; 42 | } 43 | lo = (char *)base; 44 | hi = (char *)base + size * (n - 1); 45 | p = lo + size; 46 | q = hi; 47 | while (1) { 48 | for (; p <= hi && (*cmp)(p, lo) < 0; p += size) ; 49 | for (; q > lo && (*cmp)(q, lo) > 0; q -= size) ; 50 | if (p < q) 51 | swap(p, q, size) 52 | else { 53 | if (q != lo) 54 | swap(lo, q, size) 55 | break; 56 | } 57 | } 58 | _qsort(lo, (q - lo) / size, size, cmp); 59 | _qsort(q + size, (hi - q) / size, size, cmp); 60 | return ; 61 | } 62 | 63 | int intcmp(const int *x, const int *y) 64 | { 65 | return *x - *y; 66 | } 67 | 68 | #define STR_LEN 20 69 | main() 70 | { 71 | int i, n; 72 | // char base[MAX_SIZE][STR_LEN]; 73 | int nums[MAX_SIZE]; 74 | // for (i = 0; i < MAX_SIZE && scanf("%s", base[i]) != EOF; i++) ; 75 | for (i = 0; i < MAX_SIZE && scanf("%d", &nums[i]) != EOF; i++) ; 76 | n = i; 77 | for (i = 0; i < n; i++) 78 | printf("%d ", nums[i]); 79 | putchar('\n'); 80 | 81 | // my _qsort function 82 | // _qsort(base, n, STR_LEN, (int (*)(const void *, const void *))strcmp); 83 | _qsort(nums, n, sizeof(int), (int (*)(const void *, const void *))intcmp); 84 | 85 | for (i = 0; i < n; i++) 86 | printf("%d ", nums[i]); 87 | putchar('\n'); 88 | } 89 | -------------------------------------------------------------------------------- /bptree.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Peking University. 2 | // Author: Xiaosong Rong (rongxiaosong@gmail.com) 3 | // 4 | // C implementation of B+ tree. 5 | 6 | #include 7 | #include 8 | #include 9 | #include 10 | #define MAX_KEY_LEN 15 11 | 12 | // each key is a string pointer 13 | // num_keys if the number of keys, number of pointers = num_keys + 1 14 | typedef struct node { 15 | void **pointers; 16 | char **keys; 17 | struct node *parent; 18 | int num_keys; 19 | bool is_leaf; 20 | struct node *next; // used for printing 21 | } node; 22 | 23 | typedef struct record { 24 | int value; 25 | } record; 26 | 27 | int size = 3; // number of pointers of a node 28 | 29 | 30 | void print_tree(node *root); 31 | record *find(node *root, char *key); 32 | node *find_leaf(node *root, char *key); 33 | 34 | // Insertion 35 | record *new_record(int value); 36 | node *make_new_node(); 37 | node *make_new_leaf(); 38 | node *make_new_tree(char *key, record *rec); 39 | node *make_new_root(node *left, node *right, char *key); 40 | node *insert(node *root, char *key, int value); 41 | node *insert_into_parent(node *root, node *left, node *right, char *key); 42 | void insert_into_node(node *nd, node *right, int index, char *key); 43 | node *insert_into_node_after_splitting(node *root, node *nd, node *right, 44 | int index, char *key); 45 | node *insert_into_leaf_after_splitting(node *root, node *leaf, int index, 46 | char *key, record *rec); 47 | void insert_into_leaf(node *leaf, int index, char *key, record *rec); 48 | 49 | // Deletion 50 | void destroy_node(node *nd); 51 | void *remove_entry(node *nd, int index); 52 | node *delete(node *root, char *key); 53 | node *delete_entry(node *root, node *nd, int index); 54 | node *adjust_root(node *root); 55 | int get_node_index(node *nd); 56 | node *coalesce_nodes(node *root, node *nd, node *neighbor, int nd_index); 57 | void distribute_nodes(node *nd, node *neighbor, int nd_index); 58 | 59 | 60 | // for test 61 | void test_find(node *root) 62 | { 63 | char *key; 64 | record *r; 65 | key = malloc(MAX_KEY_LEN); 66 | while (1){ 67 | scanf("%s", key); 68 | if (strcmp(key, "exit") == 0) 69 | break; 70 | r = find(root, key); 71 | if (r == NULL){ 72 | printf("Not found!!\n"); 73 | continue; 74 | } 75 | printf("Record of %s: %d\n", key, r->value); 76 | } 77 | } 78 | 79 | node *test_delete(node *root) 80 | { 81 | char *key; 82 | key = malloc(MAX_KEY_LEN); 83 | while (1){ 84 | scanf("%s", key); 85 | if (strcmp(key, "exit") == 0) 86 | break; 87 | root = delete(root, key); 88 | print_tree(root); 89 | } 90 | return root; 91 | } 92 | // end of test 93 | 94 | 95 | void print_tree(node *root) 96 | { 97 | node *p, *p_down; 98 | int i; 99 | if (root == NULL){ 100 | printf("Empty tree!\n"); 101 | return; 102 | } 103 | p = root; 104 | p_down = root; 105 | while (!p->is_leaf){ 106 | for (i = 0; i < p->num_keys; i++) 107 | printf("%s ", p->keys[i]); 108 | // printf("%d ", p->keys[i][0]); // for test 109 | printf("| "); 110 | p = p->next; 111 | if (!p){ 112 | p_down = p_down->pointers[0]; // next level 113 | p = p_down; 114 | printf("\n"); 115 | } 116 | } 117 | 118 | while (p){ 119 | for (i = 0; i < p->num_keys; i++) 120 | printf("%s ", p->keys[i]); 121 | // printf("%d ", p->keys[i][0]); // for test 122 | printf(" | "); 123 | p = p->pointers[size-1]; 124 | } 125 | printf("\n"); 126 | } 127 | 128 | record *find(node *root, char *key) 129 | { 130 | node *leaf; 131 | int i; 132 | leaf = find_leaf(root, key); 133 | if (leaf == NULL) 134 | return NULL; 135 | for (i = 0; i < leaf->num_keys && strcmp(leaf->keys[i], key) != 0; i++) 136 | ; 137 | if (i == leaf->num_keys) 138 | return NULL; 139 | return (record *)leaf->pointers[i]; 140 | } 141 | 142 | node *find_leaf(node *root, char *key) 143 | { 144 | node *nd; 145 | int i; 146 | if (root == NULL) 147 | return root; 148 | nd = root; 149 | while (!nd->is_leaf){ 150 | for (i = 0; i < nd->num_keys && strcmp(nd->keys[i], key) <= 0; i++) 151 | ; 152 | nd = (node *)nd->pointers[i]; 153 | } 154 | return nd; 155 | } 156 | 157 | record *new_record(int value) 158 | { 159 | record *rec; 160 | rec = (record *)malloc(sizeof(record)); 161 | rec->value = value; 162 | return rec; 163 | } 164 | 165 | node *make_new_node() 166 | { 167 | node *nd; 168 | nd = (node *)malloc(sizeof(node)); 169 | nd->pointers = malloc(size * sizeof(void *)); 170 | nd->keys = malloc((size - 1) * sizeof(char *)); 171 | nd->parent = NULL; 172 | nd->num_keys = 0; 173 | nd->is_leaf = false; 174 | nd->next = NULL; 175 | return nd; 176 | } 177 | 178 | node *make_new_leaf() 179 | { 180 | node *leaf; 181 | leaf = make_new_node(); 182 | leaf->is_leaf = true; 183 | return leaf; 184 | } 185 | 186 | node *make_new_tree(char *key, record *rec) 187 | { 188 | node *root; 189 | root = make_new_leaf(); 190 | root->pointers[0] = rec; 191 | root->keys[0] = malloc(MAX_KEY_LEN); 192 | strcpy(root->keys[0], key); 193 | root->pointers[size-1] = NULL; 194 | root->num_keys++; 195 | return root; 196 | } 197 | 198 | node *make_new_root(node *left, node *right, char *key) 199 | { 200 | node *root; 201 | root = make_new_node(); 202 | root->pointers[0] = left; 203 | root->pointers[1] = right; 204 | root->keys[0] = malloc(MAX_KEY_LEN); 205 | strcpy(root->keys[0], key); 206 | root->num_keys++; 207 | left->parent = root; 208 | right->parent = root; 209 | return root; 210 | } 211 | 212 | node *insert(node *root, char *key, int value) 213 | { 214 | record *rec; 215 | node *leaf; 216 | int index, cond; 217 | leaf = find_leaf(root, key); 218 | if (!leaf){ // cannot find the leaf, the tree is empty 219 | rec = new_record(value); 220 | return make_new_tree(key, rec); 221 | } 222 | for (index = 0; index < leaf->num_keys && (cond = strcmp(leaf->keys[index], key)) < 0; index++) 223 | ; 224 | if (cond == 0) // ignore duplicates 225 | return root; 226 | rec = new_record(value); 227 | if (leaf->num_keys < size - 1){ 228 | insert_into_leaf(leaf, index, key, rec); 229 | return root; // the root remains unchanged 230 | } 231 | return insert_into_leaf_after_splitting(root, leaf, index, key, rec); 232 | } 233 | 234 | node *insert_into_parent(node *root, node *left, node *right, char *key) 235 | { 236 | node *parent; 237 | int index, i; 238 | parent = left->parent; 239 | 240 | if (parent == NULL){ 241 | return make_new_root(left, right, key); 242 | } 243 | 244 | for (index = 0; index < parent->num_keys && parent->pointers[index] != left; index++); 245 | ; 246 | if (parent->num_keys < size - 1){ 247 | insert_into_node(parent, right, index, key); 248 | return root; // the root remains unchanged 249 | } 250 | return insert_into_node_after_splitting(root, parent, right, index, key); 251 | } 252 | 253 | void insert_into_node(node *nd, node *right, int index, char *key) 254 | { 255 | int i; 256 | for (i = nd->num_keys; i > index; i--){ 257 | nd->keys[i] = nd->keys[i-1]; 258 | nd->pointers[i+1] = nd->pointers[i]; 259 | } 260 | nd->keys[index] = malloc(MAX_KEY_LEN); 261 | strcpy(nd->keys[index], key); 262 | nd->pointers[index+1] = right; 263 | nd->num_keys++; 264 | } 265 | 266 | node *insert_into_node_after_splitting(node *root, node *nd, node *right, int index, char *key) 267 | { 268 | int i, split; 269 | node **temp_ps, *new_nd, *child; 270 | char **temp_ks, *new_key; 271 | temp_ps = malloc((size + 1) * sizeof(node *)); 272 | temp_ks = malloc(size * sizeof(char *)); 273 | 274 | for (i = 0; i < size + 1; i++){ 275 | if (i == index + 1) 276 | temp_ps[i] = right; 277 | else if (i < index + 1) 278 | temp_ps[i] = nd->pointers[i]; 279 | else 280 | temp_ps[i] = nd->pointers[i-1]; 281 | } 282 | for (i = 0; i < size; i++){ 283 | if (i == index){ 284 | temp_ks[i] = malloc(MAX_KEY_LEN); 285 | strcpy(temp_ks[i], key); 286 | } 287 | else if (i < index) 288 | temp_ks[i] = nd->keys[i]; 289 | else 290 | temp_ks[i] = nd->keys[i-1]; 291 | } 292 | 293 | 294 | split = size % 2 ? size / 2 + 1 : size / 2; // split is #pointers 295 | nd->num_keys = split - 1; 296 | for (i = 0; i < split - 1; i++){ 297 | nd->pointers[i] = temp_ps[i]; 298 | nd->keys[i] = temp_ks[i]; 299 | } 300 | nd->pointers[i] = temp_ps[i]; // i == split - 1 301 | new_key = temp_ks[split - 1]; 302 | 303 | new_nd = make_new_node(); 304 | new_nd->num_keys = size - split; 305 | for (++i; i < size; i++){ 306 | new_nd->pointers[i - split] = temp_ps[i]; 307 | new_nd->keys[i - split] = temp_ks[i]; 308 | } 309 | new_nd->pointers[i - split] = temp_ps[i]; 310 | new_nd->parent = nd->parent; 311 | for (i = 0; i <= new_nd->num_keys; i++){ // #pointers == num_keys + 1 312 | child = (node *)(new_nd->pointers[i]); 313 | child->parent = new_nd; 314 | } 315 | new_nd->next = nd->next; 316 | nd->next = new_nd; 317 | 318 | free(temp_ps); 319 | free(temp_ks); 320 | return insert_into_parent(root, nd, new_nd, new_key); 321 | } 322 | 323 | void insert_into_leaf(node *leaf, int index, char *key, record *rec) 324 | { 325 | int i; 326 | for (i = leaf->num_keys; i > index; i--){ 327 | leaf->keys[i] = leaf->keys[i-1]; 328 | leaf->pointers[i] = leaf->pointers[i-1]; 329 | } 330 | leaf->keys[index] = malloc(MAX_KEY_LEN); 331 | strcpy(leaf->keys[index], key); 332 | leaf->pointers[index] = rec; 333 | leaf->num_keys++; 334 | } 335 | 336 | node *insert_into_leaf_after_splitting(node *root, node *leaf, int index, char *key, record *rec) 337 | { 338 | node *new_leaf; 339 | record **temp_ps; 340 | char **temp_ks, *new_key; 341 | int i, split; 342 | 343 | temp_ps = malloc(size * sizeof(record *)); 344 | temp_ks = malloc(size * sizeof(char *)); 345 | for (i = 0; i < size; i++){ 346 | if (i == index){ 347 | temp_ps[i] = rec; 348 | temp_ks[i] = malloc(MAX_KEY_LEN); 349 | strcpy(temp_ks[i], key); 350 | } 351 | else if (i < index){ 352 | temp_ps[i] = leaf->pointers[i]; 353 | temp_ks[i] = leaf->keys[i]; 354 | } 355 | else{ 356 | temp_ps[i] = leaf->pointers[i-1]; 357 | temp_ks[i] = leaf->keys[i-1]; 358 | } 359 | } 360 | 361 | split = size / 2; 362 | leaf->num_keys = split; 363 | for (i = 0; i < split; i++){ 364 | leaf->pointers[i] = temp_ps[i]; 365 | leaf->keys[i] = temp_ks[i]; 366 | } 367 | 368 | new_leaf = make_new_leaf(); 369 | new_leaf->num_keys = size - split; 370 | for (; i < size; i++){ 371 | new_leaf->pointers[i - split] = temp_ps[i]; 372 | new_leaf->keys[i - split] = temp_ks[i]; 373 | } 374 | 375 | new_leaf->parent = leaf->parent; 376 | new_leaf->pointers[size - 1] = leaf->pointers[size - 1]; 377 | leaf->pointers[size - 1] = new_leaf; 378 | free(temp_ps); 379 | free(temp_ks); 380 | new_key = new_leaf->keys[0]; 381 | return insert_into_parent(root, leaf, new_leaf, new_key); 382 | } 383 | 384 | node *delete(node *root, char *key) 385 | { 386 | node *leaf; 387 | record *rec; 388 | int i; 389 | leaf = find_leaf(root, key); 390 | if (leaf == NULL) 391 | return root; 392 | for (i = 0; i < leaf->num_keys && strcmp(leaf->keys[i], key) != 0; i++) 393 | ; 394 | if (i == leaf->num_keys) // no such key 395 | return root; 396 | rec = (record *)leaf->pointers[i]; 397 | root = delete_entry(root, leaf, i); 398 | return root; 399 | } 400 | 401 | node *delete_entry(node *root, node *nd, int index) 402 | { 403 | int min_keys, cap, nd_index; 404 | node *neighbor; 405 | 406 | remove_entry(nd, index); 407 | if (nd == root) 408 | return adjust_root(nd); 409 | min_keys = nd->is_leaf ? size / 2 : (size - 1) / 2; 410 | if (nd->num_keys >= min_keys) { 411 | return root; 412 | } 413 | 414 | nd_index = get_node_index(nd); 415 | if (nd_index == 0) 416 | neighbor = nd->parent->pointers[1]; // right neighbor 417 | else 418 | neighbor = nd->parent->pointers[nd_index - 1]; // left neighbor 419 | 420 | cap = nd->is_leaf ? size - 1 : size - 2; 421 | if (neighbor->num_keys + nd->num_keys <= cap) 422 | return coalesce_nodes(root, nd, neighbor, nd_index); 423 | 424 | distribute_nodes(nd, neighbor, nd_index); 425 | return root; 426 | } 427 | 428 | void distribute_nodes(node *nd, node *neighbor, int nd_index) 429 | { 430 | int i; 431 | node *tmp; 432 | if (nd_index != 0) { 433 | if (!nd->is_leaf) 434 | nd->pointers[nd->num_keys + 1] = nd->pointers[nd->num_keys]; 435 | for (i = nd->num_keys; i > 0; i--){ // shift to right by 1 436 | nd->keys[i] = nd->keys[i - 1]; 437 | nd->pointers[i] = nd->pointers[i - 1]; 438 | } 439 | if (!nd->is_leaf){ 440 | nd->keys[0] = nd->parent->keys[nd_index - 1]; 441 | 442 | nd->pointers[0] = neighbor->pointers[neighbor->num_keys]; 443 | tmp = (node *)nd->pointers[0]; 444 | tmp->parent = nd; 445 | neighbor->pointers[neighbor->num_keys] = NULL; 446 | 447 | nd->parent->keys[nd_index - 1] = neighbor->keys[neighbor->num_keys - 1]; 448 | neighbor->keys[neighbor->num_keys - 1] = NULL; 449 | } 450 | else { 451 | nd->keys[0] = neighbor->keys[neighbor->num_keys - 1]; 452 | neighbor->keys[neighbor->num_keys - 1] = NULL; 453 | 454 | nd->pointers[0] = neighbor->pointers[neighbor->num_keys - 1]; 455 | neighbor->pointers[neighbor->num_keys - 1] = NULL; 456 | 457 | // nd->parent->keys[nd_index - 1] = nd->keys[0]; // share the same key with child !! 458 | strcpy(nd->parent->keys[nd_index - 1], nd->keys[0]); 459 | } 460 | } 461 | else { 462 | if (!nd->is_leaf){ 463 | nd->keys[nd->num_keys] = nd->parent->keys[0]; // link to father's key 464 | nd->pointers[nd->num_keys + 1] = neighbor->pointers[0]; 465 | tmp = (node *)nd->pointers[nd->num_keys + 1]; 466 | tmp->parent = nd; 467 | nd->parent->keys[0] = neighbor->keys[0]; // 468 | } 469 | else { 470 | nd->keys[nd->num_keys] = neighbor->keys[0]; 471 | nd->pointers[nd->num_keys] = neighbor->pointers[0]; 472 | // nd->parent->keys[0] = neighbor->keys[1]; // share the same key with chid !! 473 | strcpy(nd->parent->keys[0], neighbor->keys[1]); 474 | } 475 | for (i = 0; i < neighbor->num_keys - 1; i++){ 476 | neighbor->keys[i] = neighbor->keys[i + 1]; 477 | neighbor->pointers[i] = neighbor->pointers[i + 1]; 478 | } 479 | neighbor->keys[i] = NULL; 480 | if (!nd->is_leaf) 481 | neighbor->pointers[i] = neighbor->pointers[i + 1]; 482 | else 483 | neighbor->pointers[i] = NULL; 484 | } 485 | 486 | neighbor->num_keys--; 487 | nd->num_keys++; 488 | 489 | } 490 | 491 | node *coalesce_nodes(node *root, node *nd, node *neighbor, int nd_index) 492 | { 493 | int i, j, start, end; 494 | char *k_prime; 495 | node *tmp, *parent; 496 | 497 | if (nd_index == 0) { // make sure neighbor is on the left 498 | tmp = nd; 499 | nd = neighbor; 500 | neighbor = tmp; 501 | nd_index = 1; 502 | } 503 | parent = nd->parent; 504 | 505 | start = neighbor->num_keys; 506 | if (nd->is_leaf){ 507 | for (i = start, j = 0; j < nd->num_keys; i++, j++){ 508 | neighbor->keys[i] = nd->keys[j]; 509 | neighbor->pointers[i] = nd->pointers[j]; 510 | nd->keys[j] = NULL; 511 | nd->pointers[j] = NULL; 512 | } 513 | neighbor->num_keys += nd->num_keys; 514 | neighbor->pointers[size - 1] = nd->pointers[size - 1]; 515 | } 516 | else { 517 | neighbor->keys[start] = malloc(MAX_KEY_LEN); 518 | strcpy(neighbor->keys[start], parent->keys[nd_index - 1]); 519 | // neighbor->keys[start] = parent->keys[nd_index - 1]; 520 | for (i = start + 1, j = 0; j < nd->num_keys; i++, j++){ 521 | neighbor->keys[i] = nd->keys[j]; 522 | neighbor->pointers[i] = nd->pointers[j]; 523 | } 524 | neighbor->pointers[i] = nd->pointers[j]; 525 | neighbor->num_keys += nd->num_keys + 1; 526 | neighbor->next = nd->next; 527 | 528 | for (i = 0; i <= neighbor->num_keys; i++){ 529 | tmp = (node *)neighbor->pointers[i]; 530 | tmp->parent = neighbor; 531 | } 532 | } 533 | destroy_node(nd); 534 | return delete_entry(root, parent, nd_index); 535 | } 536 | 537 | int get_node_index(node *nd) 538 | { 539 | node *parent; 540 | int i; 541 | parent = nd->parent; 542 | for (i = 0; i < parent->num_keys && parent->pointers[i] != nd; i++) 543 | ; 544 | return i; 545 | } 546 | 547 | void destroy_node(node *nd) 548 | { 549 | free(nd->keys); 550 | free(nd->pointers); 551 | free(nd); 552 | } 553 | 554 | node *adjust_root(node *root) 555 | { 556 | node *new_root; 557 | if (root->num_keys > 0) // at least two childs 558 | return root; 559 | if (!root->is_leaf){ // root has only one child 560 | new_root = root->pointers[0]; 561 | new_root->parent = NULL; 562 | } 563 | else 564 | new_root = NULL; 565 | destroy_node(root); 566 | return new_root; 567 | } 568 | 569 | void *remove_entry(node *nd, int index) 570 | { 571 | int i, index_k; 572 | 573 | if (nd->is_leaf){ 574 | free(nd->keys[index]); 575 | free(nd->pointers[index]); // destroy the record 576 | for (i = index; i < nd->num_keys - 1; i++){ 577 | nd->keys[i] = nd->keys[i + 1]; 578 | nd->pointers[i] = nd->pointers[i + 1]; 579 | } 580 | nd->keys[i] = NULL; 581 | nd->pointers[i] = NULL; 582 | } 583 | else{ 584 | index_k = index - 1; // index_p == index 585 | free(nd->keys[index_k]); 586 | for (i = index_k; i < nd->num_keys - 1; i++){ 587 | nd->keys[i] = nd->keys[i + 1]; 588 | nd->pointers[i + 1] = nd->pointers[i + 2]; 589 | } 590 | nd->keys[i] = NULL; 591 | nd->pointers[i + 1] = NULL; 592 | } 593 | nd->num_keys--; 594 | } 595 | 596 | 597 | main() 598 | { 599 | node *root = NULL; 600 | record *r; 601 | char *terms[] = {"a", "b", "c", "d", "e", "f", "g", 602 | "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", 603 | "s", "t", "u", "v", "w", "x", "y", "z" }; 604 | int i; 605 | size = 4; 606 | for (i = 0; i < 26; i++){ 607 | root = insert(root, terms[i], i + 1000); 608 | } 609 | print_tree(root); 610 | } 611 | 612 | -------------------------------------------------------------------------------- /bptree_v1.5.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Peking University. 2 | // Author: Xiaosong Rong (rongxiaosong@gmail.com) 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #define MAX_KEY_LEN 15 10 | #define MAX_NUM_VALUE 50 11 | 12 | typedef struct node { 13 | void **pointers; 14 | char **keys; 15 | struct node *parent; 16 | int num_keys; 17 | bool is_leaf; 18 | struct node *next; // used for printing 19 | } node; 20 | 21 | typedef struct record { 22 | int value; 23 | } record; 24 | 25 | int size = 5; // number of pointers of each node 26 | 27 | void print_tree(node *root); 28 | record *find(node *root, char *key); 29 | node *find_leaf(node *root, char *key); 30 | 31 | // Insertion 32 | record *make_new_record(int value); 33 | node *make_new_node(); 34 | node *make_new_leaf(); 35 | node *make_new_tree(char *key, int value); 36 | node *make_new_root(node *left, node *right, char *key); 37 | node *insert(node *root, char *key, int value); 38 | node *insert_into_parent(node *root, node *left, node *right, char *key); 39 | void insert_into_node(node *nd, node *right, int index, char *key); 40 | node *insert_into_node_after_splitting(node *root, node *nd, node *right, 41 | int index, char *key); 42 | node *insert_into_leaf_after_splitting(node *root, node *leaf, int index, 43 | char *key, record *rec); 44 | void insert_into_leaf(node *leaf, int index, char *key, record *rec); 45 | 46 | // Deletion 47 | void destroy_node(node *nd); 48 | void *remove_entry(node *nd, int index); 49 | node *delete(node *root, char *key); 50 | node *delete_entry(node *root, node *nd, int index); 51 | node *adjust_root(node *root); 52 | int get_node_index(node *nd); 53 | node *coalesce_nodes(node *root, node *nd, node *neighbor, int nd_index); 54 | void distribute_nodes(node *nd, node *neighbor, int nd_index); 55 | 56 | // Bulk Loading -- build B+-tree after sorting 57 | node *bulk_load(char (*keys)[MAX_KEY_LEN], int *values, int n); 58 | int cmp(const void *, const void *); 59 | 60 | 61 | void print_tree(node *root) 62 | { 63 | node *p, *p_down; 64 | int i; 65 | if (root == NULL){ 66 | printf("Empty tree!\n"); 67 | return; 68 | } 69 | p = root; 70 | p_down = root; 71 | while (!p->is_leaf){ 72 | for (i = 0; i < p->num_keys; i++) 73 | printf("%s ", p->keys[i]); 74 | // printf("%d ", p->keys[i][0]); // for test 75 | printf("| "); 76 | p = p->next; 77 | if (!p){ 78 | p_down = p_down->pointers[0]; // next level 79 | p = p_down; 80 | printf("\n"); 81 | } 82 | } 83 | 84 | while (p){ 85 | for (i = 0; i < p->num_keys; i++) 86 | printf("%s ", p->keys[i]); 87 | // printf("%d ", p->keys[i][0]); // for test 88 | printf(" | "); 89 | p = p->pointers[size-1]; 90 | } 91 | printf("\n"); 92 | } 93 | 94 | record *find(node *root, char *key) 95 | { 96 | node *leaf; 97 | int i; 98 | leaf = find_leaf(root, key); 99 | if (leaf == NULL) 100 | return NULL; 101 | for (i = 0; i < leaf->num_keys && strcmp(leaf->keys[i], key) != 0; i++) 102 | ; 103 | if (i == leaf->num_keys) 104 | return NULL; 105 | return (record *)leaf->pointers[i]; 106 | } 107 | 108 | node *find_leaf(node *root, char *key) 109 | { 110 | node *nd; 111 | int i; 112 | if (root == NULL) 113 | return root; 114 | nd = root; 115 | while (!nd->is_leaf){ 116 | for (i = 0; i < nd->num_keys && strcmp(nd->keys[i], key) <= 0; i++) 117 | ; 118 | nd = (node *)nd->pointers[i]; 119 | } 120 | return nd; 121 | } 122 | 123 | record *make_new_record(int value) 124 | { 125 | record *rec; 126 | rec = (record *)malloc(sizeof(record)); 127 | rec->value = value; 128 | return rec; 129 | } 130 | 131 | node *make_new_node() 132 | { 133 | node *nd; 134 | nd = (node *)malloc(sizeof(node)); 135 | nd->pointers = malloc(size * sizeof(void *)); 136 | nd->keys = malloc((size - 1) * sizeof(char *)); 137 | nd->parent = NULL; 138 | nd->num_keys = 0; 139 | nd->is_leaf = false; 140 | nd->next = NULL; 141 | return nd; 142 | } 143 | 144 | node *make_new_leaf() 145 | { 146 | node *leaf; 147 | leaf = make_new_node(); 148 | leaf->is_leaf = true; 149 | return leaf; 150 | } 151 | 152 | node *make_new_tree(char *key, int value) 153 | { 154 | node *root; 155 | record *rec; 156 | root = make_new_leaf(); 157 | rec = make_new_record(value); 158 | root->pointers[0] = rec; 159 | root->keys[0] = malloc(MAX_KEY_LEN); 160 | strcpy(root->keys[0], key); 161 | root->pointers[size-1] = NULL; 162 | root->num_keys++; 163 | return root; 164 | } 165 | 166 | node *make_new_root(node *left, node *right, char *key) 167 | { 168 | node *root; 169 | root = make_new_node(); 170 | root->pointers[0] = left; 171 | root->pointers[1] = right; 172 | root->keys[0] = malloc(MAX_KEY_LEN); 173 | strcpy(root->keys[0], key); 174 | root->num_keys++; 175 | left->parent = root; 176 | right->parent = root; 177 | return root; 178 | } 179 | 180 | node *insert(node *root, char *key, int value) 181 | { 182 | record *rec; 183 | node *leaf; 184 | int index, cond; 185 | leaf = find_leaf(root, key); 186 | if (!leaf){ // cannot find the leaf, the tree is empty 187 | return make_new_tree(key, value); 188 | } 189 | for (index = 0; index < leaf->num_keys && (cond = strcmp(leaf->keys[index], key)) < 0; index++) 190 | ; 191 | if (cond == 0) // ignore duplicates 192 | return root; 193 | rec = make_new_record(value); 194 | if (leaf->num_keys < size - 1){ 195 | insert_into_leaf(leaf, index, key, rec); 196 | return root; // the root remains unchanged 197 | } 198 | return insert_into_leaf_after_splitting(root, leaf, index, key, rec); 199 | } 200 | 201 | node *insert_into_parent(node *root, node *left, node *right, char *key) 202 | { 203 | node *parent; 204 | int index, i; 205 | parent = left->parent; 206 | 207 | if (parent == NULL){ 208 | return make_new_root(left, right, key); 209 | } 210 | 211 | for (index = 0; index < parent->num_keys && parent->pointers[index] != left; index++); 212 | ; 213 | if (parent->num_keys < size - 1){ 214 | insert_into_node(parent, right, index, key); 215 | return root; // the root remains unchanged 216 | } 217 | return insert_into_node_after_splitting(root, parent, right, index, key); 218 | } 219 | 220 | void insert_into_node(node *nd, node *right, int index, char *key) 221 | { 222 | int i; 223 | for (i = nd->num_keys; i > index; i--){ 224 | nd->keys[i] = nd->keys[i-1]; 225 | nd->pointers[i+1] = nd->pointers[i]; 226 | } 227 | nd->keys[index] = malloc(MAX_KEY_LEN); 228 | strcpy(nd->keys[index], key); 229 | nd->pointers[index+1] = right; 230 | nd->num_keys++; 231 | } 232 | 233 | node *insert_into_node_after_splitting(node *root, node *nd, node *right, int index, char *key) 234 | { 235 | int i, split; 236 | node **temp_ps, *new_nd, *child; 237 | char **temp_ks, *new_key; 238 | temp_ps = malloc((size + 1) * sizeof(node *)); 239 | temp_ks = malloc(size * sizeof(char *)); 240 | 241 | for (i = 0; i < size + 1; i++){ 242 | if (i == index + 1) 243 | temp_ps[i] = right; 244 | else if (i < index + 1) 245 | temp_ps[i] = nd->pointers[i]; 246 | else 247 | temp_ps[i] = nd->pointers[i-1]; 248 | } 249 | for (i = 0; i < size; i++){ 250 | if (i == index){ 251 | temp_ks[i] = malloc(MAX_KEY_LEN); 252 | strcpy(temp_ks[i], key); 253 | } 254 | else if (i < index) 255 | temp_ks[i] = nd->keys[i]; 256 | else 257 | temp_ks[i] = nd->keys[i-1]; 258 | } 259 | 260 | 261 | split = size % 2 ? size / 2 + 1 : size / 2; // split is #pointers 262 | nd->num_keys = split - 1; 263 | for (i = 0; i < split - 1; i++){ 264 | nd->pointers[i] = temp_ps[i]; 265 | nd->keys[i] = temp_ks[i]; 266 | } 267 | nd->pointers[i] = temp_ps[i]; // i == split - 1 268 | new_key = temp_ks[split - 1]; 269 | 270 | new_nd = make_new_node(); 271 | new_nd->num_keys = size - split; 272 | for (++i; i < size; i++){ 273 | new_nd->pointers[i - split] = temp_ps[i]; 274 | new_nd->keys[i - split] = temp_ks[i]; 275 | } 276 | new_nd->pointers[i - split] = temp_ps[i]; 277 | new_nd->parent = nd->parent; 278 | for (i = 0; i <= new_nd->num_keys; i++){ // #pointers == num_keys + 1 279 | child = (node *)(new_nd->pointers[i]); 280 | child->parent = new_nd; 281 | } 282 | new_nd->next = nd->next; 283 | nd->next = new_nd; 284 | 285 | free(temp_ps); 286 | free(temp_ks); 287 | return insert_into_parent(root, nd, new_nd, new_key); 288 | } 289 | 290 | void insert_into_leaf(node *leaf, int index, char *key, record *rec) 291 | { 292 | int i; 293 | for (i = leaf->num_keys; i > index; i--){ 294 | leaf->keys[i] = leaf->keys[i-1]; 295 | leaf->pointers[i] = leaf->pointers[i-1]; 296 | } 297 | leaf->keys[index] = malloc(MAX_KEY_LEN); 298 | strcpy(leaf->keys[index], key); 299 | leaf->pointers[index] = rec; 300 | leaf->num_keys++; 301 | } 302 | 303 | node *insert_into_leaf_after_splitting(node *root, node *leaf, int index, char *key, record *rec) 304 | { 305 | node *new_leaf; 306 | record **temp_ps; 307 | char **temp_ks, *new_key; 308 | int i, split; 309 | 310 | temp_ps = malloc(size * sizeof(record *)); 311 | temp_ks = malloc(size * sizeof(char *)); 312 | for (i = 0; i < size; i++){ 313 | if (i == index){ 314 | temp_ps[i] = rec; 315 | temp_ks[i] = malloc(MAX_KEY_LEN); 316 | strcpy(temp_ks[i], key); 317 | } 318 | else if (i < index){ 319 | temp_ps[i] = leaf->pointers[i]; 320 | temp_ks[i] = leaf->keys[i]; 321 | } 322 | else{ 323 | temp_ps[i] = leaf->pointers[i-1]; 324 | temp_ks[i] = leaf->keys[i-1]; 325 | } 326 | } 327 | 328 | split = size / 2; 329 | leaf->num_keys = split; 330 | for (i = 0; i < split; i++){ 331 | leaf->pointers[i] = temp_ps[i]; 332 | leaf->keys[i] = temp_ks[i]; 333 | } 334 | 335 | new_leaf = make_new_leaf(); 336 | new_leaf->num_keys = size - split; 337 | for (; i < size; i++){ 338 | new_leaf->pointers[i - split] = temp_ps[i]; 339 | new_leaf->keys[i - split] = temp_ks[i]; 340 | } 341 | 342 | new_leaf->parent = leaf->parent; 343 | new_leaf->pointers[size - 1] = leaf->pointers[size - 1]; 344 | leaf->pointers[size - 1] = new_leaf; 345 | free(temp_ps); 346 | free(temp_ks); 347 | new_key = new_leaf->keys[0]; 348 | return insert_into_parent(root, leaf, new_leaf, new_key); 349 | } 350 | 351 | node *delete(node *root, char *key) 352 | { 353 | node *leaf; 354 | record *rec; 355 | int i; 356 | leaf = find_leaf(root, key); 357 | if (leaf == NULL) 358 | return root; 359 | for (i = 0; i < leaf->num_keys && strcmp(leaf->keys[i], key) != 0; i++) 360 | ; 361 | if (i == leaf->num_keys) // no such key 362 | return root; 363 | rec = (record *)leaf->pointers[i]; 364 | root = delete_entry(root, leaf, i); 365 | return root; 366 | } 367 | 368 | node *delete_entry(node *root, node *nd, int index) 369 | { 370 | int min_keys, cap, nd_index; 371 | node *neighbor; 372 | 373 | remove_entry(nd, index); 374 | if (nd == root) 375 | return adjust_root(nd); 376 | min_keys = nd->is_leaf ? size / 2 : (size - 1) / 2; 377 | if (nd->num_keys >= min_keys) { 378 | return root; 379 | } 380 | 381 | nd_index = get_node_index(nd); 382 | if (nd_index == 0) 383 | neighbor = nd->parent->pointers[1]; // right neighbor 384 | else 385 | neighbor = nd->parent->pointers[nd_index - 1]; // left neighbor 386 | 387 | cap = nd->is_leaf ? size - 1 : size - 2; 388 | if (neighbor->num_keys + nd->num_keys <= cap) 389 | return coalesce_nodes(root, nd, neighbor, nd_index); 390 | 391 | distribute_nodes(nd, neighbor, nd_index); 392 | return root; 393 | } 394 | 395 | void distribute_nodes(node *nd, node *neighbor, int nd_index) 396 | { 397 | int i; 398 | node *tmp; 399 | if (nd_index != 0) { 400 | if (!nd->is_leaf) 401 | nd->pointers[nd->num_keys + 1] = nd->pointers[nd->num_keys]; 402 | for (i = nd->num_keys; i > 0; i--){ // shift to right by 1 403 | nd->keys[i] = nd->keys[i - 1]; 404 | nd->pointers[i] = nd->pointers[i - 1]; 405 | } 406 | if (!nd->is_leaf){ 407 | nd->keys[0] = nd->parent->keys[nd_index - 1]; 408 | 409 | nd->pointers[0] = neighbor->pointers[neighbor->num_keys]; 410 | tmp = (node *)nd->pointers[0]; 411 | tmp->parent = nd; 412 | neighbor->pointers[neighbor->num_keys] = NULL; 413 | 414 | nd->parent->keys[nd_index - 1] = neighbor->keys[neighbor->num_keys - 1]; 415 | neighbor->keys[neighbor->num_keys - 1] = NULL; 416 | } 417 | else { 418 | nd->keys[0] = neighbor->keys[neighbor->num_keys - 1]; 419 | neighbor->keys[neighbor->num_keys - 1] = NULL; 420 | 421 | nd->pointers[0] = neighbor->pointers[neighbor->num_keys - 1]; 422 | neighbor->pointers[neighbor->num_keys - 1] = NULL; 423 | 424 | // nd->parent->keys[nd_index - 1] = nd->keys[0]; // share the same key with child !! 425 | strcpy(nd->parent->keys[nd_index - 1], nd->keys[0]); 426 | } 427 | } 428 | else { 429 | if (!nd->is_leaf){ 430 | nd->keys[nd->num_keys] = nd->parent->keys[0]; // link to father's key 431 | nd->pointers[nd->num_keys + 1] = neighbor->pointers[0]; 432 | tmp = (node *)nd->pointers[nd->num_keys + 1]; 433 | tmp->parent = nd; 434 | nd->parent->keys[0] = neighbor->keys[0]; // 435 | } 436 | else { 437 | nd->keys[nd->num_keys] = neighbor->keys[0]; 438 | nd->pointers[nd->num_keys] = neighbor->pointers[0]; 439 | // nd->parent->keys[0] = neighbor->keys[1]; // share the same key with chid !! 440 | strcpy(nd->parent->keys[0], neighbor->keys[1]); 441 | } 442 | for (i = 0; i < neighbor->num_keys - 1; i++){ 443 | neighbor->keys[i] = neighbor->keys[i + 1]; 444 | neighbor->pointers[i] = neighbor->pointers[i + 1]; 445 | } 446 | neighbor->keys[i] = NULL; 447 | if (!nd->is_leaf) 448 | neighbor->pointers[i] = neighbor->pointers[i + 1]; 449 | else 450 | neighbor->pointers[i] = NULL; 451 | } 452 | 453 | neighbor->num_keys--; 454 | nd->num_keys++; 455 | 456 | } 457 | 458 | node *coalesce_nodes(node *root, node *nd, node *neighbor, int nd_index) 459 | { 460 | int i, j, start, end; 461 | char *k_prime; 462 | node *tmp, *parent; 463 | 464 | if (nd_index == 0) { // make sure neighbor is on the left 465 | tmp = nd; 466 | nd = neighbor; 467 | neighbor = tmp; 468 | nd_index = 1; 469 | } 470 | parent = nd->parent; 471 | 472 | start = neighbor->num_keys; 473 | if (nd->is_leaf){ 474 | for (i = start, j = 0; j < nd->num_keys; i++, j++){ 475 | neighbor->keys[i] = nd->keys[j]; 476 | neighbor->pointers[i] = nd->pointers[j]; 477 | nd->keys[j] = NULL; 478 | nd->pointers[j] = NULL; 479 | } 480 | neighbor->num_keys += nd->num_keys; 481 | neighbor->pointers[size - 1] = nd->pointers[size - 1]; 482 | } 483 | else { 484 | neighbor->keys[start] = malloc(MAX_KEY_LEN); 485 | strcpy(neighbor->keys[start], parent->keys[nd_index - 1]); 486 | // neighbor->keys[start] = parent->keys[nd_index - 1]; 487 | for (i = start + 1, j = 0; j < nd->num_keys; i++, j++){ 488 | neighbor->keys[i] = nd->keys[j]; 489 | neighbor->pointers[i] = nd->pointers[j]; 490 | } 491 | neighbor->pointers[i] = nd->pointers[j]; 492 | neighbor->num_keys += nd->num_keys + 1; 493 | neighbor->next = nd->next; 494 | 495 | for (i = 0; i <= neighbor->num_keys; i++){ 496 | tmp = (node *)neighbor->pointers[i]; 497 | tmp->parent = neighbor; 498 | } 499 | } 500 | destroy_node(nd); 501 | return delete_entry(root, parent, nd_index); 502 | } 503 | 504 | int get_node_index(node *nd) 505 | { 506 | node *parent; 507 | int i; 508 | parent = nd->parent; 509 | for (i = 0; i < parent->num_keys && parent->pointers[i] != nd; i++) 510 | ; 511 | return i; 512 | } 513 | 514 | void destroy_node(node *nd) 515 | { 516 | free(nd->keys); 517 | free(nd->pointers); 518 | free(nd); 519 | } 520 | 521 | node *adjust_root(node *root) 522 | { 523 | node *new_root; 524 | if (root->num_keys > 0) // at least two childs 525 | return root; 526 | if (!root->is_leaf){ // root has only one child 527 | new_root = root->pointers[0]; 528 | new_root->parent = NULL; 529 | } 530 | else 531 | new_root = NULL; 532 | destroy_node(root); 533 | return new_root; 534 | } 535 | 536 | void *remove_entry(node *nd, int index) 537 | { 538 | int i, index_k; 539 | 540 | if (nd->is_leaf){ 541 | free(nd->keys[index]); 542 | free(nd->pointers[index]); // destroy the record 543 | for (i = index; i < nd->num_keys - 1; i++){ 544 | nd->keys[i] = nd->keys[i + 1]; 545 | nd->pointers[i] = nd->pointers[i + 1]; 546 | } 547 | nd->keys[i] = NULL; 548 | nd->pointers[i] = NULL; 549 | } 550 | else{ 551 | index_k = index - 1; // index_p == index 552 | free(nd->keys[index_k]); 553 | for (i = index_k; i < nd->num_keys - 1; i++){ 554 | nd->keys[i] = nd->keys[i + 1]; 555 | nd->pointers[i + 1] = nd->pointers[i + 2]; 556 | } 557 | nd->keys[i] = NULL; 558 | nd->pointers[i + 1] = NULL; 559 | } 560 | nd->num_keys--; 561 | } 562 | 563 | node *bulk_load(char (*keys)[MAX_KEY_LEN], int *values, int n) 564 | { 565 | node *root, *p; 566 | record *rec; 567 | int i; 568 | // qsort(keys, n, MAX_KEY_LEN, cmp); 569 | qsort(keys, n, MAX_KEY_LEN, (int (*)(const void *, const void *))strcmp); 570 | p = NULL; 571 | root = make_new_tree(keys[0], values[0]); 572 | for (i = 1; i < n; i++) { 573 | if (strcmp(keys[i], keys[i-1]) == 0) // ignore duplicates (key) 574 | continue; 575 | p = root; 576 | while (!p->is_leaf) { 577 | p = p->pointers[p->num_keys]; // right most child 578 | } // p is the right most child 579 | rec = make_new_record(values[i]); 580 | if (p->num_keys < size - 1) 581 | insert_into_leaf(p, p->num_keys, keys[i], rec); 582 | else 583 | root = insert_into_leaf_after_splitting(root, p, p->num_keys, keys[i], rec); 584 | } 585 | return root; 586 | } 587 | 588 | int cmp(const void *p, const void *q) 589 | { 590 | return strcmp((char *)p, (char *)q); 591 | } 592 | 593 | // for test 594 | void test_find(node *root) 595 | { 596 | char *key; 597 | record *r; 598 | key = malloc(MAX_KEY_LEN); 599 | while (1) { 600 | scanf("%s", key); 601 | if (strcmp(key, "exit") == 0) 602 | break; 603 | r = find(root, key); 604 | if (r == NULL) { 605 | printf("Not found!!\n"); 606 | continue; 607 | } 608 | printf("Record of %s: %d\n", key, r->value); 609 | } 610 | } 611 | 612 | node *test_delete(node *root) 613 | { 614 | char *key; 615 | key = malloc(MAX_KEY_LEN); 616 | while (1) { 617 | scanf("%s", key); 618 | if (strcmp(key, "exit") == 0) 619 | break; 620 | root = delete(root, key); 621 | print_tree(root); 622 | } 623 | return root; 624 | } 625 | // end of test 626 | 627 | main(int argc, char *argv[]) 628 | { 629 | node *root = NULL; 630 | char keys[MAX_NUM_VALUE][MAX_KEY_LEN]; 631 | int values[MAX_NUM_VALUE]; 632 | int i, n; 633 | FILE *fp; 634 | if (argc > 1) { 635 | fp = fopen(argv[1], "r"); 636 | for (n = 0; n < MAX_NUM_VALUE && fscanf(fp, "%s%d", keys[n], &values[n]) != EOF; n++) 637 | ; 638 | fclose(fp); 639 | } 640 | else { 641 | for (n = 0; n < MAX_NUM_VALUE && scanf("%s%d", keys[n], &values[n]) != EOF; n++) 642 | ; 643 | } 644 | root = bulk_load(keys, values, n); 645 | print_tree(root); 646 | 647 | root = NULL; 648 | while (n--){ 649 | root = insert(root, keys[n], values[n]); 650 | } 651 | print_tree(root); 652 | 653 | // test_find(root); 654 | test_delete(root); 655 | } 656 | 657 | -------------------------------------------------------------------------------- /bptree_v2.c: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2013 Peking University. 2 | // Author: Xiaosong Rong (rongxiaosong@gmail.com) 3 | // 4 | /* 5 | * Change of version 2.0: 6 | * void print_tree(node *root): 7 | * not need the pointer next 8 | * use a queue to handle printint by level 9 | * support Bulk Loading: 10 | * node *bulk_load(char **keys, int *values, int n); 11 | */ 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #define MAX_KEY_LEN 15 19 | #define MAX_NUM_VALUE 50 20 | 21 | // each key is a string pointer 22 | // num_keys if the number of keys, number of pointers = num_keys + 1 23 | typedef struct node { 24 | void **pointers; 25 | char **keys; 26 | struct node *parent; 27 | int num_keys; 28 | bool is_leaf; 29 | } node; 30 | 31 | typedef struct record { 32 | int value; 33 | } record; 34 | 35 | typedef struct queue { 36 | int capacity; 37 | int front; 38 | int rear; 39 | int size; 40 | node **items; 41 | } queue; 42 | 43 | int size = 5; // number of pointers of each node 44 | 45 | void print_tree(node *root); 46 | record *find(node *root, char *key); 47 | node *find_leaf(node *root, char *key); 48 | 49 | // Insertion 50 | record *make_new_record(int value); 51 | node *make_new_node(); 52 | node *make_new_leaf(); 53 | node *make_new_tree(char *key, int value); 54 | node *make_new_root(node *left, node *right, char *key); 55 | node *insert(node *root, char *key, int value); 56 | node *insert_into_parent(node *root, node *left, node *right, char *key); 57 | void insert_into_node(node *nd, node *right, int index, char *key); 58 | node *insert_into_node_after_splitting(node *root, node *nd, node *right, 59 | int index, char *key); 60 | node *insert_into_leaf_after_splitting(node *root, node *leaf, int index, 61 | char *key, record *rec); 62 | void insert_into_leaf(node *leaf, int index, char *key, record *rec); 63 | 64 | // Deletion 65 | void destroy_node(node *nd); 66 | void destroy_tree(node *root); 67 | void *remove_entry(node *nd, int index); 68 | node *delete(node *root, char *key); 69 | node *delete_entry(node *root, node *nd, int index); 70 | node *adjust_root(node *root); 71 | int get_node_index(node *nd); 72 | node *coalesce_nodes(node *root, node *nd, node *neighbor, int nd_index); 73 | void distribute_nodes(node *nd, node *neighbor, int nd_index); 74 | 75 | // Bulk Loading -- build B+-tree after sorting 76 | node *bulk_load(char **keys, int *values, int n); 77 | int cmp(const void *, const void *); 78 | 79 | queue *init_queue(int capacity) 80 | { 81 | queue *q; 82 | q = malloc(sizeof(queue)); 83 | q->items = malloc(sizeof(node *) * capacity); 84 | q->capacity = capacity; 85 | q->front = q->rear = 0; 86 | q->size = 0; 87 | return q; 88 | } 89 | 90 | void enqueue(queue *q, node *nd) 91 | { 92 | if (q->size == q->capacity) { 93 | fprintf(stderr, "the queue is full !!\n"); 94 | return ; 95 | } 96 | q->items[q->rear] = nd; 97 | q->rear = (q->rear + 1) % q->capacity; 98 | q->size++; 99 | } 100 | 101 | node *dequeue(queue *q) 102 | { 103 | node *nd; 104 | if (q->size == 0) { 105 | fprintf(stderr, "the queue is empty !!\n"); 106 | return ; 107 | } 108 | nd = q->items[q->front]; 109 | q->front = (q->front + 1) % q->capacity; 110 | q->size--; 111 | return nd; 112 | } 113 | 114 | int get_level(node *root, node *nd) 115 | { 116 | int level = 0; 117 | while (nd != root) { 118 | nd = nd->parent; 119 | level++; 120 | } 121 | return level; 122 | } 123 | 124 | void print_tree(node *root) 125 | { 126 | queue *q; 127 | node *nd; 128 | int level, new_level, i; 129 | if (root == NULL) { 130 | printf("Empty tree !\n"); 131 | return ; 132 | } 133 | q = init_queue(MAX_NUM_VALUE); 134 | enqueue(q, root); 135 | level = 0; 136 | while (q->size > 0) { 137 | nd = dequeue(q); 138 | new_level = get_level(root, nd); 139 | if (new_level > level) { 140 | printf("\n"); 141 | level = new_level; 142 | } 143 | for (i = 0; i < nd->num_keys; i++) 144 | printf("%s ", nd->keys[i]); 145 | printf("| "); 146 | if (!nd->is_leaf) { 147 | for (i = 0; i <= nd->num_keys; i++) 148 | enqueue(q, nd->pointers[i]); 149 | } 150 | } 151 | printf("\n"); 152 | free(q->items); 153 | free(q); 154 | } 155 | 156 | record *find(node *root, char *key) 157 | { 158 | node *leaf; 159 | int i; 160 | leaf = find_leaf(root, key); 161 | if (leaf == NULL) 162 | return NULL; 163 | for (i = 0; i < leaf->num_keys && strcmp(leaf->keys[i], key) != 0; i++) 164 | ; 165 | if (i == leaf->num_keys) 166 | return NULL; 167 | return (record *)leaf->pointers[i]; 168 | } 169 | 170 | node *find_leaf(node *root, char *key) 171 | { 172 | node *nd; 173 | int i; 174 | if (root == NULL) 175 | return root; 176 | nd = root; 177 | while (!nd->is_leaf){ 178 | for (i = 0; i < nd->num_keys && strcmp(nd->keys[i], key) <= 0; i++) 179 | ; 180 | nd = (node *)nd->pointers[i]; 181 | } 182 | return nd; 183 | } 184 | 185 | record *make_new_record(int value) 186 | { 187 | record *rec; 188 | rec = (record *)malloc(sizeof(record)); 189 | rec->value = value; 190 | return rec; 191 | } 192 | 193 | node *make_new_node() 194 | { 195 | node *nd; 196 | nd = (node *)malloc(sizeof(node)); 197 | nd->pointers = malloc(size * sizeof(void *)); 198 | nd->keys = malloc((size - 1) * sizeof(char *)); 199 | nd->parent = NULL; 200 | nd->num_keys = 0; 201 | nd->is_leaf = false; 202 | return nd; 203 | } 204 | 205 | node *make_new_leaf() 206 | { 207 | node *leaf; 208 | leaf = make_new_node(); 209 | leaf->is_leaf = true; 210 | return leaf; 211 | } 212 | 213 | void destroy_node(node *nd) 214 | { 215 | free(nd->keys); 216 | free(nd->pointers); 217 | free(nd); 218 | } 219 | 220 | node *make_new_tree(char *key, int value) 221 | { 222 | node *root; 223 | record *rec; 224 | root = make_new_leaf(); 225 | rec = make_new_record(value); 226 | root->pointers[0] = rec; 227 | root->keys[0] = malloc(MAX_KEY_LEN); 228 | strcpy(root->keys[0], key); 229 | root->pointers[size-1] = NULL; 230 | root->num_keys++; 231 | return root; 232 | } 233 | 234 | void destroy_tree(node *root) 235 | { 236 | int i; 237 | if (!root->is_leaf) { 238 | for (i = 0; i < root->num_keys; i++) { 239 | free(root->keys[i]); 240 | destroy_tree(root->pointers[i]); 241 | } 242 | destroy_tree(root->pointers[i]); 243 | } 244 | else { 245 | for (i = 0; i < root->num_keys; i++) { 246 | free(root->keys[i]); 247 | free(root->pointers[i]); // free record 248 | } 249 | } 250 | destroy_node(root); 251 | } 252 | 253 | node *make_new_root(node *left, node *right, char *key) 254 | { 255 | node *root; 256 | root = make_new_node(); 257 | root->pointers[0] = left; 258 | root->pointers[1] = right; 259 | root->keys[0] = malloc(MAX_KEY_LEN); 260 | strcpy(root->keys[0], key); 261 | root->num_keys++; 262 | left->parent = root; 263 | right->parent = root; 264 | return root; 265 | } 266 | 267 | node *insert(node *root, char *key, int value) 268 | { 269 | record *rec; 270 | node *leaf; 271 | int index, cond; 272 | leaf = find_leaf(root, key); 273 | if (!leaf){ // cannot find the leaf, the tree is empty 274 | return make_new_tree(key, value); 275 | } 276 | for (index = 0; index < leaf->num_keys && (cond = strcmp(leaf->keys[index], key)) < 0; index++) 277 | ; 278 | if (cond == 0) // ignore duplicates 279 | return root; 280 | rec = make_new_record(value); 281 | if (leaf->num_keys < size - 1){ 282 | insert_into_leaf(leaf, index, key, rec); 283 | return root; // the root remains unchanged 284 | } 285 | return insert_into_leaf_after_splitting(root, leaf, index, key, rec); 286 | } 287 | 288 | node *insert_into_parent(node *root, node *left, node *right, char *key) 289 | { 290 | node *parent; 291 | int index, i; 292 | parent = left->parent; 293 | 294 | if (parent == NULL){ 295 | return make_new_root(left, right, key); 296 | } 297 | 298 | for (index = 0; index < parent->num_keys && parent->pointers[index] != left; index++); 299 | ; 300 | if (parent->num_keys < size - 1){ 301 | insert_into_node(parent, right, index, key); 302 | return root; // the root remains unchanged 303 | } 304 | return insert_into_node_after_splitting(root, parent, right, index, key); 305 | } 306 | 307 | void insert_into_node(node *nd, node *right, int index, char *key) 308 | { 309 | int i; 310 | for (i = nd->num_keys; i > index; i--){ 311 | nd->keys[i] = nd->keys[i-1]; 312 | nd->pointers[i+1] = nd->pointers[i]; 313 | } 314 | nd->keys[index] = malloc(MAX_KEY_LEN); 315 | strcpy(nd->keys[index], key); 316 | nd->pointers[index+1] = right; 317 | nd->num_keys++; 318 | } 319 | 320 | node *insert_into_node_after_splitting(node *root, node *nd, node *right, int index, char *key) 321 | { 322 | int i, split; 323 | node **temp_ps, *new_nd, *child; 324 | char **temp_ks, *new_key; 325 | temp_ps = malloc((size + 1) * sizeof(node *)); 326 | temp_ks = malloc(size * sizeof(char *)); 327 | 328 | for (i = 0; i < size + 1; i++){ 329 | if (i == index + 1) 330 | temp_ps[i] = right; 331 | else if (i < index + 1) 332 | temp_ps[i] = nd->pointers[i]; 333 | else 334 | temp_ps[i] = nd->pointers[i-1]; 335 | } 336 | for (i = 0; i < size; i++){ 337 | if (i == index){ 338 | temp_ks[i] = malloc(MAX_KEY_LEN); 339 | strcpy(temp_ks[i], key); 340 | } 341 | else if (i < index) 342 | temp_ks[i] = nd->keys[i]; 343 | else 344 | temp_ks[i] = nd->keys[i-1]; 345 | } 346 | 347 | 348 | split = size % 2 ? size / 2 + 1 : size / 2; // split is #pointers 349 | nd->num_keys = split - 1; 350 | for (i = 0; i < split - 1; i++){ 351 | nd->pointers[i] = temp_ps[i]; 352 | nd->keys[i] = temp_ks[i]; 353 | } 354 | nd->pointers[i] = temp_ps[i]; // i == split - 1 355 | new_key = temp_ks[split - 1]; 356 | 357 | new_nd = make_new_node(); 358 | new_nd->num_keys = size - split; 359 | for (++i; i < size; i++){ 360 | new_nd->pointers[i - split] = temp_ps[i]; 361 | new_nd->keys[i - split] = temp_ks[i]; 362 | } 363 | new_nd->pointers[i - split] = temp_ps[i]; 364 | new_nd->parent = nd->parent; 365 | for (i = 0; i <= new_nd->num_keys; i++){ // #pointers == num_keys + 1 366 | child = (node *)(new_nd->pointers[i]); 367 | child->parent = new_nd; 368 | } 369 | 370 | free(temp_ps); 371 | free(temp_ks); 372 | return insert_into_parent(root, nd, new_nd, new_key); 373 | } 374 | 375 | void insert_into_leaf(node *leaf, int index, char *key, record *rec) 376 | { 377 | int i; 378 | for (i = leaf->num_keys; i > index; i--){ 379 | leaf->keys[i] = leaf->keys[i-1]; 380 | leaf->pointers[i] = leaf->pointers[i-1]; 381 | } 382 | leaf->keys[index] = malloc(MAX_KEY_LEN); 383 | strcpy(leaf->keys[index], key); 384 | leaf->pointers[index] = rec; 385 | leaf->num_keys++; 386 | } 387 | 388 | node *insert_into_leaf_after_splitting(node *root, node *leaf, int index, char *key, record *rec) 389 | { 390 | node *new_leaf; 391 | record **temp_ps; 392 | char **temp_ks, *new_key; 393 | int i, split; 394 | 395 | temp_ps = malloc(size * sizeof(record *)); 396 | temp_ks = malloc(size * sizeof(char *)); 397 | for (i = 0; i < size; i++){ 398 | if (i == index){ 399 | temp_ps[i] = rec; 400 | temp_ks[i] = malloc(MAX_KEY_LEN); 401 | strcpy(temp_ks[i], key); 402 | } 403 | else if (i < index){ 404 | temp_ps[i] = leaf->pointers[i]; 405 | temp_ks[i] = leaf->keys[i]; 406 | } 407 | else{ 408 | temp_ps[i] = leaf->pointers[i-1]; 409 | temp_ks[i] = leaf->keys[i-1]; 410 | } 411 | } 412 | 413 | split = size / 2; 414 | leaf->num_keys = split; 415 | for (i = 0; i < split; i++){ 416 | leaf->pointers[i] = temp_ps[i]; 417 | leaf->keys[i] = temp_ks[i]; 418 | } 419 | 420 | new_leaf = make_new_leaf(); 421 | new_leaf->num_keys = size - split; 422 | for (; i < size; i++){ 423 | new_leaf->pointers[i - split] = temp_ps[i]; 424 | new_leaf->keys[i - split] = temp_ks[i]; 425 | } 426 | 427 | new_leaf->parent = leaf->parent; 428 | new_leaf->pointers[size - 1] = leaf->pointers[size - 1]; 429 | leaf->pointers[size - 1] = new_leaf; 430 | free(temp_ps); 431 | free(temp_ks); 432 | new_key = new_leaf->keys[0]; 433 | return insert_into_parent(root, leaf, new_leaf, new_key); 434 | } 435 | 436 | node *delete(node *root, char *key) 437 | { 438 | node *leaf; 439 | record *rec; 440 | int i; 441 | leaf = find_leaf(root, key); 442 | if (leaf == NULL) 443 | return root; 444 | for (i = 0; i < leaf->num_keys && strcmp(leaf->keys[i], key) != 0; i++) 445 | ; 446 | if (i == leaf->num_keys) // no such key 447 | return root; 448 | rec = (record *)leaf->pointers[i]; 449 | root = delete_entry(root, leaf, i); 450 | return root; 451 | } 452 | 453 | node *delete_entry(node *root, node *nd, int index) 454 | { 455 | int min_keys, cap, nd_index; 456 | node *neighbor; 457 | 458 | remove_entry(nd, index); 459 | if (nd == root) 460 | return adjust_root(nd); 461 | min_keys = nd->is_leaf ? size / 2 : (size - 1) / 2; 462 | if (nd->num_keys >= min_keys) { 463 | return root; 464 | } 465 | 466 | nd_index = get_node_index(nd); 467 | if (nd_index == 0) 468 | neighbor = nd->parent->pointers[1]; // right neighbor 469 | else 470 | neighbor = nd->parent->pointers[nd_index - 1]; // left neighbor 471 | 472 | cap = nd->is_leaf ? size - 1 : size - 2; 473 | if (neighbor->num_keys + nd->num_keys <= cap) 474 | return coalesce_nodes(root, nd, neighbor, nd_index); 475 | 476 | distribute_nodes(nd, neighbor, nd_index); 477 | return root; 478 | } 479 | 480 | void distribute_nodes(node *nd, node *neighbor, int nd_index) 481 | { 482 | int i; 483 | node *tmp; 484 | if (nd_index != 0) { 485 | if (!nd->is_leaf) 486 | nd->pointers[nd->num_keys + 1] = nd->pointers[nd->num_keys]; 487 | for (i = nd->num_keys; i > 0; i--){ // shift to right by 1 488 | nd->keys[i] = nd->keys[i - 1]; 489 | nd->pointers[i] = nd->pointers[i - 1]; 490 | } 491 | if (!nd->is_leaf){ 492 | nd->keys[0] = nd->parent->keys[nd_index - 1]; 493 | 494 | nd->pointers[0] = neighbor->pointers[neighbor->num_keys]; 495 | tmp = (node *)nd->pointers[0]; 496 | tmp->parent = nd; 497 | neighbor->pointers[neighbor->num_keys] = NULL; 498 | 499 | nd->parent->keys[nd_index - 1] = neighbor->keys[neighbor->num_keys - 1]; 500 | neighbor->keys[neighbor->num_keys - 1] = NULL; 501 | } 502 | else { 503 | nd->keys[0] = neighbor->keys[neighbor->num_keys - 1]; 504 | neighbor->keys[neighbor->num_keys - 1] = NULL; 505 | 506 | nd->pointers[0] = neighbor->pointers[neighbor->num_keys - 1]; 507 | neighbor->pointers[neighbor->num_keys - 1] = NULL; 508 | 509 | // nd->parent->keys[nd_index - 1] = nd->keys[0]; // share the same key with child !! 510 | strcpy(nd->parent->keys[nd_index - 1], nd->keys[0]); 511 | } 512 | } 513 | else { 514 | if (!nd->is_leaf){ 515 | nd->keys[nd->num_keys] = nd->parent->keys[0]; // link to father's key 516 | nd->pointers[nd->num_keys + 1] = neighbor->pointers[0]; 517 | tmp = (node *)nd->pointers[nd->num_keys + 1]; 518 | tmp->parent = nd; 519 | nd->parent->keys[0] = neighbor->keys[0]; // 520 | } 521 | else { 522 | nd->keys[nd->num_keys] = neighbor->keys[0]; 523 | nd->pointers[nd->num_keys] = neighbor->pointers[0]; 524 | // nd->parent->keys[0] = neighbor->keys[1]; // share the same key with chid !! 525 | strcpy(nd->parent->keys[0], neighbor->keys[1]); 526 | } 527 | for (i = 0; i < neighbor->num_keys - 1; i++){ 528 | neighbor->keys[i] = neighbor->keys[i + 1]; 529 | neighbor->pointers[i] = neighbor->pointers[i + 1]; 530 | } 531 | neighbor->keys[i] = NULL; 532 | if (!nd->is_leaf) 533 | neighbor->pointers[i] = neighbor->pointers[i + 1]; 534 | else 535 | neighbor->pointers[i] = NULL; 536 | } 537 | 538 | neighbor->num_keys--; 539 | nd->num_keys++; 540 | 541 | } 542 | 543 | node *coalesce_nodes(node *root, node *nd, node *neighbor, int nd_index) 544 | { 545 | int i, j, start, end; 546 | char *k_prime; 547 | node *tmp, *parent; 548 | 549 | if (nd_index == 0) { // make sure neighbor is on the left 550 | tmp = nd; 551 | nd = neighbor; 552 | neighbor = tmp; 553 | nd_index = 1; 554 | } 555 | parent = nd->parent; 556 | 557 | start = neighbor->num_keys; 558 | if (nd->is_leaf){ 559 | for (i = start, j = 0; j < nd->num_keys; i++, j++){ 560 | neighbor->keys[i] = nd->keys[j]; 561 | neighbor->pointers[i] = nd->pointers[j]; 562 | nd->keys[j] = NULL; 563 | nd->pointers[j] = NULL; 564 | } 565 | neighbor->num_keys += nd->num_keys; 566 | neighbor->pointers[size - 1] = nd->pointers[size - 1]; 567 | } 568 | else { 569 | neighbor->keys[start] = malloc(MAX_KEY_LEN); 570 | strcpy(neighbor->keys[start], parent->keys[nd_index - 1]); 571 | // neighbor->keys[start] = parent->keys[nd_index - 1]; 572 | for (i = start + 1, j = 0; j < nd->num_keys; i++, j++){ 573 | neighbor->keys[i] = nd->keys[j]; 574 | neighbor->pointers[i] = nd->pointers[j]; 575 | } 576 | neighbor->pointers[i] = nd->pointers[j]; 577 | neighbor->num_keys += nd->num_keys + 1; 578 | 579 | for (i = 0; i <= neighbor->num_keys; i++){ 580 | tmp = (node *)neighbor->pointers[i]; 581 | tmp->parent = neighbor; 582 | } 583 | } 584 | destroy_node(nd); 585 | return delete_entry(root, parent, nd_index); 586 | } 587 | 588 | int get_node_index(node *nd) 589 | { 590 | node *parent; 591 | int i; 592 | parent = nd->parent; 593 | for (i = 0; i < parent->num_keys && parent->pointers[i] != nd; i++) 594 | ; 595 | return i; 596 | } 597 | 598 | node *adjust_root(node *root) 599 | { 600 | node *new_root; 601 | if (root->num_keys > 0) // at least two childs 602 | return root; 603 | if (!root->is_leaf){ // root has only one child 604 | new_root = root->pointers[0]; 605 | new_root->parent = NULL; 606 | } 607 | else 608 | new_root = NULL; 609 | destroy_node(root); 610 | return new_root; 611 | } 612 | 613 | void *remove_entry(node *nd, int index) 614 | { 615 | int i, index_k; 616 | 617 | if (nd->is_leaf){ 618 | free(nd->keys[index]); 619 | free(nd->pointers[index]); // destroy the record 620 | for (i = index; i < nd->num_keys - 1; i++){ 621 | nd->keys[i] = nd->keys[i + 1]; 622 | nd->pointers[i] = nd->pointers[i + 1]; 623 | } 624 | nd->keys[i] = NULL; 625 | nd->pointers[i] = NULL; 626 | } 627 | else{ 628 | index_k = index - 1; // index_p == index 629 | free(nd->keys[index_k]); 630 | for (i = index_k; i < nd->num_keys - 1; i++){ 631 | nd->keys[i] = nd->keys[i + 1]; 632 | nd->pointers[i + 1] = nd->pointers[i + 2]; 633 | } 634 | nd->keys[i] = NULL; 635 | nd->pointers[i + 1] = NULL; 636 | } 637 | nd->num_keys--; 638 | } 639 | 640 | node *bulk_load(char **keys, int *values, int n) 641 | { 642 | node *root, *p; 643 | record *rec; 644 | int i; 645 | qsort(keys, n, sizeof(char *), cmp); 646 | // qsort(keys, n, sizeof(char *), (int (*)(const char **, const char **))cmp); 647 | root = make_new_tree(keys[0], values[0]); 648 | for (i = 1; i < n; i++) { 649 | if (strcmp(keys[i], keys[i-1]) == 0) // ignore duplicates (key) 650 | continue; 651 | p = root; 652 | while (!p->is_leaf) { 653 | p = p->pointers[p->num_keys]; // right most child 654 | } // p is the right most child 655 | rec = make_new_record(values[i]); 656 | if (p->num_keys < size - 1) 657 | insert_into_leaf(p, p->num_keys, keys[i], rec); 658 | else 659 | root = insert_into_leaf_after_splitting(root, p, p->num_keys, keys[i], rec); 660 | } 661 | return root; 662 | } 663 | 664 | int cmp(const void *p, const void *q) 665 | { 666 | //keys' type is (char **), each item is a (char *) 667 | return strcmp(*(char **)p, *(char **)q); 668 | } 669 | 670 | // ** for testing 671 | node *test_insert(node *root) 672 | { 673 | char *key; 674 | int i, value; 675 | key = malloc(MAX_KEY_LEN); 676 | for (i = 0; i < MAX_NUM_VALUE; i++) { 677 | scanf("%s%d", key, &value); 678 | if (strcmp(key, "exit") == 0) 679 | break; 680 | root = insert(root, key, value); 681 | printf("After insert %s: \n", key); 682 | print_tree(root); 683 | } 684 | return root; 685 | } 686 | 687 | void test_find(node *root) 688 | { 689 | char *key; 690 | record *r; 691 | key = malloc(MAX_KEY_LEN); 692 | while (1) { 693 | scanf("%s", key); 694 | if (strcmp(key, "exit") == 0) 695 | break; 696 | r = find(root, key); 697 | if (r == NULL) 698 | printf("( %s ) Not found!!\n", key); 699 | else 700 | printf("Record of %s: %d\n", key, r->value); 701 | } 702 | } 703 | 704 | node *test_delete(node *root) 705 | { 706 | char *key; 707 | key = malloc(MAX_KEY_LEN); 708 | while (1) { 709 | scanf("%s", key); 710 | if (strcmp(key, "exit") == 0) 711 | break; 712 | root = delete(root, key); 713 | printf("After delete %s: \n", key); 714 | print_tree(root); 715 | } 716 | return root; 717 | } 718 | 719 | void test_bulk_load(void) 720 | { 721 | int i, n; 722 | node *root; 723 | char **keys; 724 | int *values; 725 | keys = malloc(sizeof(char *) * MAX_NUM_VALUE); 726 | values = malloc(sizeof(int) * MAX_NUM_VALUE); 727 | for (i = 0; i < MAX_NUM_VALUE; i++) { 728 | keys[i] = malloc(MAX_KEY_LEN); 729 | if (scanf("%s%d", keys[i], &values[i]) == EOF) { 730 | free(keys[i]); 731 | break; 732 | } 733 | } 734 | n = i; 735 | root = bulk_load(keys, values, n); 736 | print_tree(root); 737 | destroy_tree(root); 738 | 739 | for (i = 0; i < n; i++) { 740 | free(keys[i]); 741 | } 742 | free(keys); 743 | free(values); 744 | } 745 | // ** end of testing 746 | 747 | main(int argc, char *argv[]) 748 | { 749 | node *root = NULL; 750 | 751 | // test_bulk_load(); 752 | root = test_insert(root); 753 | test_find(root); 754 | test_delete(root); 755 | } 756 | 757 | --------------------------------------------------------------------------------