├── Makefile ├── README ├── critbit-test.cc ├── critbit.c ├── critbit.h ├── critbit.pdf └── critbit.w /Makefile: -------------------------------------------------------------------------------- 1 | targets: critbit.pdf critbit.o 2 | 3 | critbit.pdf: critbit.w 4 | cweave critbit.w 5 | pdftex critbit.tex 6 | 7 | critbit.c: critbit.w 8 | ctangle critbit.w 9 | 10 | critbit.o: critbit.c 11 | ctangle critbit.w 12 | gcc -Wall -c critbit.c -std=c99 -ggdb 13 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | This code is taken from Dan Bernstein's qhasm and implements a binary 2 | crit-bit (also known as PATRICIA) tree for |NUL| terminated strings. Crit-bit 3 | trees are underused and it's this author's hope that a good example will aid 4 | their adoption. 5 | 6 | Herein is the CWEB source (critbit.w) and the derived files (critbit.pdf and 7 | critbit.c) for those who don't wish to install CWEB and/or TeX. 8 | 9 | If in doubt, read the PDF file. 
10 | -------------------------------------------------------------------------------- /critbit-test.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "critbit.h" 5 | 6 | using namespace std; 7 | 8 | static void 9 | test_contains() { 10 | critbit0_tree tree = {0}; 11 | 12 | static const char *elems[] = {"a", "aa", "b", "bb", "ab", "ba", "aba", "bab", NULL}; 13 | 14 | for (unsigned i = 0; elems[i]; ++i) critbit0_insert(&tree, elems[i]); 15 | 16 | for (unsigned i = 0; elems[i]; ++i) { 17 | if (!critbit0_contains(&tree, elems[i])) abort(); 18 | } 19 | 20 | critbit0_clear(&tree); 21 | } 22 | 23 | static void 24 | test_delete() { 25 | critbit0_tree tree = {0}; 26 | 27 | static const char *elems[] = {"a", "aa", "b", "bb", "ab", "ba", "aba", "bab", NULL}; 28 | 29 | for (unsigned i = 1; elems[i]; ++i) { 30 | critbit0_clear(&tree); 31 | 32 | for (unsigned j = 0; j < i; ++j) critbit0_insert(&tree, elems[j]); 33 | for (unsigned j = 0; j < i; ++j) { 34 | if (!critbit0_contains(&tree, elems[j])) abort(); 35 | } 36 | for (unsigned j = 0; j < i; ++j) { 37 | if (1 != critbit0_delete(&tree, elems[j])) abort(); 38 | } 39 | for (unsigned j = 0; j < i; ++j) { 40 | if (critbit0_contains(&tree, elems[j])) abort(); 41 | } 42 | } 43 | 44 | critbit0_clear(&tree); 45 | } 46 | 47 | static int 48 | allprefixed_cb(const char *elem, void *arg) { 49 | set *a = (set *) arg; 50 | a->insert(elem); 51 | 52 | return 1; 53 | } 54 | 55 | static void 56 | test_allprefixed() { 57 | critbit0_tree tree = {0}; 58 | 59 | static const char *elems[] = {"a", "aa", "aaz", "abz", "bba", "bbc", "bbd", NULL}; 60 | 61 | for (unsigned i = 0; elems[i]; ++i) critbit0_insert(&tree, elems[i]); 62 | 63 | set a; 64 | 65 | critbit0_allprefixed(&tree, "a", allprefixed_cb, &a); 66 | if (a.size() != 4 || 67 | a.find("a") == a.end() || 68 | a.find("aa") == a.end() || 69 | a.find("aaz") == a.end() || 70 | a.find("abz") == a.end()) { 71 | abort(); 72 | } 
73 | a.clear(); 74 | 75 | critbit0_allprefixed(&tree, "aa", allprefixed_cb, &a); 76 | if (a.size() != 2 || 77 | a.find("aa") == a.end() || 78 | a.find("aaz") == a.end()) { 79 | abort(); 80 | } 81 | a.clear(); 82 | 83 | critbit0_clear(&tree); 84 | } 85 | 86 | int 87 | main() { 88 | test_contains(); 89 | test_delete(); 90 | test_allprefixed(); 91 | 92 | return 0; 93 | } 94 | -------------------------------------------------------------------------------- /critbit.c: -------------------------------------------------------------------------------- 1 | #define _POSIX_C_SOURCE 200112 2 | #define uint8 uint8_t 3 | #define uint32 uint32_t 4 | /*2:*/ 5 | #line 45 "./critbit.w" 6 | 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | #include 13 | 14 | typedef struct{ 15 | void*child[2]; 16 | uint32 byte; 17 | uint8 otherbits; 18 | }critbit0_node; 19 | 20 | typedef struct{ 21 | void*root; 22 | }critbit0_tree; 23 | 24 | /*:2*//*3:*/ 25 | #line 69 "./critbit.w" 26 | 27 | int 28 | critbit0_contains(critbit0_tree*t,const char*u){ 29 | const uint8*ubytes= (void*)u; 30 | const size_t ulen= strlen(u); 31 | uint8*p= t->root; 32 | 33 | /*4:*/ 34 | #line 86 "./critbit.w" 35 | 36 | if(!p)return 0; 37 | 38 | /*:4*/ 39 | #line 76 "./critbit.w" 40 | 41 | /*5:*/ 42 | #line 110 "./critbit.w" 43 | 44 | while(1&(intptr_t)p){ 45 | critbit0_node*q= (void*)(p-1); 46 | /*6:*/ 47 | #line 136 "./critbit.w" 48 | 49 | uint8 c= 0; 50 | if(q->bytebyte]; 51 | const int direction= (1+(q->otherbits|c))>>8; 52 | 53 | /*:6*/ 54 | #line 113 "./critbit.w" 55 | 56 | p= q->child[direction]; 57 | } 58 | 59 | /*:5*/ 60 | #line 77 "./critbit.w" 61 | 62 | /*7:*/ 63 | #line 152 "./critbit.w" 64 | 65 | return 0==strcmp(u,(const char*)p); 66 | 67 | /*:7*/ 68 | #line 78 "./critbit.w" 69 | 70 | } 71 | 72 | /*:3*//*8:*/ 73 | #line 167 "./critbit.w" 74 | 75 | int critbit0_insert(critbit0_tree*t,const char*u) 76 | { 77 | const uint8*const ubytes= (void*)u; 78 | const size_t ulen= strlen(u); 79 | uint8*p= t->root; 
80 | 81 | /*9:*/ 82 | #line 191 "./critbit.w" 83 | 84 | if(!p){ 85 | char*x; 86 | int a= posix_memalign((void**)&x,sizeof(void*),ulen+1); 87 | if(a)return 0; 88 | memcpy(x,u,ulen+1); 89 | t->root= x; 90 | return 2; 91 | } 92 | 93 | /*:9*/ 94 | #line 174 "./critbit.w" 95 | 96 | /*5:*/ 97 | #line 110 "./critbit.w" 98 | 99 | while(1&(intptr_t)p){ 100 | critbit0_node*q= (void*)(p-1); 101 | /*6:*/ 102 | #line 136 "./critbit.w" 103 | 104 | uint8 c= 0; 105 | if(q->bytebyte]; 106 | const int direction= (1+(q->otherbits|c))>>8; 107 | 108 | /*:6*/ 109 | #line 113 "./critbit.w" 110 | 111 | p= q->child[direction]; 112 | } 113 | 114 | /*:5*/ 115 | #line 175 "./critbit.w" 116 | 117 | /*10:*/ 118 | #line 203 "./critbit.w" 119 | 120 | /*11:*/ 121 | #line 218 "./critbit.w" 122 | 123 | uint32 newbyte; 124 | uint32 newotherbits; 125 | 126 | for(newbyte= 0;newbyte>1; 148 | newotherbits|= newotherbits>>2; 149 | newotherbits|= newotherbits>>4; 150 | newotherbits= (newotherbits&~(newotherbits>>1))^255; 151 | uint8 c= p[newbyte]; 152 | int newdirection= (1+(newotherbits|c))>>8; 153 | 154 | /*:12*/ 155 | #line 205 "./critbit.w" 156 | 157 | 158 | /*:10*/ 159 | #line 176 "./critbit.w" 160 | 161 | /*13:*/ 162 | #line 260 "./critbit.w" 163 | 164 | /*14:*/ 165 | #line 271 "./critbit.w" 166 | 167 | critbit0_node*newnode; 168 | if(posix_memalign((void**)&newnode,sizeof(void*),sizeof(critbit0_node)))return 0; 169 | 170 | char*x; 171 | if(posix_memalign((void**)&x,sizeof(void*),ulen+1)){ 172 | free(newnode); 173 | return 0; 174 | } 175 | memcpy(x,ubytes,ulen+1); 176 | 177 | newnode->byte= newbyte; 178 | newnode->otherbits= newotherbits; 179 | newnode->child[1-newdirection]= x; 180 | 181 | /*:14*/ 182 | #line 261 "./critbit.w" 183 | 184 | /*15:*/ 185 | #line 326 "./critbit.w" 186 | 187 | void**wherep= &t->root; 188 | for(;;){ 189 | uint8*p= *wherep; 190 | if(!(1&(intptr_t)p))break; 191 | critbit0_node*q= (void*)(p-1); 192 | if(q->byte> newbyte)break; 193 | if(q->byte==newbyte&&q->otherbits> 
newotherbits)break; 194 | uint8 c= 0; 195 | if(q->bytebyte]; 196 | const int direction= (1+(q->otherbits|c))>>8; 197 | wherep= q->child+direction; 198 | } 199 | 200 | newnode->child[newdirection]= *wherep; 201 | *wherep= (void*)(1+(char*)newnode); 202 | 203 | /*:15*/ 204 | #line 262 "./critbit.w" 205 | 206 | 207 | /*:13*/ 208 | #line 177 "./critbit.w" 209 | 210 | 211 | return 2; 212 | } 213 | 214 | /*:8*//*16:*/ 215 | #line 349 "./critbit.w" 216 | 217 | int critbit0_delete(critbit0_tree*t,const char*u){ 218 | const uint8*ubytes= (void*)u; 219 | const size_t ulen= strlen(u); 220 | uint8*p= t->root; 221 | void**wherep= &t->root; 222 | void**whereq= 0; 223 | critbit0_node*q= 0; 224 | int direction= 0; 225 | 226 | /*17:*/ 227 | #line 372 "./critbit.w" 228 | 229 | if(!p)return 0; 230 | 231 | /*:17*/ 232 | #line 359 "./critbit.w" 233 | 234 | /*18:*/ 235 | #line 405 "./critbit.w" 236 | 237 | while(1&(intptr_t)p){ 238 | whereq= wherep; 239 | q= (void*)(p-1); 240 | uint8 c= 0; 241 | if(q->bytebyte]; 242 | direction= (1+(q->otherbits|c))>>8; 243 | wherep= q->child+direction; 244 | p= *wherep; 245 | } 246 | 247 | /*:18*/ 248 | #line 360 "./critbit.w" 249 | 250 | /*19:*/ 251 | #line 423 "./critbit.w" 252 | 253 | if(0!=strcmp(u,(const char*)p))return 0; 254 | free(p); 255 | 256 | /*:19*/ 257 | #line 361 "./critbit.w" 258 | 259 | /*20:*/ 260 | #line 437 "./critbit.w" 261 | 262 | if(!whereq){ 263 | t->root= 0; 264 | return 1; 265 | } 266 | 267 | *whereq= q->child[1-direction]; 268 | free(q); 269 | 270 | /*:20*/ 271 | #line 362 "./critbit.w" 272 | 273 | 274 | return 1; 275 | } 276 | 277 | /*:16*//*21:*/ 278 | #line 454 "./critbit.w" 279 | 280 | static void 281 | traverse(void*top){ 282 | /*22:*/ 283 | #line 472 "./critbit.w" 284 | 285 | uint8*p= top; 286 | 287 | if(1&(intptr_t)p){ 288 | critbit0_node*q= (void*)(p-1); 289 | traverse(q->child[0]); 290 | traverse(q->child[1]); 291 | free(q); 292 | }else{ 293 | free(p); 294 | } 295 | 296 | /*:22*/ 297 | #line 457 "./critbit.w" 298 | 
299 | } 300 | 301 | void critbit0_clear(critbit0_tree*t) 302 | { 303 | if(t->root)traverse(t->root); 304 | t->root= NULL; 305 | } 306 | 307 | /*:21*//*23:*/ 308 | #line 500 "./critbit.w" 309 | 310 | static int 311 | allprefixed_traverse(uint8*top, 312 | int(*handle)(const char*,void*),void*arg){ 313 | /*26:*/ 314 | #line 560 "./critbit.w" 315 | 316 | if(1&(intptr_t)top){ 317 | critbit0_node*q= (void*)(top-1); 318 | for(int direction= 0;direction<2;++direction) 319 | switch(allprefixed_traverse(q->child[direction],handle,arg)){ 320 | case 1:break; 321 | case 0:return 0; 322 | default:return-1; 323 | } 324 | return 1; 325 | } 326 | 327 | /*:26*/ 328 | #line 504 "./critbit.w" 329 | 330 | /*27:*/ 331 | #line 577 "./critbit.w" 332 | 333 | return handle((const char*)top,arg);/*:27*/ 334 | #line 505 "./critbit.w" 335 | 336 | } 337 | 338 | int 339 | critbit0_allprefixed(critbit0_tree*t,const char*prefix, 340 | int(*handle)(const char*,void*),void*arg){ 341 | const uint8*ubytes= (void*)prefix; 342 | const size_t ulen= strlen(prefix); 343 | uint8*p= t->root; 344 | uint8*top= p; 345 | 346 | if(!p)return 1; 347 | /*24:*/ 348 | #line 531 "./critbit.w" 349 | 350 | while(1&(intptr_t)p){ 351 | critbit0_node*q= (void*)(p-1); 352 | uint8 c= 0; 353 | if(q->bytebyte]; 354 | const int direction= (1+(q->otherbits|c))>>8; 355 | p= q->child[direction]; 356 | if(q->byte 47 | #include 48 | #include 49 | 50 | #include 51 | #include 52 | 53 | typedef struct { 54 | void *child[2]; 55 | uint32 byte; 56 | uint8 otherbits; 57 | } critbit0_node; 58 | 59 | typedef struct { 60 | void *root; 61 | } critbit0_tree; 62 | 63 | @* Membership testing. 64 | 65 | The first function that we'll deal with will be membership testing. The 66 | following function takes a tree, |t|, and a |NUL| terminated string, |u|, and 67 | returns non-zero iff |u| $\in$ |t|. 
68 | 69 | @c 70 | int 71 | critbit0_contains(critbit0_tree *t, const char *u) { 72 | const uint8 *ubytes = (void *) u; 73 | const size_t ulen = strlen(u); 74 | uint8 *p = t->root; 75 | 76 | @@; 77 | @@; 78 | @@; 79 | } 80 | 81 | @ An empty tree 82 | 83 | An empty tree is simply one where the root pointer is |NULL| (that's 84 | {\tt NULL} for those who are new to {\tt CWEB}). 85 | 86 | @= 87 | if (!p) return 0; 88 | 89 | @ Searching the tree 90 | 91 | Once we have established that the tree is not empty, it therefore has one or 92 | more members. Now we need to distinguish between internal and external nodes. 93 | 94 | Internal nodes are |critbit0_node| structures. They record that the tree 95 | diverges at some point. External nodes are allocated strings. Thus, a tree with 96 | a single member is one where the root pointer points at an allocated string. 97 | However, we need to be able to test a given pointer to know if it points at an 98 | internal or external node. Several possibilities present themselves: a common 99 | structure as a prefix to both the internal and external nodes, tags associated 100 | with every pointer, {\it etc}. In this case, we include the tag in the pointer 101 | itself as the least-significant bit. We assume that both types of nodes are 102 | aligned to, at least, two bytes and thus the LSB is free to be used as a tag bit. 103 | Internal nodes are tagged with a |1| and external nodes are tagged with a 104 | |0|. 105 | 106 | When walking the tree we obviously want to break out when we reach an external 107 | node. Thus we use a |while| loop that tests that the current node pointer is 108 | always pointing at an internal node. 109 | 110 | @= 111 | while (1 & (intptr_t) p) { 112 | critbit0_node *q = (void *) (p - 1); 113 | @@; 114 | p = q->child[direction]; 115 | } 116 | 117 | @ Encoding a location 118 | 119 | Recall that a crit-bit tree works by encoding the bit-number that differs at 120 | each branch in the tree. 
The obvious way to do this would either be with a 121 | single number (the number of bits from the beginning of the string), or with a 122 | (byte number, bit number $\in [0..7]$) pair. 123 | 124 | However, for reasons that should become clear later, here we encode it as a 125 | byte number and a single byte where all the bits {\it except} the critical bit 126 | are true. By performing a bitwise OR with the correct byte there are only two 127 | results: If the byte did not have the critical bit set, the result is the same 128 | as the mask. If it did, the result is all ones. The latter case is the unique 129 | 8-bit value where adding one and right-shifting 8 places results in a 1. We use 130 | this to obtain the direction. 131 | 132 | Note also that our strings are treated as if they had an infinitely long suffix 133 | of |NUL| bytes following them. Thus, if the critical bit is beyond the end of 134 | our string, we treat it as if it had a zero bit there. 135 | 136 | @= 137 | uint8 c = 0; 138 | if (q->byte < ulen) c = ubytes[q->byte]; 139 | const int direction = (1 + (q->otherbits | c)) >> 8; 140 | 141 | @ The final test 142 | 143 | Once we have reached an external node we can only conclude that certain 144 | bits of the string are shared with a string in the tree. We still need to 145 | test the best match to make sure that it's correct. If the test fails, however, 146 | we can conclude that the string is not in the tree. 147 | 148 | Note that the pointer cannot be |NULL|. We tested that the root pointer was not 149 | |NULL| at the start of the function and, if an internal node had a |NULL| pointer 150 | then the tree would be invalid - that internal node should be removed. 151 | 152 | @= 153 | return 0 == strcmp(u, (const char *) p); 154 | 155 | @* Inserting into the tree. 156 | 157 | This is a more complex function. It takes a tree, |t|, and possibly mutates it 158 | such that a |NUL| terminated string, |u|, is a member on exit. 
It returns: 159 | 160 | $\cases{ 0 &if {\rm out of memory} \cr 161 | 1 &if {\it u} {\rm was already a member} \cr 162 | 2 &if {\it t} {\rm was mutated successfully}}$ 163 | 164 | Note that the section for walking the tree is the same as before and is not 165 | covered again. 166 | 167 | @c 168 | int critbit0_insert(critbit0_tree *t, const char *u) 169 | { 170 | const uint8 *const ubytes = (void *) u; 171 | const size_t ulen = strlen(u); 172 | uint8 *p = t->root; 173 | 174 | @@; 175 | @@; 176 | @@; 177 | @@; 178 | 179 | return 2; 180 | } 181 | 182 | @ Inserting into an empty tree 183 | 184 | Recall that an empty tree has a |NULL| root pointer. A singleton tree, the 185 | result of inserting into the empty tree, has the root pointing at an external 186 | node - the sole member of the tree. 187 | 188 | We require the ability to malloc a buffer with alignment 2 and so use 189 | |posix_memalign| to allocate memory. 190 | 191 | @= 192 | if (!p) { 193 | char *x; 194 | int a = posix_memalign((void **) &x, sizeof(void *), ulen + 1); 195 | if (a) return 0; 196 | memcpy(x, u, ulen + 1); 197 | t->root = x; 198 | return 2; 199 | } 200 | 201 | @ Finding the critical bit 202 | 203 | @= 204 | @@; 205 | @@; 206 | 207 | @ Finding the differing byte 208 | 209 | Now that we have found the best match for the new element in the tree we need 210 | to check to see where the new element differs from that element. If it doesn't 211 | differ, of course, then the new element already exists in the tree and we can 212 | return 1. Remember that we treat strings as if they had an infinite number of 213 | |NUL|s following them and that the best match string might be longer than |u|. 214 | 215 | While calculating the differing byte we also calculate |newotherbits|, the XOR 216 | of the differing byte. This will become clear in the next section. 
217 | 218 | @= 219 | uint32 newbyte; 220 | uint32 newotherbits; 221 | 222 | for (newbyte = 0; newbyte < ulen; ++newbyte) { 223 | if (p[newbyte] != ubytes[newbyte]) { 224 | newotherbits = p[newbyte] ^ ubytes[newbyte]; 225 | goto different_byte_found; 226 | } 227 | } 228 | 229 | if (p[newbyte] != 0) { 230 | newotherbits = p[newbyte]; 231 | goto different_byte_found; 232 | } 233 | return 1; 234 | 235 | different_byte_found: 236 | 237 | @ Finding the differing bit 238 | 239 | Once we have the XOR of first differing byte in |newotherbits| we need to find 240 | the most significant differing bit. We could do this with a simple for loop, 241 | testing bits 7..0, instead we use the following trick: 242 | 243 | We recursively fold the upper bits into the lower bits to yield a byte |x| with 244 | all true bits below the most significant bit. Then |x & ~(x >> 1)| yields the 245 | most significant bit. 246 | 247 | Once we have this value, we invert all the bits resulting in a value suitable 248 | for our |otherbits| member. 249 | 250 | @= 251 | newotherbits |= newotherbits >> 1; 252 | newotherbits |= newotherbits >> 2; 253 | newotherbits |= newotherbits >> 4; 254 | newotherbits = (newotherbits & ~(newotherbits >> 1)) ^ 255; 255 | uint8 c = p[newbyte]; 256 | int newdirection = (1 + (newotherbits | c)) >> 8; 257 | 258 | @ Inserting the new node 259 | 260 | @= 261 | @@; 262 | @@; 263 | 264 | @ Allocating a new node 265 | 266 | This is obviously fairly pedestrian code. Again, we use |posix_memalign| to 267 | make sure that our node structures have an alignment of at least two. 
We store 268 | the new copy of the string into the correct |child| pointer and save the other 269 | for when we have worked out where to insert the new node 270 | 271 | @= 272 | critbit0_node *newnode; 273 | if (posix_memalign((void **) &newnode, sizeof(void *), sizeof(critbit0_node))) return 0; 274 | 275 | char *x; 276 | if (posix_memalign((void **) &x, sizeof(void *), ulen + 1)) { 277 | free(newnode); 278 | return 0; 279 | } 280 | memcpy(x, ubytes, ulen + 1); 281 | 282 | newnode->byte = newbyte; 283 | newnode->otherbits = newotherbits; 284 | newnode->child[1 - newdirection] = x; 285 | 286 | @ Inserting a new node in the tree 287 | 288 | Here we must recall that, for a given set of elements, there is a unique 289 | crit-bit tree representing them. This statement needs a little bit of 290 | qualification because it also requires that we define a total ordering of 291 | crit-bits. 292 | 293 | Consider the set of bitstrings $\{{\tt 000}, {\tt 001}, {\tt 101}\}$, inserted 294 | into a crit-bit tree in that order. One could imagine the resulting tree 295 | looking like this: 296 | 297 | \centerline{ 298 | \tikzpicture 299 | \usetikzlibrary{trees} 300 | \colorlet{lightgray}{black!25} 301 | [edge from parent fork down] 302 | \node{root} 303 | child {node [fill=lightgray, rounded corners] {$3^{rd}$} 304 | child {node {{\tt 000}}} 305 | child {node [fill=lightgray, rounded corners] {$1^{st}$} 306 | child {node {{\tt 001}}} 307 | child {node {{\tt 101}}}}}; 308 | \endtikzpicture 309 | } 310 | 311 | (Where internal nodes are shaded light gray and contain the critical bit, 312 | counting from the left.) 313 | 314 | That would be a valid tree for searching as far as our searching algorithm 315 | goes, but it does make a mess of predecessor and successor operations when the 316 | forks might not test the bits in any special order. 
317 | 318 | So, in short, we need the order of the crit-bits to match the lexicographical 319 | order that we expect the predecessor and successor operations to follow. Thus, 320 | inserting the new node in the tree involves walking the tree from the root to 321 | find the correct position to insert at. 322 | 323 | We keep track of the pointer to be updated (to point to the new internal node) 324 | and, once the walk has finished, we can update that pointer. 325 | 326 | @= 327 | void **wherep = &t->root; 328 | for (;;) { 329 | uint8 *p = *wherep; 330 | if (!(1 & (intptr_t) p)) break; 331 | critbit0_node *q = (void *) (p - 1); 332 | if (q->byte > newbyte) break; 333 | if (q->byte == newbyte && q->otherbits > newotherbits) break; 334 | uint8 c = 0; 335 | if (q->byte < ulen) c = ubytes[q->byte]; 336 | const int direction = (1 + (q->otherbits | c)) >> 8; 337 | wherep = q->child + direction; 338 | } 339 | 340 | newnode->child[newdirection] = *wherep; 341 | *wherep = (void *) (1 + (char *) newnode); 342 | 343 | @* Deleting elements. 344 | 345 | This function takes a tree, |t|, and a |NUL| terminated string, |u|, and 346 | possibly mutates the tree such that $u \notin t$. It returns 1 if the tree was 347 | mutated, 0 otherwise. 348 | 349 | @c 350 | int critbit0_delete(critbit0_tree *t, const char *u) { 351 | const uint8 *ubytes = (void *) u; 352 | const size_t ulen = strlen(u); 353 | uint8 *p = t->root; 354 | void **wherep = &t->root; 355 | void **whereq = 0; 356 | critbit0_node *q = 0; 357 | int direction = 0; 358 | 359 | @@; 360 | @@; 361 | @@; 362 | @@; 363 | 364 | return 1; 365 | } 366 | 367 | @ Deleting from the empty tree 368 | 369 | Since no element is the member of the empty tree, this is a very easy case: we 370 | can just return 0. 371 | 372 | @= 373 | if (!p) return 0; 374 | 375 | @ Finding the best candidate to delete 376 | 377 | Walking the tree to find the best match for a given element is almost the same 378 | as the two previous versions that we've seen. 
The only exception is that we 379 | keep track of the last jump to an internal node in |whereq|. Actually, we keep 380 | track of a pointer to the last pointer that got us to an internal node. 381 | 382 | To see why, consider the typical case: 383 | 384 | \centerline{ 385 | \tikzpicture 386 | \usetikzlibrary{trees} 387 | \colorlet{lightgray}{black!25} 388 | [edge from parent fork down] 389 | \node{root} 390 | child {node(parent) [fill=lightgray, rounded corners] {$x$} 391 | child {node {$\ldots$}} 392 | child {node [fill=lightgray, rounded corners] {$y$} 393 | child {node {$\ldots$}} 394 | child {node {1100}}}}; 395 | \node[shift=(parent.center),xshift=2cm] (l) {whereq}; 396 | \draw[<-] (parent) -- (l); 397 | \endtikzpicture 398 | } 399 | 400 | Here we wish to remove {\tt 1100}, however if we left its parent with a single 401 | child pointer, that would make the parent nothing more than a bump in the road - it 402 | should also be removed. Thus we need a pointer to the grandparent in order to 403 | remove both the string and the internal node that pointed to it. 404 | 405 | @= 406 | while (1 & (intptr_t) p) { 407 | whereq = wherep; 408 | q = (void *) (p - 1); 409 | uint8 c = 0; 410 | if (q->byte < ulen) c = ubytes[q->byte]; 411 | direction = (1 + (q->otherbits | c)) >> 8; 412 | wherep = q->child + direction; 413 | p = *wherep; 414 | } 415 | 416 | @ Checking that we have the right element 417 | 418 | As usual, we have now found the best match, an external node, but we still need 419 | to compare the strings to check that we actually have a match. If we don't, 420 | then the element cannot be in the tree and we can return 0. Otherwise, the 421 | external node is no longer useful and can be freed. 422 | 423 | @= 424 | if (0 != strcmp(u, (const char *) p)) return 0; 425 | free(p); 426 | 427 | @ Removing the node 428 | 429 | We now have to deal with two cases. 
The simple case is as outlined in the 430 | diagram above: we remove the parent node and point the grandparent to the other 431 | child of the parent. 432 | 433 | We also have to keep in mind that there might not {\it be} a grandparent node. 434 | This is the case when the tree only has one element. In this case, we remove 435 | that element and set the root pointer to |NULL|. 436 | 437 | @= 438 | if (!whereq) { 439 | t->root = 0; 440 | return 1; 441 | } 442 | 443 | *whereq = q->child[1 - direction]; 444 | free(q); 445 | 446 | @* Clearing a tree. 447 | 448 | Clearing a tree (freeing all members) brings us our first code for walking the 449 | whole tree rather than just tracing a path through it. 450 | 451 | So, the |critbit0_clear| function takes a tree, |t|, and frees every member of 452 | it, mutating the tree such that it is empty on exit. 453 | 454 | @c 455 | static void 456 | traverse(void *top) { 457 | @@; 458 | } 459 | 460 | void critbit0_clear(critbit0_tree *t) 461 | { 462 | if (t->root) traverse(t->root); 463 | t->root = NULL; 464 | } 465 | 466 | @ Recursively clearing the tree 467 | 468 | Each pointer in the tree has to be tested to see if it's a pointer to an 469 | internal node (a |critbit0_node|) or to a malloced string. If it's a node, we 470 | need to recursively free its children. 471 | 472 | @= 473 | uint8 *p = top; 474 | 475 | if (1 & (intptr_t) p) { 476 | critbit0_node *q = (void *) (p - 1); 477 | traverse(q->child[0]); 478 | traverse(q->child[1]); 479 | free(q); 480 | } else { 481 | free(p); 482 | } 483 | 484 | @* Fetching elements with a given prefix. 485 | 486 | One of the operations which crit-bit trees can perform efficiently that hash 487 | tables cannot is the extraction of the subset of elements with a given prefix. 488 | 489 | The following function takes a tree, |t|, and a |NUL| terminated string, 490 | |prefix|. 
Let $S \subseteq t$ where $x \in S$ iff |prefix| is a prefix of |x|, 491 | then $\forall x : S.$ |handle| is called with arguments |x| and |arg|. It 492 | returns: 493 | 494 | $\cases{ 0 &if {\it handle} {\rm returned 0} \cr 495 | 1 &if {\rm successful} \cr 496 | -1 &if {\it handle} {\rm returned a value} $\notin [0,1]$}$ 497 | 498 | (Note that, if |handle| returns anything other than 1, the iteration is aborted. When the matching subtree is a single external node, the value returned by |handle| is passed through unchanged.) 499 | 500 | @c 501 | static int 502 | allprefixed_traverse(uint8 *top, 503 | int (*handle) (const char *, void *), void *arg) { 504 | @@; 505 | @@; 506 | } 507 | 508 | int 509 | critbit0_allprefixed(critbit0_tree *t, const char *prefix, 510 | int (*handle) (const char *, void *), void *arg) { 511 | const uint8 *ubytes = (void *) prefix; 512 | const size_t ulen = strlen(prefix); 513 | uint8 *p = t->root; 514 | uint8 *top = p; 515 | 516 | if (!p) return 1; /* S = $\emptyset$ */ 517 | @@; 518 | @@; 519 | 520 | return allprefixed_traverse(top, handle, arg); 521 | } 522 | 523 | @ Maintaining the |top| pointer 524 | 525 | The |top| pointer points to the internal node at the top of the subtree which 526 | contains exactly the subset of elements matching the given prefix. Since our 527 | critbit values are sorted as we descend the tree, this subtree exists (if the 528 | subset is non-empty) and can be detected by checking for the critbit advancing 529 | beyond the length of the prefix. 530 | 531 | @= 532 | while (1 & (intptr_t) p) { 533 | critbit0_node *q = (void *) (p - 1); 534 | uint8 c = 0; 535 | if (q->byte < ulen) c = ubytes[q->byte]; 536 | const int direction = (1 + (q->otherbits | c)) >> 8; 537 | p = q->child[direction]; 538 | if (q->byte < ulen) top = p; 539 | } 540 | 541 | @ Checking that the prefix exists 542 | 543 | As with our other functions, it's possible that the given prefix doesn't 544 | actually exist in the tree at this point. We need to check the actual contents 545 | of the external node that we have arrived at. 
546 | 547 | @= 548 | for (size_t i = 0; i < ulen; ++i) { 549 | if (p[i] != ubytes[i]) return 1; 550 | } 551 | 552 | @ Dealing with an internal node while recursing 553 | 554 | The |allprefixed_traverse| function is called with the root of a subtree as the 555 | |top| argument. We need to test the LSB of this pointer to see if it's an 556 | internal node. If so, we recursively walk down the subtree and return. Otherwise 557 | we fall through into the code from the section below for handling an external 558 | node. 559 | 560 | @= 561 | if (1 & (intptr_t) top) { 562 | critbit0_node *q = (void *) (top - 1); 563 | for (int direction = 0; direction < 2; ++direction) 564 | switch(allprefixed_traverse(q->child[direction], handle, arg)) { 565 | case 1: break; 566 | case 0: return 0; 567 | default: return -1; 568 | } 569 | return 1; 570 | } 571 | 572 | @ Dealing with an external node while recursing 573 | 574 | An external node is a malloced string that matches the given prefix. Thus we 575 | call the callback and we're done. 576 | 577 | @= 578 | return handle((const char *) top, arg); 579 | --------------------------------------------------------------------------------