├── .gitignore ├── distance-test.c ├── judy-arrays.1 ├── judy-arrays.c ├── judy-arrays.xcodeproj └── project.pbxproj ├── judy-levenshtein.c ├── judy-utilities.c └── pairs-test.c /.gitignore: -------------------------------------------------------------------------------- 1 | # Xcode 2 | build/* 3 | *.pbxuser 4 | !default.pbxuser 5 | *.mode1v3 6 | !default.mode1v3 7 | *.mode2v3 8 | !default.mode2v3 9 | *.perspectivev3 10 | !default.perspectivev3 11 | *.xcworkspace 12 | !default.xcworkspace 13 | xcuserdata 14 | profile 15 | *.moved-aside 16 | 17 | ## Ignore incredibly annoying .DS_Store files 18 | .DS_Store 19 | -------------------------------------------------------------------------------- /distance-test.c: -------------------------------------------------------------------------------- 1 | /* 2 | * distance-test.c 3 | * judy-arrays 4 | * 5 | * Created by Jan on 21.01.11. 6 | * Copyright 2011 geheimwerk.de. All rights reserved. 7 | * 8 | * License: same as for judy-arrays.c 9 | */ 10 | 11 | #include 12 | #include 13 | 14 | #include "judy-levenshtein.c" 15 | 16 | 17 | #define DICTIONARY "/usr/share/dict/words" 18 | #define TARGET "goober" 19 | #define MAX_COST 1 20 | 21 | int main(int argc, char **argv) { 22 | void *judy; 23 | FILE *in, *out; 24 | 25 | const char *target; 26 | unsigned int maxCost; 27 | const char *dictionary; 28 | 29 | if (argc < 3) { 30 | fprintf(stderr, "usage: %s [ ] []\n", 31 | argv[0]); 32 | target = TARGET; 33 | maxCost = MAX_COST; 34 | } 35 | else { 36 | target = argv[1]; 37 | sscanf(argv[2], "%u", &maxCost); 38 | } 39 | 40 | if (argc > 3) { 41 | dictionary = argv[3]; 42 | } 43 | else { 44 | dictionary = DICTIONARY; 45 | } 46 | 47 | in = fopen(dictionary, "r"); 48 | out = stdout; 49 | 50 | if (!in) { 51 | fprintf(stderr, "unable to open input file\n"); 52 | } 53 | 54 | if (!out) { 55 | fprintf(stderr, "unable to open output file\n"); 56 | } 57 | 58 | 59 | uchar buff[1024]; 60 | judyslot max = 0; 61 | uint len; 62 | 63 | // CHANGEME: Can we 
adapt the judy stack size to the input size if it is known? 64 | judy = judy_open(512); 65 | 66 | while ( fgets((char *)buff, sizeof(buff), in) ) { 67 | len = strlen((const char *)buff); 68 | if (len > 1) { // We only want lines containing more than just the '\n' 69 | buff[len] = 0; // Remove '\n' 70 | len--; 71 | if ( len && buff[len - 1] == 0x0d ) { // Detect and remove Windows CR 72 | buff[len] = 0; 73 | len--; 74 | } 75 | *(judy_cell(judy, buff, len)) += 1; // count instances of string 76 | max++; 77 | } 78 | } 79 | 80 | fprintf(out, "Read %" PRIjudyvalue " words. \n", max); 81 | 82 | #if 1 83 | search(judy, (const char *)target, maxCost, out, processResult); 84 | #else 85 | #if 1 86 | char key_buffer[2]; 87 | judyslot *cell = NULL; 88 | 89 | key_buffer[1] = '\0'; 90 | 91 | for (key_buffer[0] = 'A'; key_buffer[0] != 0; key_buffer[0]++) { 92 | cell = judy_slot(judy, (uchar *)key_buffer, 1); 93 | } 94 | #else 95 | judyslot *cell; 96 | uint idx; 97 | 98 | cell = judy_strt(judy, NULL, 0); 99 | judy_key(judy, buff, sizeof(buff)); 100 | if (buff[0] == 0) { 101 | cell = judy_nxt(judy); 102 | } 103 | 104 | if (cell) do { 105 | judy_key(judy, buff, sizeof(buff)); 106 | for (idx = 0; idx < *cell; idx++) { // spit out duplicates 107 | fprintf(out, "%s\n", buff); 108 | } 109 | } while ( (cell = judy_nxt(judy)) ); 110 | 111 | // test deletion all the way to an empty tree 112 | 113 | if ( (cell = judy_prv(judy)) ) { 114 | do { 115 | max -= *cell; 116 | } while ( (cell = judy_del(judy)) ); 117 | } 118 | #endif 119 | #endif 120 | 121 | judy_close(judy); 122 | 123 | return 0; 124 | } 125 | -------------------------------------------------------------------------------- /judy-arrays.1: -------------------------------------------------------------------------------- 1 | .\"Modified from man(1) of FreeBSD, the NetBSD mdoc.template, and mdoc.samples. 
2 | .\"See Also: 3 | .\"man mdoc.samples for a complete listing of options 4 | .\"man mdoc for the short list of editing options 5 | .\"/usr/share/misc/mdoc.template 6 | .Dd 11.12.10 \" DATE 7 | .Dt judy3 1 \" Program name and manual section number 8 | .Os Darwin 9 | .Sh NAME \" Section Header - required - don't modify 10 | .Nm judy3, 11 | .\" The following lines are read in generating the apropos(man -k) database. Use only key 12 | .\" words here as the database is built based on the words here and in the .ND line. 13 | .Nm Other_name_for_same_program(), 14 | .Nm Yet another name for the same program. 15 | .\" Use .Nm macro to designate other names for the documented program. 16 | .Nd This line parsed for whatis database. 17 | .Sh SYNOPSIS \" Section Header - required - don't modify 18 | .Nm 19 | .Op Fl abcd \" [-abcd] 20 | .Op Fl a Ar path \" [-a path] 21 | .Op Ar file \" [file] 22 | .Op Ar \" [file ...] 23 | .Ar arg0 \" Underlined argument - use .Ar anywhere to underline 24 | arg2 ... \" Arguments 25 | .Sh DESCRIPTION \" Section Header - required - don't modify 26 | Use the .Nm macro to refer to your program throughout the man page like such: 27 | .Nm 28 | Underlining is accomplished with the .Ar macro like this: 29 | .Ar underlined text . 
30 | .Pp \" Inserts a space 31 | A list of items with descriptions: 32 | .Bl -tag -width -indent \" Begins a tagged list 33 | .It item a \" Each item preceded by .It macro 34 | Description of item a 35 | .It item b 36 | Description of item b 37 | .El \" Ends the list 38 | .Pp 39 | A list of flags and their descriptions: 40 | .Bl -tag -width -indent \" Differs from above in tag removed 41 | .It Fl a \"-a flag as a list item 42 | Description of -a flag 43 | .It Fl b 44 | Description of -b flag 45 | .El \" Ends the list 46 | .Pp 47 | .\" .Sh ENVIRONMENT \" May not be needed 48 | .\" .Bl -tag -width "ENV_VAR_1" -indent \" ENV_VAR_1 is width of the string ENV_VAR_1 49 | .\" .It Ev ENV_VAR_1 50 | .\" Description of ENV_VAR_1 51 | .\" .It Ev ENV_VAR_2 52 | .\" Description of ENV_VAR_2 53 | .\" .El 54 | .Sh FILES \" File used or created by the topic of the man page 55 | .Bl -tag -width "/Users/joeuser/Library/really_long_file_name" -compact 56 | .It Pa /usr/share/file_name 57 | FILE_1 description 58 | .It Pa /Users/joeuser/Library/really_long_file_name 59 | FILE_2 description 60 | .El \" Ends the list 61 | .\" .Sh DIAGNOSTICS \" May not be needed 62 | .\" .Bl -diag 63 | .\" .It Diagnostic Tag 64 | .\" Diagnostic information here. 65 | .\" .It Diagnostic Tag 66 | .\" Diagnostic information here. 67 | .\" .El 68 | .Sh SEE ALSO 69 | .\" List links in ascending order by section, alphabetically within a section. 
70 | .\" Please do not reference files that do not exist without filing a bug report 71 | .Xr a 1 , 72 | .Xr b 1 , 73 | .Xr c 1 , 74 | .Xr a 2 , 75 | .Xr b 2 , 76 | .Xr a 3 , 77 | .Xr b 3 78 | .\" .Sh BUGS \" Document known, unremedied bugs 79 | .\" .Sh HISTORY \" Document history if command behaves in a unique manner -------------------------------------------------------------------------------- /judy-arrays.c: -------------------------------------------------------------------------------- 1 | // Judy arrays 22 DEC 2010 2 | 3 | // Author Karl Malbrain, malbrain@yahoo.com 4 | // with assistance from Jan Weiss. 5 | 6 | // Simplified judy arrays for strings 7 | // Adapted from the ideas of Douglas Baskins of HP. 8 | 9 | // Map a set of strings to corresponding memory cells (uints). 10 | // Each cell must be set to a non-zero value by the caller. 11 | 12 | // STANDALONE is defined to compile into a string sorter. 13 | 14 | //#define STANDALONE 15 | 16 | // functions: 17 | // judy_open: open a new judy array returning a judy object. 18 | // judy_close: close an open judy array, freeing all memory. 19 | // judy_data: allocate data memory within judy array for external use. 20 | // judy_cell: insert a string into the judy array, return cell pointer. 21 | // judy_strt: retrieve the cell pointer greater than or equal to given key 22 | // judy_slot: retrieve the cell pointer, or return NULL for a given key. 23 | // judy_key: retrieve the string value for the most recent judy query. 24 | // judy_end: retrieve the cell pointer for the last string in the array. 25 | // judy_nxt: retrieve the cell pointer for the next string in the array. 26 | // judy_prv: retrieve the cell pointer for the prev string in the array. 27 | // judy_del: delete the key and cell for the current stack entry. 
28 | 29 | #include 30 | #include 31 | #include 32 | 33 | #if __STDC_VERSION__ >= 199901L 34 | #include 35 | #else 36 | typedef unsigned char uint8_t; 37 | typedef unsigned short uint16_t; 38 | typedef unsigned int uint32_t; 39 | typedef unsigned long long uint64_t; 40 | #define PRIu32 "u" 41 | #define PRIu64 "llu" 42 | #endif 43 | 44 | #ifdef linux 45 | #include 46 | #else 47 | #ifdef __BIG_ENDIAN__ 48 | #ifndef BYTE_ORDER 49 | #define BYTE_ORDER 4321 50 | #endif 51 | #else 52 | #ifndef BYTE_ORDER 53 | #define BYTE_ORDER 1234 54 | #endif 55 | #endif 56 | #ifndef BIG_ENDIAN 57 | #define BIG_ENDIAN 4321 58 | #endif 59 | #endif 60 | 61 | typedef uint8_t uchar; 62 | typedef uint32_t uint; 63 | #define PRIuint PRIu32 64 | 65 | #if defined(__LP64__) || \ 66 | defined(__x86_64__) || \ 67 | defined(__amd64__) || \ 68 | defined(_WIN64) || \ 69 | defined(__sparc64__) || \ 70 | defined(__arch64__) || \ 71 | defined(__powerpc64__) || \ 72 | defined (__s390x__) 73 | // defines for 64 bit 74 | 75 | typedef uint64_t judyvalue; 76 | typedef uint64_t judyslot; 77 | #define JUDY_key_mask (0x07) 78 | #define JUDY_key_size 8 79 | #define JUDY_slot_size 8 80 | #define JUDY_span_bytes (3 * JUDY_key_size) 81 | 82 | #define PRIjudyvalue PRIu64 83 | 84 | #else 85 | // defines for 32 bit 86 | 87 | typedef uint32_t judyvalue; 88 | typedef uint32_t judyslot; 89 | #define JUDY_key_mask (0x03) 90 | #define JUDY_key_size 4 91 | #define JUDY_slot_size 4 92 | #define JUDY_span_bytes (7 * JUDY_key_size) 93 | 94 | #define PRIjudyvalue PRIu32 95 | 96 | #endif 97 | 98 | #if CHAR_BIT != 8 99 | #error("Non 8-bit character size not supported") 100 | #endif 101 | 102 | #define JUDY_mask (~(judyslot)0x07) 103 | 104 | #ifdef STANDALONE 105 | #include 106 | #include 107 | 108 | uint MaxMem = 0; 109 | 110 | #if !defined(_WIN32) 111 | void judy_abort (char *msg) __attribute__ ((noreturn)); // Tell static analyser that this function will not return 112 | #endif 113 | 114 | void judy_abort (char *msg) 115 | { 
116 | fprintf(stderr, "%s\n", msg); 117 | exit(1); 118 | } 119 | #endif 120 | 121 | #if !defined(_WIN32) 122 | void vfree (void *what, uint size) 123 | { 124 | free (what); 125 | } 126 | #elif defined(_WIN32) 127 | #include 128 | 129 | void *valloc (uint size) 130 | { 131 | return VirtualAlloc(NULL, size, MEM_COMMIT, PAGE_READWRITE); 132 | } 133 | 134 | void vfree (void *what, uint size) 135 | { 136 | VirtualFree(what, 0, MEM_RELEASE); 137 | } 138 | #endif 139 | 140 | #define JUDY_seg 65536 141 | 142 | enum JUDY_types { 143 | JUDY_radix = 0, // inner and outer radix fan-out 144 | JUDY_1 = 1, // linear list nodes of designated count 145 | JUDY_2 = 2, 146 | JUDY_4 = 3, 147 | JUDY_8 = 4, 148 | JUDY_16 = 5, 149 | JUDY_32 = 6, 150 | JUDY_span = 7, // up to 28 tail bytes of key contiguously stored 151 | }; 152 | 153 | int JudySize[] = { 154 | (JUDY_slot_size * 16), // JUDY_radix node size 155 | (JUDY_slot_size + JUDY_key_size), // JUDY_1 node size 156 | (2 * JUDY_slot_size + 2 * JUDY_key_size), 157 | (4 * JUDY_slot_size + 4 * JUDY_key_size), 158 | (8 * JUDY_slot_size + 8 * JUDY_key_size), 159 | (16 * JUDY_slot_size + 16 * JUDY_key_size), 160 | (32 * JUDY_slot_size + 32 * JUDY_key_size), 161 | (JUDY_span_bytes + JUDY_slot_size) 162 | }; 163 | 164 | judyvalue JudyMask[9] = { 165 | 0, 0xff, 0xffff, 0xffffff, 0xffffffff, 166 | #if JUDY_key_size > 4 167 | 0xffffffffffLL, 0xffffffffffffLL, 0xffffffffffffffLL, 0xffffffffffffffffLL 168 | #endif 169 | }; 170 | 171 | typedef struct { 172 | void *seg; // next used allocator 173 | uint next; // next available offset 174 | } JudySeg; 175 | 176 | typedef struct { 177 | judyslot next; // judy object 178 | uint off; // offset within key 179 | int slot; // slot within object 180 | } JudyStack; 181 | 182 | typedef struct { 183 | judyslot root[1]; // root of judy array 184 | void **reuse[8]; // reuse judy blocks 185 | JudySeg *seg; // current judy allocator 186 | uint level; // current height of stack 187 | uint max; // max height of stack 
188 | JudyStack stack[1]; // current cursor 189 | } Judy; 190 | 191 | #define JUDY_max JUDY_32 192 | 193 | // open judy object 194 | 195 | void *judy_open (uint max) 196 | { 197 | JudySeg *seg; 198 | Judy *judy; 199 | uint amt; 200 | 201 | if( (seg = valloc(JUDY_seg)) ) { 202 | seg->next = JUDY_seg; 203 | } else { 204 | #ifdef STANDALONE 205 | judy_abort ("No virtual memory"); 206 | #else 207 | return NULL; 208 | #endif 209 | } 210 | 211 | 212 | amt = sizeof(Judy) + max * sizeof(JudyStack); 213 | #ifdef STANDALONE 214 | MaxMem += JUDY_seg; 215 | #endif 216 | 217 | if( amt & 0x07 ) 218 | amt |= 0x07, amt++; 219 | 220 | seg->next -= amt; 221 | judy = (Judy *)((uchar *)seg + seg->next); 222 | memset(judy, 0, amt); 223 | judy->seg = seg; 224 | judy->max = max; 225 | return judy; 226 | } 227 | 228 | void judy_close (Judy *judy) 229 | { 230 | JudySeg *seg, *nxt = judy->seg; 231 | 232 | while( (seg = nxt) ) 233 | nxt = seg->seg, vfree (seg, JUDY_seg); 234 | } 235 | 236 | // allocate judy node 237 | 238 | void *judy_alloc (Judy *judy, int type) 239 | { 240 | void **block; 241 | JudySeg *seg; 242 | uint amt; 243 | 244 | amt = JudySize[type]; 245 | 246 | if( amt & 0x07 ) 247 | amt |= 0x07, amt += 1; 248 | 249 | if( (block = judy->reuse[type]) ) { 250 | judy->reuse[type] = *block; 251 | memset (block, 0, amt); 252 | return (void *)block; 253 | } 254 | 255 | if( !judy->seg || judy->seg->next < amt + sizeof(*seg) ) { 256 | if( (seg = valloc (JUDY_seg)) ) { 257 | seg->next = JUDY_seg, seg->seg = judy->seg, judy->seg = seg; 258 | } else { 259 | #ifdef STANDALONE 260 | judy_abort("Out of virtual memory"); 261 | #else 262 | return NULL; 263 | #endif 264 | } 265 | 266 | #ifdef STANDALONE 267 | MaxMem += JUDY_seg; 268 | #endif 269 | } 270 | 271 | judy->seg->next -= amt; 272 | 273 | block = (void **)((uchar *)judy->seg + judy->seg->next); 274 | memset (block, 0, amt); 275 | return (void *)block; 276 | } 277 | 278 | void *judy_data (Judy *judy, uint amt) 279 | { 280 | JudySeg *seg; 281 
| void *block; 282 | 283 | if( amt & 0x07 ) 284 | amt |= 0x07, amt += 1; 285 | 286 | if( !judy->seg || judy->seg->next < amt + sizeof(*seg) ) { 287 | if( (seg = valloc (JUDY_seg)) ) { 288 | seg->next = JUDY_seg, seg->seg = judy->seg, judy->seg = seg; 289 | } else { 290 | #ifdef STANDALONE 291 | judy_abort("Out of virtual memory"); 292 | #else 293 | return NULL; 294 | #endif 295 | } 296 | 297 | #ifdef STANDALONE 298 | MaxMem += JUDY_seg; 299 | #endif 300 | } 301 | 302 | judy->seg->next -= amt; 303 | 304 | block = (void *)((uchar *)judy->seg + judy->seg->next); 305 | memset (block, 0, amt); 306 | return block; 307 | } 308 | 309 | void judy_free (Judy *judy, void *block, int type) 310 | { 311 | *((void **)(block)) = judy->reuse[type]; 312 | judy->reuse[type] = (void **)block; 313 | return; 314 | } 315 | 316 | // assemble key from current path 317 | 318 | uint judy_key (Judy *judy, uchar *buff, uint max) 319 | { 320 | int slot, cnt, /*size, */off, type; 321 | uint len = 0, idx = 0; 322 | uchar *base; 323 | int keysize; 324 | 325 | max--; // leave room for zero terminator 326 | 327 | while( len < max && ++idx <= judy->level ) { 328 | slot = judy->stack[idx].slot; 329 | type = judy->stack[idx].next & 0x07; 330 | //size = JudySize[type]; 331 | switch( type ) { 332 | case JUDY_1: 333 | case JUDY_2: 334 | case JUDY_4: 335 | case JUDY_8: 336 | case JUDY_16: 337 | case JUDY_32: 338 | keysize = JUDY_key_size - (judy->stack[idx].off & JUDY_key_mask); 339 | base = (uchar *)(judy->stack[idx].next & JUDY_mask); 340 | //cnt = size / (sizeof(judyslot) + keysize); 341 | off = keysize; 342 | #if BYTE_ORDER != BIG_ENDIAN 343 | while( off-- && len < max ) 344 | buff[len++] = base[slot * keysize + off]; 345 | #else 346 | for( off = 0; off < keysize && len < max; off++ ) 347 | buff[len++] = base[slot * keysize + off]; 348 | #endif 349 | continue; 350 | case JUDY_radix: 351 | if( !slot ) 352 | break; 353 | buff[len++] = slot; 354 | continue; 355 | case JUDY_span: 356 | base = (uchar 
*)(judy->stack[idx].next & JUDY_mask); 357 | cnt = JUDY_span_bytes; 358 | 359 | for( slot = 0; slot < cnt && base[slot]; slot++ ) 360 | if( len < max ) 361 | buff[len++] = base[slot]; 362 | continue; 363 | } 364 | } 365 | buff[len] = '\0'; 366 | return len; 367 | } 368 | 369 | // find slot & setup cursor 370 | 371 | judyslot *judy_slot (Judy *judy, uchar *buff, uint max) 372 | { 373 | int slot, size, keysize, tst, cnt; 374 | judyslot next = *judy->root; 375 | judyvalue value, test = 0; 376 | judyslot *table; 377 | judyslot *node; 378 | uint off = 0; 379 | uchar *base; 380 | 381 | judy->level = 0; 382 | 383 | while( next ) { 384 | if( judy->level < judy->max ) 385 | judy->level++; 386 | 387 | judy->stack[judy->level].off = off; 388 | judy->stack[judy->level].next = next; 389 | size = JudySize[next & 0x07]; 390 | 391 | switch( next & 0x07 ) { 392 | 393 | case JUDY_1: 394 | case JUDY_2: 395 | case JUDY_4: 396 | case JUDY_8: 397 | case JUDY_16: 398 | case JUDY_32: 399 | base = (uchar *)(next & JUDY_mask); 400 | node = (judyslot *)((next & JUDY_mask) + size); 401 | keysize = JUDY_key_size - (off & JUDY_key_mask); 402 | cnt = size / (sizeof(judyslot) + keysize); 403 | slot = cnt; 404 | value = 0; 405 | 406 | do { 407 | value <<= 8; 408 | if( off < max ) 409 | value |= buff[off]; 410 | } while( ++off & JUDY_key_mask ); 411 | 412 | // find slot > key 413 | 414 | while( slot-- ) { 415 | test = *(judyvalue *)(base + slot * keysize); 416 | #if BYTE_ORDER == BIG_ENDIAN 417 | test >>= 8 * (JUDY_key_size - keysize); 418 | #else 419 | test &= JudyMask[keysize]; 420 | #endif 421 | if( test <= value ) 422 | break; 423 | } 424 | 425 | judy->stack[judy->level].slot = slot; 426 | 427 | if( test == value ) { 428 | 429 | // is this a leaf? 
430 | 431 | if( !(value & 0xFF) ) 432 | return &node[-slot-1]; 433 | 434 | next = node[-slot-1]; 435 | continue; 436 | } 437 | 438 | return NULL; 439 | 440 | case JUDY_radix: 441 | table = (judyslot *)(next & JUDY_mask); // outer radix 442 | 443 | if( off < max ) 444 | slot = buff[off]; 445 | else 446 | slot = 0; 447 | 448 | // put radix slot on judy stack 449 | 450 | judy->stack[judy->level].slot = slot; 451 | 452 | if( (next = table[slot >> 4]) ) 453 | table = (judyslot *)(next & JUDY_mask); // inner radix 454 | else 455 | return NULL; 456 | 457 | if( !slot ) // leaf? 458 | return &table[slot & 0x0F]; 459 | 460 | next = table[slot & 0x0F]; 461 | off += 1; 462 | break; 463 | 464 | case JUDY_span: 465 | node = (judyslot *)((next & JUDY_mask) + JudySize[JUDY_span]); 466 | base = (uchar *)(next & JUDY_mask); 467 | cnt = tst = JUDY_span_bytes; 468 | if( tst > (int)(max - off) ) 469 | tst = max - off; 470 | value = strncmp((const char *)base, (const char *)(buff + off), tst); 471 | if( !value && tst < cnt && !base[tst] ) // leaf? 
472 | return &node[-1]; 473 | 474 | if( !value && tst == cnt ) { 475 | next = node[-1]; 476 | off += cnt; 477 | continue; 478 | } 479 | return NULL; 480 | } 481 | } 482 | 483 | return NULL; 484 | } 485 | 486 | // promote full nodes to next larger size 487 | 488 | judyslot *judy_promote (Judy *judy, judyslot *next, int idx, judyvalue value, int keysize) 489 | { 490 | uchar *base = (uchar *)(*next & JUDY_mask); 491 | int oldcnt, newcnt, slot; 492 | #if BYTE_ORDER == BIG_ENDIAN 493 | int i; 494 | #endif 495 | judyslot *newnode, *node; 496 | judyslot *result; 497 | uchar *newbase; 498 | uint type; 499 | 500 | type = (*next & 0x07) + 1; 501 | node = (judyslot *)((*next & JUDY_mask) + JudySize[type-1]); 502 | oldcnt = JudySize[type-1] / (sizeof(judyslot) + keysize); 503 | newcnt = JudySize[type] / (sizeof(judyslot) + keysize); 504 | 505 | // promote node to next larger size 506 | 507 | newbase = judy_alloc (judy, type); 508 | newnode = (judyslot *)(newbase + JudySize[type]); 509 | *next = (judyslot)newbase | type; 510 | 511 | // open up slot at idx 512 | 513 | memcpy(newbase + (newcnt - oldcnt - 1) * keysize, base, idx * keysize); // copy keys 514 | 515 | for( slot = 0; slot < idx; slot++ ) 516 | newnode[-(slot + newcnt - oldcnt)] = node[-(slot + 1)]; // copy ptr 517 | 518 | // fill in new node 519 | 520 | #if BYTE_ORDER != BIG_ENDIAN 521 | memcpy(newbase + (idx + newcnt - oldcnt - 1) * keysize, &value, keysize); // copy key 522 | #else 523 | i = keysize; 524 | 525 | while( i-- ) 526 | newbase[(idx + newcnt - oldcnt - 1) * keysize + i] = value, value >>= 8; 527 | #endif 528 | result = &newnode[-(idx + newcnt - oldcnt)]; 529 | 530 | // copy rest of old node 531 | 532 | memcpy(newbase + (idx + newcnt - oldcnt) * keysize, base + (idx * keysize), (oldcnt - slot) * keysize); // copy keys 533 | 534 | for( ; slot < oldcnt; slot++ ) 535 | newnode[-(slot + newcnt - oldcnt + 1)] = node[-(slot + 1)]; // copy ptr 536 | 537 | judy->stack[judy->level].next = *next; 538 | 
// construct new node for JUDY_radix entry
// make node with slot - start entries
// moving key over one offset
//
//	radix:	 outer radix table, indexed by the high nibble of key
//	old:	 key area of the node being decomposed; its slots are located
//		 from old + JudySize[JUDY_max], i.e. it is read as a JUDY_max node
//	start:	 index of the first old entry to move
//	slot:	 index one past the last old entry to move
//	keysize: key width in the new node; old keys are keysize + 1 bytes wide
//	key:	 key byte used as the radix index for these entries
//
// NOTE(review): the results of judy_alloc are used without a NULL check,
// although judy_alloc can return NULL in non-STANDALONE builds.

void judy_radix (Judy *judy, judyslot *radix, uchar *old, int start, int slot, int keysize, uchar key)
{
	int size, idx, cnt = slot - start, newcnt;
	judyslot *node, *oldnode;
	uint type = JUDY_1 - 1;
	judyslot *table;
	uchar *base;

	// if necessary, setup inner radix node

	if( !(table = (judyslot *)(radix[key >> 4] & JUDY_mask)) ) {
		table = judy_alloc (judy, JUDY_radix);
		radix[key >> 4] = (judyslot)table | JUDY_radix;
	}

	// slots of the old node grow downward from its end

	oldnode = (judyslot *)(old + JudySize[JUDY_max]);

	// is this slot a leaf?
	// a zero key byte, or no key bytes left for a new node: the slot value
	// of old entry start goes straight into the inner table

	if( !key || !keysize ) {
		table[key & 0x0F] = oldnode[-start-1];
		return;
	}

	// calculate new node big enough to contain slots
	// (smallest linear type whose capacity reaches cnt, capped at JUDY_max)

	do {
		type++;
		size = JudySize[type];
		newcnt = size / (sizeof(judyslot) + keysize);
	} while( cnt > newcnt && type < JUDY_max );

	// store new node pointer in inner table

	base = judy_alloc (judy, type);
	node = (judyslot *)(base + size);
	table[key & 0x0F] = (judyslot)base | type;

	// allocate node and copy old contents
	// shorten keys by 1 byte during copy
	// entries are copied last to first into the highest indexes of the new
	// node, which leaves any unused entries at the low indexes

	for( idx = 0; idx < cnt; idx++ ) {
#if BYTE_ORDER != BIG_ENDIAN
		// keep the first keysize bytes of each old key,
		// dropping the byte at its highest address
		memcpy (base + (newcnt - idx - 1) * keysize, old + (start + cnt - idx - 1) * (keysize + 1), keysize);
#else
		// skip the byte at the lowest address of each old key
		memcpy (base + (newcnt - idx - 1) * keysize, old + (start + cnt - idx - 1) * (keysize + 1) + 1, keysize);
#endif
		node[-(newcnt - idx)] = oldnode[-(start + cnt - idx)];
	}
}
// return first leaf
//
// Descend from node next, whose keys begin at key offset off, always
// taking the lowest occupied entry, and push every visited node on the
// cursor stack. Returns the cell pointer of the leaf that is reached, or
// NULL when next is 0.
// Once judy->level has reached judy->max it stops growing and deeper
// nodes overwrite the top stack entry.

judyslot *judy_first (Judy *judy, judyslot next, uint off)
{
	judyslot *table, *inner;
	uint keysize, size;
	judyslot *node;
	int slot, cnt;
	uchar *base;

	while( next ) {
		if( judy->level < judy->max )
			judy->level++;

		judy->stack[judy->level].off = off;
		judy->stack[judy->level].next = next;
		size = JudySize[next & 0x07];

		switch( next & 0x07 ) {
		case JUDY_1:
		case JUDY_2:
		case JUDY_4:
		case JUDY_8:
		case JUDY_16:
		case JUDY_32:
			keysize = JUDY_key_size - (off & JUDY_key_mask);
			node = (judyslot *)((next & JUDY_mask) + size);
			base = (uchar *)(next & JUDY_mask);
			cnt = size / (sizeof(judyslot) + keysize);

			// skip the unused entries at the low indexes
			// NOTE(review): if every slot is zero, slot ends up equal to
			// cnt and the reads below go past the last entry; presumably
			// reachable nodes always hold an entry - confirm

			for( slot = 0; slot < cnt; slot++ )
				if( node[-slot-1] )
					break;

			judy->stack[judy->level].slot = slot;

			// a zero in the last key byte marks a leaf; the byte tested
			// depends on the byte order the keys are stored in
#if BYTE_ORDER != BIG_ENDIAN
			if( !base[slot * keysize] )
				return &node[-slot-1];
#else
			if( !base[slot * keysize + keysize - 1] )
				return &node[-slot-1];
#endif
			next = node[-slot - 1];

			// continue at the next multiple of JUDY_key_size
			off = (off | JUDY_key_mask) + 1;
			continue;
		case JUDY_radix:
			table = (judyslot *)(next & JUDY_mask);

			// scan the 256 possible byte values in increasing order
			// NOTE(review): if no entry is found, next keeps its last
			// assigned value; presumably radix nodes are never empty - confirm

			for( slot = 0; slot < 256; slot++ )
				if( (inner = (judyslot *)(table[slot >> 4] & JUDY_mask)) ) {
					if( (next = inner[slot & 0x0F]) ) {
						judy->stack[judy->level].slot = slot;
						if( !slot )
							return &inner[slot & 0x0F];
						else
							break;
					}
				} else
					slot |= 0x0F;	// no inner table: skip its 16 entries
			off++;
			continue;
		case JUDY_span:
			node = (judyslot *)((next & JUDY_mask) + JudySize[JUDY_span]);
			base = (uchar *)(next & JUDY_mask);
			cnt = JUDY_span_bytes;

			// a zero in the last span byte means the key ends in this node

			if( !base[cnt - 1] )	// leaf node?
				return &node[-1];
			next = node[-1];
			off += cnt;
			continue;
		}
	}
	return NULL;
}
// judy_nxt: return next entry
//
// Advance the cursor left by the previous query. With an empty cursor
// (judy->level == 0) the walk starts at the first leaf below the root.
// Returns the cell pointer of the entry reached, or NULL once the cursor
// has been popped back to level 0 without finding another entry.

judyslot *judy_nxt (Judy *judy)
{
	judyslot *table, *inner;
	int slot, size, cnt;
	judyslot *node;
	judyslot next;
	uint keysize;
	uchar *base;
	uint off;

	if( !judy->level )
		return judy_first (judy, *judy->root, 0);

	while( judy->level ) {
		// resume at the node on top of the cursor stack

		next = judy->stack[judy->level].next;
		slot = judy->stack[judy->level].slot;
		off = judy->stack[judy->level].off;
		keysize = JUDY_key_size - (off & JUDY_key_mask);
		size = JudySize[next & 0x07];

		switch( next & 0x07 ) {
		case JUDY_1:
		case JUDY_2:
		case JUDY_4:
		case JUDY_8:
		case JUDY_16:
		case JUDY_32:
			cnt = size / (sizeof(judyslot) + keysize);
			node = (judyslot *)((next & JUDY_mask) + size);
			base = (uchar *)(next & JUDY_mask);

			// step to the following entry of this node, if there is one.
			// The else below pairs with the inner (leaf test) if, so the
			// pop after it runs only when ++slot < cnt is false.

			if( ++slot < cnt )
#if BYTE_ORDER != BIG_ENDIAN
				if( !base[slot * keysize] )
#else
				if( !base[slot * keysize + keysize - 1] )
#endif
				{
					// last key byte is zero: the entry is a leaf
					judy->stack[judy->level].slot = slot;
					return &node[-slot - 1];
				} else {
					// key continues: take the first leaf of the subtree
					judy->stack[judy->level].slot = slot;
					return judy_first (judy, node[-slot-1], (off | JUDY_key_mask) + 1);
				}

			// node exhausted: pop it and continue in its parent

			judy->level--;
			continue;

		case JUDY_radix:
			table = (judyslot *)(next & JUDY_mask);

			// scan the remaining byte values in increasing order

			while( ++slot < 256 )
				if( (inner = (judyslot *)(table[slot >> 4] & JUDY_mask)) ) {
					if( inner[slot & 0x0F] ) {
						judy->stack[judy->level].slot = slot;
						return judy_first(judy, inner[slot & 0x0F], off + 1);
					}
				} else
					slot |= 0x0F;	// no inner table: skip its 16 entries

			judy->level--;
			continue;
		case JUDY_span:
			// a span node has a single slot, so nothing follows in it
			judy->level--;
			continue;
		}
	}
	return NULL;
}
906 | table = (judyslot *)(next & JUDY_mask); 907 | 908 | while( slot-- ) { 909 | judy->stack[judy->level].slot--; 910 | if( (inner = (judyslot *)(table[slot >> 4] & JUDY_mask)) ) 911 | if( inner[slot & 0x0F] ) 912 | if( slot ) 913 | return judy_last(judy, inner[slot & 0x0F], off + 1); 914 | else 915 | return &inner[0]; 916 | } 917 | 918 | judy->level--; 919 | continue; 920 | 921 | case JUDY_span: 922 | judy->level--; 923 | continue; 924 | } 925 | } 926 | return NULL; 927 | } 928 | 929 | // judy_del: delete string from judy array 930 | // returning previous entry. 931 | 932 | judyslot *judy_del (Judy *judy) 933 | { 934 | int slot, off, size, type, high; 935 | judyslot *table, *inner; 936 | judyslot next, *node; 937 | int keysize, cnt; 938 | uchar *base; 939 | 940 | while( judy->level ) { 941 | next = judy->stack[judy->level].next; 942 | slot = judy->stack[judy->level].slot; 943 | off = judy->stack[judy->level].off; 944 | size = JudySize[next & 0x07]; 945 | 946 | switch( type = next & 0x07 ) { 947 | case JUDY_1: 948 | case JUDY_2: 949 | case JUDY_4: 950 | case JUDY_8: 951 | case JUDY_16: 952 | case JUDY_32: 953 | keysize = JUDY_key_size - (off & JUDY_key_mask); 954 | cnt = size / (sizeof(judyslot) + keysize); 955 | node = (judyslot *)((next & JUDY_mask) + size); 956 | base = (uchar *)(next & JUDY_mask); 957 | 958 | // move deleted slot to first slot 959 | 960 | while( slot ) { 961 | node[-slot-1] = node[-slot]; 962 | memcpy (base + slot * keysize, base + (slot - 1) * keysize, keysize); 963 | slot--; 964 | } 965 | 966 | // zero out first slot 967 | 968 | node[-1] = 0; 969 | memset (base, 0, keysize); 970 | 971 | if( node[-cnt] ) { // does node have any slots left? 
972 | judy->stack[judy->level].slot++; 973 | return judy_prv (judy); 974 | } 975 | 976 | judy_free (judy, base, type); 977 | judy->level--; 978 | continue; 979 | 980 | case JUDY_radix: 981 | table = (judyslot *)(next & JUDY_mask); 982 | inner = (judyslot *)(table[slot >> 4] & JUDY_mask); 983 | inner[slot & 0x0F] = 0; 984 | high = slot & 0xF0; 985 | 986 | for( cnt = 16; cnt--; ) 987 | if( inner[cnt] ) 988 | return judy_prv (judy); 989 | 990 | judy_free (judy, inner, JUDY_radix); 991 | table[slot >> 4] = 0; 992 | 993 | for( cnt = 16; cnt--; ) 994 | if( table[cnt] ) 995 | return judy_prv (judy); 996 | 997 | judy_free (judy, table, JUDY_radix); 998 | judy->level--; 999 | continue; 1000 | 1001 | case JUDY_span: 1002 | base = (uchar *)(next & JUDY_mask); 1003 | judy_free (judy, base, type); 1004 | judy->level--; 1005 | continue; 1006 | } 1007 | } 1008 | 1009 | // tree is now empty 1010 | 1011 | *judy->root = 0; 1012 | return NULL; 1013 | } 1014 | 1015 | // return cell for first key greater than or equal to given key 1016 | 1017 | judyslot *judy_strt (Judy *judy, uchar *buff, uint max) 1018 | { 1019 | judyslot *cell; 1020 | 1021 | judy->level = 0; 1022 | 1023 | if( !max ) 1024 | return judy_first (judy, *judy->root, 0); 1025 | 1026 | if( (cell = judy_slot (judy, buff, max)) ) 1027 | return cell; 1028 | 1029 | return judy_nxt (judy); 1030 | } 1031 | 1032 | // split open span node 1033 | 1034 | void judy_splitspan (Judy *judy, judyslot *next, uchar *base) 1035 | { 1036 | judyslot *node = (judyslot *)(base + JudySize[JUDY_span]); 1037 | uint cnt = JUDY_span_bytes; 1038 | uchar *newbase; 1039 | uint off = 0; 1040 | #if BYTE_ORDER != BIG_ENDIAN 1041 | int i; 1042 | #endif 1043 | 1044 | do { 1045 | newbase = judy_alloc (judy, JUDY_1); 1046 | *next = (judyslot)newbase | JUDY_1; 1047 | 1048 | #if BYTE_ORDER != BIG_ENDIAN 1049 | i = JUDY_key_size; 1050 | while( i-- ) 1051 | *newbase++ = base[off + i]; 1052 | #else 1053 | memcpy (newbase, base + off, JUDY_key_size); 1054 | newbase 
+= JUDY_key_size; 1055 | #endif 1056 | next = (judyslot *)newbase; 1057 | 1058 | off += JUDY_key_size; 1059 | cnt -= JUDY_key_size; 1060 | } while( cnt && base[off - 1] ); 1061 | 1062 | *next = node[-1]; 1063 | judy_free (judy, base, JUDY_span); 1064 | } 1065 | 1066 | // judy_cell: add string to judy array 1067 | 1068 | judyslot *judy_cell (Judy *judy, uchar *buff, uint max) 1069 | { 1070 | int size, idx, slot, cnt, tst; 1071 | judyslot *next = judy->root; 1072 | judyvalue test, value; 1073 | uint off = 0, start; 1074 | judyslot *table; 1075 | judyslot *node; 1076 | uint keysize; 1077 | uchar *base; 1078 | 1079 | judy->level = 0; 1080 | 1081 | while( *next ) { 1082 | if( judy->level < judy->max ) 1083 | judy->level++; 1084 | 1085 | judy->stack[judy->level].off = off; 1086 | judy->stack[judy->level].next = *next; 1087 | size = JudySize[*next & 0x07]; 1088 | 1089 | switch( *next & 0x07 ) { 1090 | case JUDY_1: 1091 | case JUDY_2: 1092 | case JUDY_4: 1093 | case JUDY_8: 1094 | case JUDY_16: 1095 | case JUDY_32: 1096 | keysize = JUDY_key_size - (off & JUDY_key_mask); 1097 | cnt = size / (sizeof(judyslot) + keysize); 1098 | base = (uchar *)(*next & JUDY_mask); 1099 | node = (judyslot *)((*next & JUDY_mask) + size); 1100 | start = off; 1101 | slot = cnt; 1102 | value = 0; 1103 | 1104 | do { 1105 | value <<= 8; 1106 | if( off < max ) 1107 | value |= buff[off]; 1108 | } while( ++off & JUDY_key_mask ); 1109 | 1110 | // find slot > key 1111 | 1112 | while( slot-- ) { 1113 | test = *(judyvalue *)(base + slot * keysize); 1114 | #if BYTE_ORDER == BIG_ENDIAN 1115 | test >>= 8 * (JUDY_key_size - keysize); 1116 | #else 1117 | test &= JudyMask[keysize]; 1118 | #endif 1119 | if( test <= value ) 1120 | break; 1121 | } 1122 | 1123 | judy->stack[judy->level].slot = slot - 1; 1124 | 1125 | if( test == value ) { // new key is equal to slot key 1126 | next = &node[-slot-1]; 1127 | 1128 | // is this a leaf? 
1129 | 1130 | if( !(value & 0xFF) ) 1131 | return next; 1132 | 1133 | continue; 1134 | } 1135 | 1136 | // if this node is not full 1137 | // open up cell after slot 1138 | 1139 | if( !node[-1] ) { // if the entry before node is empty/zero 1140 | memmove(base, base + keysize, slot * keysize); // move keys less than new key down one slot 1141 | #if BYTE_ORDER != BIG_ENDIAN 1142 | memcpy(base + slot * keysize, &value, keysize); // copy new key into slot 1143 | #else 1144 | test = value; 1145 | idx = keysize; 1146 | 1147 | while( idx-- ) 1148 | base[slot * keysize + idx] = test, test >>= 8; 1149 | #endif 1150 | for( idx = 0; idx < slot; idx++ ) 1151 | node[-idx-1] = node[-idx-2];// copy tree ptrs/cells down one slot 1152 | 1153 | node[-slot-1] = 0; // set new tree ptr/cell 1154 | next = &node[-slot-1]; 1155 | 1156 | if( !(value & 0xFF) ) 1157 | return next; 1158 | 1159 | continue; 1160 | } 1161 | 1162 | if( size < JudySize[JUDY_max] ) { 1163 | next = judy_promote (judy, next, slot+1, value, keysize); 1164 | 1165 | if( !(value & 0xFF) ) 1166 | return next; 1167 | 1168 | continue; 1169 | } 1170 | 1171 | // split full maximal node into JUDY_radix nodes 1172 | // loop to reprocess new insert 1173 | 1174 | judy_splitnode (judy, next, size, keysize); 1175 | judy->level--; 1176 | off = start; 1177 | continue; 1178 | 1179 | case JUDY_radix: 1180 | table = (judyslot *)(*next & JUDY_mask); // outer radix 1181 | 1182 | if( off < max ) 1183 | slot = buff[off++]; 1184 | else 1185 | slot = 0; 1186 | 1187 | // allocate inner radix if empty 1188 | 1189 | if( !table[slot >> 4] ) 1190 | table[slot >> 4] = (judyslot)judy_alloc (judy, JUDY_radix) | JUDY_radix; 1191 | 1192 | table = (judyslot *)(table[slot >> 4] & JUDY_mask); 1193 | judy->stack[judy->level].slot = slot; 1194 | next = &table[slot & 0x0F]; 1195 | 1196 | if( !slot ) // leaf? 
1197 | return next; 1198 | continue; 1199 | 1200 | case JUDY_span: 1201 | base = (uchar *)(*next & JUDY_mask); 1202 | node = (judyslot *)((*next & JUDY_mask) + JudySize[JUDY_span]); 1203 | cnt = JUDY_span_bytes; 1204 | tst = cnt; 1205 | 1206 | if( tst > (int)(max - off) ) 1207 | tst = max - off; 1208 | 1209 | value = strncmp((const char *)base, (const char *)(buff + off), tst); 1210 | 1211 | if( !value && tst < cnt && !base[tst] ) // leaf? 1212 | return &node[-1]; 1213 | 1214 | if( !value && tst == cnt ) { 1215 | next = &node[-1]; 1216 | off += cnt; 1217 | continue; 1218 | } 1219 | 1220 | // bust up JUDY_span node and produce JUDY_1 nodes 1221 | // then loop to reprocess insert 1222 | 1223 | judy_splitspan (judy, next, base); 1224 | judy->level--; 1225 | continue; 1226 | } 1227 | } 1228 | 1229 | // place JUDY_1 node under JUDY_radix node(s) 1230 | 1231 | if( off & JUDY_key_mask && off <= max ) { 1232 | base = judy_alloc (judy, JUDY_1); 1233 | keysize = JUDY_key_size - (off & JUDY_key_mask); 1234 | node = (judyslot *)(base + JudySize[JUDY_1]); 1235 | *next = (judyslot)base | JUDY_1; 1236 | 1237 | // fill in slot 0 with bytes of key 1238 | 1239 | #if BYTE_ORDER != BIG_ENDIAN 1240 | while( keysize ) 1241 | if( off + --keysize < max ) 1242 | *base++ = buff[off + keysize]; 1243 | else 1244 | base++; 1245 | #else 1246 | tst = keysize; 1247 | 1248 | if( tst > (int)(max - off) ) 1249 | tst = max - off; 1250 | 1251 | memcpy (base, buff + off, tst); 1252 | #endif 1253 | if( judy->level < judy->max ) 1254 | judy->level++; 1255 | 1256 | judy->stack[judy->level].next = *next; 1257 | judy->stack[judy->level].slot = 0; 1258 | judy->stack[judy->level].off = off; 1259 | next = &node[-1]; 1260 | off |= JUDY_key_mask; 1261 | off++; 1262 | } 1263 | 1264 | // produce span nodes to consume rest of key 1265 | 1266 | while( off <= max ) { 1267 | base = judy_alloc (judy, JUDY_span); 1268 | *next = (judyslot)base | JUDY_span; 1269 | node = (judyslot *)(base + JudySize[JUDY_span]); 1270 | 
cnt = tst = JUDY_span_bytes; 1271 | if( tst > (int)(max - off) ) 1272 | tst = max - off; 1273 | memcpy (base, buff + off, tst); 1274 | 1275 | if( judy->level < judy->max ) 1276 | judy->level++; 1277 | 1278 | judy->stack[judy->level].next = *next; 1279 | judy->stack[judy->level].slot = 0; 1280 | judy->stack[judy->level].off = off; 1281 | 1282 | next = &node[-1]; 1283 | off += tst; 1284 | if( !base[cnt-1] ) // done on leaf 1285 | break; 1286 | } 1287 | return next; 1288 | } 1289 | 1290 | #ifdef STANDALONE 1291 | int main (int argc, char **argv) 1292 | { 1293 | uchar buff[1024]; 1294 | judyslot max = 0; 1295 | judyslot *cell; 1296 | FILE *in, *out; 1297 | void *judy; 1298 | uint len; 1299 | uint idx; 1300 | 1301 | if( argc > 1 ) 1302 | in = fopen (argv[1], "r"); 1303 | else 1304 | in = stdin; 1305 | 1306 | if( argc > 2 ) 1307 | out = fopen (argv[2], "w"); 1308 | else 1309 | out = stdout; 1310 | 1311 | if( !in ) 1312 | fprintf (stderr, "unable to open input file\n"); 1313 | 1314 | if( !out ) 1315 | fprintf (stderr, "unable to open output file\n"); 1316 | 1317 | judy = judy_open (512); 1318 | 1319 | while( fgets((char *)buff, sizeof(buff), in) ) { 1320 | len = strlen((const char *)buff); 1321 | buff[--len] = '\0'; 1322 | if( len && buff[len - 1] == 0x0d ) // Detect and remove Windows CR 1323 | buff[--len] = '\0'; 1324 | *(judy_cell (judy, buff, len)) += 1; // count instances of string 1325 | max++; 1326 | } 1327 | 1328 | cell = judy_strt (judy, NULL, 0); 1329 | 1330 | if( cell ) do { 1331 | judy_key(judy, buff, sizeof(buff)); 1332 | for( idx = 0; idx < *cell; idx++ ) // spit out duplicates 1333 | fprintf(out, "%s\n", buff); 1334 | } while( (cell = judy_nxt (judy)) ); 1335 | 1336 | fprintf(stderr, "%" PRIuint " memory used\n", MaxMem); 1337 | 1338 | #if 1 1339 | // test deletion all the way to an empty tree 1340 | 1341 | if( (cell = judy_prv (judy)) ) { 1342 | do { 1343 | max -= *cell; 1344 | } while( (cell = judy_del (judy)) ); 1345 | } 1346 | 1347 | assert (max == 0); 
1348 | #endif 1349 | judy_close(judy); 1350 | return 0; 1351 | } 1352 | #endif // of STANDALONE 1353 | -------------------------------------------------------------------------------- /judy-arrays.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 45; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 3D8072DD12E9161300DDD165 /* distance-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 3D8072B312E914D700DDD165 /* distance-test.c */; }; 11 | 3DB3623612B379AA0036C0E1 /* judy-arrays.c in Sources */ = {isa = PBXBuildFile; fileRef = 3DB3623512B379AA0036C0E1 /* judy-arrays.c */; }; 12 | 3DE7835E12C5F52B0046031C /* pairs-test.c in Sources */ = {isa = PBXBuildFile; fileRef = 3DE7835D12C5F52B0046031C /* pairs-test.c */; }; 13 | 8DD76FB00486AB0100D96B5E /* judy-arrays.1 in CopyFiles */ = {isa = PBXBuildFile; fileRef = C6A0FF2C0290799A04C91782 /* judy-arrays.1 */; }; 14 | /* End PBXBuildFile section */ 15 | 16 | /* Begin PBXCopyFilesBuildPhase section */ 17 | 8DD76FAF0486AB0100D96B5E /* CopyFiles */ = { 18 | isa = PBXCopyFilesBuildPhase; 19 | buildActionMask = 8; 20 | dstPath = /usr/share/man/man1/; 21 | dstSubfolderSpec = 0; 22 | files = ( 23 | 8DD76FB00486AB0100D96B5E /* judy-arrays.1 in CopyFiles */, 24 | ); 25 | runOnlyForDeploymentPostprocessing = 1; 26 | }; 27 | /* End PBXCopyFilesBuildPhase section */ 28 | 29 | /* Begin PBXFileReference section */ 30 | 3D10CC0212ED56FB000DE9D4 /* judy-levenshtein.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "judy-levenshtein.c"; sourceTree = ""; }; 31 | 3D2F99B9130302FA006D7433 /* judy-utilities.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "judy-utilities.c"; sourceTree = ""; }; 32 | 3D8072B312E914D700DDD165 /* distance-test.c */ = {isa = PBXFileReference; fileEncoding = 4; 
lastKnownFileType = sourcecode.c.c; path = "distance-test.c"; sourceTree = ""; }; 33 | 3D8072D212E915DA00DDD165 /* distance-test */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "distance-test"; sourceTree = BUILT_PRODUCTS_DIR; }; 34 | 3DB3623512B379AA0036C0E1 /* judy-arrays.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "judy-arrays.c"; sourceTree = ""; }; 35 | 3DE7835412C5F4D90046031C /* pairs-test */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "pairs-test"; sourceTree = BUILT_PRODUCTS_DIR; }; 36 | 3DE7835D12C5F52B0046031C /* pairs-test.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = "pairs-test.c"; sourceTree = ""; }; 37 | 8DD76FB20486AB0100D96B5E /* judy-arrays */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = "judy-arrays"; sourceTree = BUILT_PRODUCTS_DIR; }; 38 | C6A0FF2C0290799A04C91782 /* judy-arrays.1 */ = {isa = PBXFileReference; lastKnownFileType = text.man; path = "judy-arrays.1"; sourceTree = ""; }; 39 | /* End PBXFileReference section */ 40 | 41 | /* Begin PBXFrameworksBuildPhase section */ 42 | 3D8072D012E915DA00DDD165 /* Frameworks */ = { 43 | isa = PBXFrameworksBuildPhase; 44 | buildActionMask = 2147483647; 45 | files = ( 46 | ); 47 | runOnlyForDeploymentPostprocessing = 0; 48 | }; 49 | 3DE7835212C5F4D90046031C /* Frameworks */ = { 50 | isa = PBXFrameworksBuildPhase; 51 | buildActionMask = 2147483647; 52 | files = ( 53 | ); 54 | runOnlyForDeploymentPostprocessing = 0; 55 | }; 56 | 8DD76FAD0486AB0100D96B5E /* Frameworks */ = { 57 | isa = PBXFrameworksBuildPhase; 58 | buildActionMask = 2147483647; 59 | files = ( 60 | ); 61 | runOnlyForDeploymentPostprocessing = 0; 62 | }; 63 | /* End PBXFrameworksBuildPhase section */ 64 | 65 | /* Begin PBXGroup section */ 66 | 08FB7794FE84155DC02AAC07 /* 
judy-arrays */ = { 67 | isa = PBXGroup; 68 | children = ( 69 | 08FB7795FE84155DC02AAC07 /* Source */, 70 | 3DE7835912C5F4EA0046031C /* Tests */, 71 | C6A0FF2B0290797F04C91782 /* Documentation */, 72 | 1AB674ADFE9D54B511CA2CBB /* Products */, 73 | ); 74 | name = "judy-arrays"; 75 | sourceTree = ""; 76 | }; 77 | 08FB7795FE84155DC02AAC07 /* Source */ = { 78 | isa = PBXGroup; 79 | children = ( 80 | 3DB3623512B379AA0036C0E1 /* judy-arrays.c */, 81 | 3D2F99B9130302FA006D7433 /* judy-utilities.c */, 82 | 3D10CC0212ED56FB000DE9D4 /* judy-levenshtein.c */, 83 | ); 84 | name = Source; 85 | sourceTree = ""; 86 | }; 87 | 1AB674ADFE9D54B511CA2CBB /* Products */ = { 88 | isa = PBXGroup; 89 | children = ( 90 | 8DD76FB20486AB0100D96B5E /* judy-arrays */, 91 | 3DE7835412C5F4D90046031C /* pairs-test */, 92 | 3D8072D212E915DA00DDD165 /* distance-test */, 93 | ); 94 | name = Products; 95 | sourceTree = ""; 96 | }; 97 | 3DE7835912C5F4EA0046031C /* Tests */ = { 98 | isa = PBXGroup; 99 | children = ( 100 | 3DE7835D12C5F52B0046031C /* pairs-test.c */, 101 | 3D8072B312E914D700DDD165 /* distance-test.c */, 102 | ); 103 | name = Tests; 104 | sourceTree = ""; 105 | }; 106 | C6A0FF2B0290797F04C91782 /* Documentation */ = { 107 | isa = PBXGroup; 108 | children = ( 109 | C6A0FF2C0290799A04C91782 /* judy-arrays.1 */, 110 | ); 111 | name = Documentation; 112 | sourceTree = ""; 113 | }; 114 | /* End PBXGroup section */ 115 | 116 | /* Begin PBXNativeTarget section */ 117 | 3D8072D112E915DA00DDD165 /* distance-test */ = { 118 | isa = PBXNativeTarget; 119 | buildConfigurationList = 3D8072E012E9163100DDD165 /* Build configuration list for PBXNativeTarget "distance-test" */; 120 | buildPhases = ( 121 | 3D8072CF12E915DA00DDD165 /* Sources */, 122 | 3D8072D012E915DA00DDD165 /* Frameworks */, 123 | ); 124 | buildRules = ( 125 | ); 126 | dependencies = ( 127 | ); 128 | name = "distance-test"; 129 | productName = "distance-test"; 130 | productReference = 3D8072D212E915DA00DDD165 /* distance-test */; 131 | 
productType = "com.apple.product-type.tool"; 132 | }; 133 | 3DE7835312C5F4D90046031C /* pairs-test */ = { 134 | isa = PBXNativeTarget; 135 | buildConfigurationList = 3DE7835A12C5F4EA0046031C /* Build configuration list for PBXNativeTarget "pairs-test" */; 136 | buildPhases = ( 137 | 3DE7835112C5F4D90046031C /* Sources */, 138 | 3DE7835212C5F4D90046031C /* Frameworks */, 139 | ); 140 | buildRules = ( 141 | ); 142 | dependencies = ( 143 | ); 144 | name = "pairs-test"; 145 | productName = "pairs-test"; 146 | productReference = 3DE7835412C5F4D90046031C /* pairs-test */; 147 | productType = "com.apple.product-type.tool"; 148 | }; 149 | 8DD76FA90486AB0100D96B5E /* judy-arrays */ = { 150 | isa = PBXNativeTarget; 151 | buildConfigurationList = 1DEB928508733DD80010E9CD /* Build configuration list for PBXNativeTarget "judy-arrays" */; 152 | buildPhases = ( 153 | 8DD76FAB0486AB0100D96B5E /* Sources */, 154 | 8DD76FAD0486AB0100D96B5E /* Frameworks */, 155 | 8DD76FAF0486AB0100D96B5E /* CopyFiles */, 156 | ); 157 | buildRules = ( 158 | ); 159 | dependencies = ( 160 | ); 161 | name = "judy-arrays"; 162 | productInstallPath = "$(HOME)/bin"; 163 | productName = "judy-arrays"; 164 | productReference = 8DD76FB20486AB0100D96B5E /* judy-arrays */; 165 | productType = "com.apple.product-type.tool"; 166 | }; 167 | /* End PBXNativeTarget section */ 168 | 169 | /* Begin PBXProject section */ 170 | 08FB7793FE84155DC02AAC07 /* Project object */ = { 171 | isa = PBXProject; 172 | buildConfigurationList = 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "judy-arrays" */; 173 | compatibilityVersion = "Xcode 3.1"; 174 | developmentRegion = English; 175 | hasScannedForEncodings = 1; 176 | knownRegions = ( 177 | English, 178 | Japanese, 179 | French, 180 | German, 181 | ); 182 | mainGroup = 08FB7794FE84155DC02AAC07 /* judy-arrays */; 183 | projectDirPath = ""; 184 | projectRoot = ""; 185 | targets = ( 186 | 8DD76FA90486AB0100D96B5E /* judy-arrays */, 187 | 
3DE7835312C5F4D90046031C /* pairs-test */, 188 | 3D8072D112E915DA00DDD165 /* distance-test */, 189 | ); 190 | }; 191 | /* End PBXProject section */ 192 | 193 | /* Begin PBXSourcesBuildPhase section */ 194 | 3D8072CF12E915DA00DDD165 /* Sources */ = { 195 | isa = PBXSourcesBuildPhase; 196 | buildActionMask = 2147483647; 197 | files = ( 198 | 3D8072DD12E9161300DDD165 /* distance-test.c in Sources */, 199 | ); 200 | runOnlyForDeploymentPostprocessing = 0; 201 | }; 202 | 3DE7835112C5F4D90046031C /* Sources */ = { 203 | isa = PBXSourcesBuildPhase; 204 | buildActionMask = 2147483647; 205 | files = ( 206 | 3DE7835E12C5F52B0046031C /* pairs-test.c in Sources */, 207 | ); 208 | runOnlyForDeploymentPostprocessing = 0; 209 | }; 210 | 8DD76FAB0486AB0100D96B5E /* Sources */ = { 211 | isa = PBXSourcesBuildPhase; 212 | buildActionMask = 2147483647; 213 | files = ( 214 | 3DB3623612B379AA0036C0E1 /* judy-arrays.c in Sources */, 215 | ); 216 | runOnlyForDeploymentPostprocessing = 0; 217 | }; 218 | /* End PBXSourcesBuildPhase section */ 219 | 220 | /* Begin XCBuildConfiguration section */ 221 | 1DEB928608733DD80010E9CD /* Debug */ = { 222 | isa = XCBuildConfiguration; 223 | buildSettings = { 224 | ALWAYS_SEARCH_USER_PATHS = NO; 225 | COPY_PHASE_STRIP = NO; 226 | GCC_DYNAMIC_NO_PIC = NO; 227 | GCC_ENABLE_FIX_AND_CONTINUE = YES; 228 | GCC_MODEL_TUNING = G5; 229 | GCC_OPTIMIZATION_LEVEL = 0; 230 | GCC_PREPROCESSOR_DEFINITIONS = STANDALONE; 231 | INSTALL_PATH = /usr/local/bin; 232 | PRODUCT_NAME = "judy-arrays"; 233 | }; 234 | name = Debug; 235 | }; 236 | 1DEB928708733DD80010E9CD /* Release */ = { 237 | isa = XCBuildConfiguration; 238 | buildSettings = { 239 | ALWAYS_SEARCH_USER_PATHS = NO; 240 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 241 | GCC_MODEL_TUNING = G5; 242 | GCC_PREPROCESSOR_DEFINITIONS = STANDALONE; 243 | INSTALL_PATH = /usr/local/bin; 244 | PRODUCT_NAME = "judy-arrays"; 245 | }; 246 | name = Release; 247 | }; 248 | 1DEB928A08733DD80010E9CD /* Debug */ = { 249 | isa = 
XCBuildConfiguration; 250 | buildSettings = { 251 | ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; 252 | GCC_C_LANGUAGE_STANDARD = c99; 253 | GCC_OPTIMIZATION_LEVEL = 0; 254 | GCC_VERSION = com.apple.compilers.llvm.clang.1_0; 255 | GCC_WARN_ABOUT_RETURN_TYPE = YES; 256 | GCC_WARN_UNUSED_VARIABLE = YES; 257 | ONLY_ACTIVE_ARCH = YES; 258 | PREBINDING = NO; 259 | SDKROOT = macosx10.5; 260 | }; 261 | name = Debug; 262 | }; 263 | 1DEB928B08733DD80010E9CD /* Release */ = { 264 | isa = XCBuildConfiguration; 265 | buildSettings = { 266 | ARCHS = "$(ARCHS_STANDARD_32_64_BIT)"; 267 | GCC_C_LANGUAGE_STANDARD = c99; 268 | GCC_VERSION = com.apple.compilers.llvm.clang.1_0; 269 | GCC_WARN_ABOUT_RETURN_TYPE = YES; 270 | GCC_WARN_UNUSED_VARIABLE = YES; 271 | PREBINDING = NO; 272 | SDKROOT = macosx10.5; 273 | }; 274 | name = Release; 275 | }; 276 | 3D8072D412E915DC00DDD165 /* Debug */ = { 277 | isa = XCBuildConfiguration; 278 | buildSettings = { 279 | ALWAYS_SEARCH_USER_PATHS = NO; 280 | COPY_PHASE_STRIP = NO; 281 | GCC_DYNAMIC_NO_PIC = NO; 282 | GCC_ENABLE_FIX_AND_CONTINUE = YES; 283 | GCC_MODEL_TUNING = G5; 284 | GCC_OPTIMIZATION_LEVEL = 0; 285 | INSTALL_PATH = /usr/local/bin; 286 | PREBINDING = NO; 287 | PRODUCT_NAME = "distance-test"; 288 | }; 289 | name = Debug; 290 | }; 291 | 3D8072D512E915DC00DDD165 /* Release */ = { 292 | isa = XCBuildConfiguration; 293 | buildSettings = { 294 | ALWAYS_SEARCH_USER_PATHS = NO; 295 | COPY_PHASE_STRIP = YES; 296 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 297 | GCC_ENABLE_FIX_AND_CONTINUE = NO; 298 | GCC_MODEL_TUNING = G5; 299 | INSTALL_PATH = /usr/local/bin; 300 | PREBINDING = NO; 301 | PRODUCT_NAME = "distance-test"; 302 | ZERO_LINK = NO; 303 | }; 304 | name = Release; 305 | }; 306 | 3DE7835612C5F4DB0046031C /* Debug */ = { 307 | isa = XCBuildConfiguration; 308 | buildSettings = { 309 | ALWAYS_SEARCH_USER_PATHS = NO; 310 | COPY_PHASE_STRIP = NO; 311 | GCC_DYNAMIC_NO_PIC = NO; 312 | GCC_ENABLE_FIX_AND_CONTINUE = YES; 313 | GCC_MODEL_TUNING = G5; 
314 | GCC_OPTIMIZATION_LEVEL = 0; 315 | INSTALL_PATH = /usr/local/bin; 316 | PREBINDING = NO; 317 | PRODUCT_NAME = "pairs-test"; 318 | }; 319 | name = Debug; 320 | }; 321 | 3DE7835712C5F4DB0046031C /* Release */ = { 322 | isa = XCBuildConfiguration; 323 | buildSettings = { 324 | ALWAYS_SEARCH_USER_PATHS = NO; 325 | COPY_PHASE_STRIP = YES; 326 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 327 | GCC_ENABLE_FIX_AND_CONTINUE = NO; 328 | GCC_MODEL_TUNING = G5; 329 | INSTALL_PATH = /usr/local/bin; 330 | PREBINDING = NO; 331 | PRODUCT_NAME = "pairs-test"; 332 | ZERO_LINK = NO; 333 | }; 334 | name = Release; 335 | }; 336 | /* End XCBuildConfiguration section */ 337 | 338 | /* Begin XCConfigurationList section */ 339 | 1DEB928508733DD80010E9CD /* Build configuration list for PBXNativeTarget "judy-arrays" */ = { 340 | isa = XCConfigurationList; 341 | buildConfigurations = ( 342 | 1DEB928608733DD80010E9CD /* Debug */, 343 | 1DEB928708733DD80010E9CD /* Release */, 344 | ); 345 | defaultConfigurationIsVisible = 0; 346 | defaultConfigurationName = Release; 347 | }; 348 | 1DEB928908733DD80010E9CD /* Build configuration list for PBXProject "judy-arrays" */ = { 349 | isa = XCConfigurationList; 350 | buildConfigurations = ( 351 | 1DEB928A08733DD80010E9CD /* Debug */, 352 | 1DEB928B08733DD80010E9CD /* Release */, 353 | ); 354 | defaultConfigurationIsVisible = 0; 355 | defaultConfigurationName = Release; 356 | }; 357 | 3D8072E012E9163100DDD165 /* Build configuration list for PBXNativeTarget "distance-test" */ = { 358 | isa = XCConfigurationList; 359 | buildConfigurations = ( 360 | 3D8072D412E915DC00DDD165 /* Debug */, 361 | 3D8072D512E915DC00DDD165 /* Release */, 362 | ); 363 | defaultConfigurationIsVisible = 0; 364 | defaultConfigurationName = Release; 365 | }; 366 | 3DE7835A12C5F4EA0046031C /* Build configuration list for PBXNativeTarget "pairs-test" */ = { 367 | isa = XCConfigurationList; 368 | buildConfigurations = ( 369 | 3DE7835612C5F4DB0046031C /* Debug */, 370 | 
3DE7835712C5F4DB0046031C /* Release */, 371 | ); 372 | defaultConfigurationIsVisible = 0; 373 | defaultConfigurationName = Release; 374 | }; 375 | /* End XCConfigurationList section */ 376 | }; 377 | rootObject = 08FB7793FE84155DC02AAC07 /* Project object */; 378 | } 379 | -------------------------------------------------------------------------------- /judy-levenshtein.c: -------------------------------------------------------------------------------- 1 | /* 2 | * judy-levenshtein.c 3 | * judy-arrays 4 | * 5 | * Created by Jan on 24.01.11. 6 | * Copyright 2011 geheimwerk.de. All rights reserved. 7 | * 8 | */ 9 | 10 | #include 11 | 12 | #include "judy-utilities.c" 13 | 14 | #define DEBUG_KEY_BUFFER 1 15 | 16 | #ifndef MIN 17 | #if defined(__GNUC__) && !defined(__STRICT_ANSI__) 18 | #define MIN(a,b) ({__typeof__(a) _a = (a); __typeof__(b) _b = (b); (_a < _b) ? _a : _b; }) 19 | #else 20 | #define MIN(A,B) ((A) < (B) ? (A) : (B)) 21 | #endif 22 | //#else 23 | // #warning MIN is already defined, MIN(a, b) may not behave as expected. 24 | #endif 25 | 26 | typedef signed long ldint; 27 | #define PRIldint "ld" 28 | 29 | typedef struct _search_data_struct { 30 | void *judy; 31 | void (*resultCallback)(FILE *out, const char *word, ldint distance); 32 | #if DEBUG_KEY_BUFFER 33 | char *key_buffer_char; 34 | #endif 35 | uchar *key_buffer; 36 | int key_buffer_size; 37 | const char *word; 38 | int columns; 39 | void *results; 40 | ldint maxCost; 41 | } search_data_struct; 42 | 43 | // Return the minimum of a, b and c 44 | ldint jxld_smallestLDInt(ldint a, ldint b, ldint c) { 45 | ldint min = a; 46 | if ( b < min ) 47 | min = b; 48 | 49 | if ( c < min ) 50 | min = c; 51 | 52 | return min; 53 | } 54 | 55 | void processResult(FILE *out, const char *word, ldint distance) { 56 | fprintf(out, "('%s', %" PRIldint ")\n", word, distance); 57 | } 58 | 59 | // This recursive helper is used by the search function below. 60 | // It assumes that the previousRow has been filled in already. 
61 | void searchRecursive(judyslot *cell, search_data_struct *d, int key_index, char prevLetter, char thisLetter, ldint *penultimateRow, ldint *previousRow) { 62 | 63 | const char *word = d->word; 64 | int columns = d->columns; 65 | 66 | int currentRowLastIndex = columns - 1; 67 | #if __STDC_VERSION__ >= 199901L 68 | ldint currentRow[columns]; 69 | #else 70 | ldint *currentRow = calloc(columns, sizeof(ldint)); 71 | #endif 72 | 73 | currentRow[0] = previousRow[0] + 1; 74 | 75 | ldint cost; 76 | ldint insertCost; 77 | ldint deleteCost; 78 | ldint replaceCost; 79 | 80 | int column; 81 | 82 | // Build one row for the letter, with a column for each letter in the target 83 | // word, plus one for the empty string at column 0 84 | for (column = 1; column < columns; column++) { 85 | 86 | insertCost = currentRow[column - 1] + 1; 87 | deleteCost = previousRow[column] + 1; 88 | 89 | if (word[column - 1] != thisLetter) { 90 | cost = 1; 91 | } 92 | else { 93 | cost = 0; 94 | } 95 | replaceCost = previousRow[column - 1] + cost; 96 | 97 | currentRow[column] = jxld_smallestLDInt(insertCost, deleteCost, replaceCost); 98 | 99 | #ifndef DISABLE_DAMERAU_TRANSPOSITION 100 | // This conditional adds Damerau transposition to the Levenshtein distance 101 | if (column > 1 && penultimateRow != NULL 102 | && word[column - 1] == prevLetter 103 | && word[column - 2] == thisLetter ) 104 | { 105 | currentRow[column] = MIN(currentRow[column], 106 | penultimateRow[column - 2] + cost ); 107 | } 108 | #endif 109 | } 110 | 111 | cell = judy_slot(d->judy, d->key_buffer, key_index); 112 | 113 | // If the last entry in the row indicates the optimal cost is less than the 114 | // maximum cost, and there is a word in this trie cell, then add it. 
115 | if (currentRow[currentRowLastIndex] <= d->maxCost && cell != NULL && *cell > 0) { 116 | judy_key(d->judy, d->key_buffer, d->key_buffer_size); 117 | d->resultCallback((FILE *)d->results, (const char *)d->key_buffer, currentRow[currentRowLastIndex]); 118 | } 119 | 120 | ldint currentRowMinCost = currentRow[0]; 121 | for (column = 1; column < columns; column++) { 122 | currentRowMinCost = MIN(currentRowMinCost, currentRow[column]); 123 | } 124 | 125 | // If any entries in the row are less than the maximum cost, then 126 | // recursively search each branch of the trie 127 | if (currentRowMinCost <= d->maxCost) { 128 | char key_chars[256]; 129 | int key_char_count = judy_key_chars_below_key(d->judy, d->key_buffer, key_index, d->key_buffer_size, (uchar *)key_chars); 130 | 131 | char nextLetter; 132 | for (int key_char_index = 0; key_char_index < key_char_count; key_char_index++) { 133 | nextLetter = key_chars[key_char_index]; 134 | d->key_buffer[key_index] = nextLetter; 135 | searchRecursive(NULL, d, key_index+1, thisLetter, nextLetter, previousRow, currentRow); 136 | } 137 | } 138 | 139 | #if __STDC_VERSION__ >= 199901L 140 | #else 141 | free(currentRow); 142 | #endif 143 | 144 | } 145 | 146 | void search(void *judy, const char *word, ldint maxCost, void *results, void (*resultCallback)(FILE *out, const char *word, ldint distance)) { 147 | int word_length = strlen(word); 148 | 149 | // Build first row 150 | int currentRowSize = word_length + 1; 151 | 152 | ldint *currentRow = calloc(currentRowSize, sizeof(ldint)); 153 | 154 | for (int k = 0; k < currentRowSize; k++) { 155 | currentRow[k] = k; 156 | } 157 | 158 | // Prepare key_buffer 159 | int key_buffer_size = word_length + maxCost + 2; 160 | uchar *key_buffer = calloc(key_buffer_size, sizeof(uchar)); 161 | 162 | // Prepare unchanging data struct 163 | search_data_struct d; 164 | d.judy = judy; 165 | d.resultCallback = resultCallback; 166 | #if DEBUG_KEY_BUFFER 167 | d.key_buffer_char = (char *)key_buffer; 168 | 
#endif 169 | d.key_buffer = key_buffer; 170 | d.key_buffer_size = key_buffer_size; 171 | d.word = word; 172 | d.columns = word_length+1; 173 | d.results = results; 174 | d.maxCost = maxCost; 175 | 176 | int key_index = 0; 177 | 178 | char key_chars[256]; 179 | int key_char_count = judy_key_chars_below_key(judy, (uchar *)key_buffer, key_index, key_buffer_size, (uchar *)key_chars); 180 | 181 | char letter; 182 | for (int key_char_index = 0; key_char_index < key_char_count; key_char_index++) { 183 | letter = key_chars[key_char_index]; 184 | key_buffer[key_index] = letter; 185 | searchRecursive(NULL, &d, key_index+1, 0, letter, NULL, currentRow); 186 | } 187 | 188 | free(currentRow); 189 | free(key_buffer); 190 | } 191 | -------------------------------------------------------------------------------- /judy-utilities.c: -------------------------------------------------------------------------------- 1 | /* 2 | * judy-utilities.c 3 | * judy-arrays 4 | * 5 | * Created by Jan on 09.02.11. 6 | * Copyright 2011 geheimwerk.de. All rights reserved. 7 | * 8 | */ 9 | 10 | #include "judy-arrays.c" 11 | 12 | #if defined(__LP64__) || \ 13 | defined(__x86_64__) || \ 14 | defined(__amd64__) || \ 15 | defined(_WIN64) || \ 16 | defined(__sparc64__) || \ 17 | defined(__arch64__) || \ 18 | defined(__powerpc64__) || \ 19 | defined (__s390x__) 20 | 21 | #define BOTTOM_UP_SIZE 9 22 | #define BOTTOM_UP_LAST 8 23 | #define BOTTOM_UP_ALL_ZEROS 0x00 24 | 25 | #define BOTTOM_UP_MAX_JUDY_STACK_LEVELS 6 26 | 27 | #if defined (__APPLE__) 28 | #include 29 | #define judyvalue_reverse_bytes(A) OSSwapHostToBigInt64(A) 30 | #elif (BYTE_ORDER != BIG_ENDIAN) 31 | #warning "Big endian 64-bit implementation untested." 
32 | inline judyvalue judyvalue_reverse_bytes(judyvalue val) { 33 | return ((val<<56) & 0xFF00000000000000) | 34 | ((val<<40) & 0x00FF000000000000) | 35 | ((val<<24) & 0x0000FF0000000000) | 36 | ((val<< 8) & 0x000000FF00000000) | 37 | ((val>> 8) & 0x00000000FF000000) | 38 | ((val>>24) & 0x0000000000FF0000) | 39 | ((val>>40) & 0x000000000000FF00) | 40 | ((val>>56) & 0x00000000000000FF)) 41 | } 42 | #endif 43 | 44 | #else 45 | #define BOTTOM_UP_SIZE 5 46 | #define BOTTOM_UP_LAST 4 47 | #define BOTTOM_UP_ALL_ZEROS 0xF0 48 | 49 | #define BOTTOM_UP_MAX_JUDY_STACK_LEVELS 3 50 | 51 | #if defined (__APPLE__) 52 | #include 53 | #define judyvalue_reverse_bytes(A) OSSwapHostToBigInt32(A) 54 | #elif (BYTE_ORDER != BIG_ENDIAN) 55 | inline judyvalue judyvalue_reverse_bytes(judyvalue val) { 56 | return ((val<<24) & 0xFF000000) | 57 | ((val<< 8) & 0x00FF0000) | 58 | ((val>> 8) & 0x0000FF00) | 59 | ((val>>24) & 0x000000FF); 60 | } 61 | #endif 62 | 63 | #endif 64 | 65 | #if (BYTE_ORDER == BIG_ENDIAN) 66 | #define judyvalue_bottom_up_bytes(A) A 67 | #else 68 | #define judyvalue_bottom_up_bytes(A) judyvalue_reverse_bytes(A) 69 | #endif 70 | 71 | 72 | void judyvalue_native_to_bottom_up(judyvalue index, uchar *buff) { 73 | judyvalue *judyvalue_in_buff = (judyvalue *)buff; 74 | *judyvalue_in_buff = judyvalue_bottom_up_bytes(index); 75 | 76 | uchar *zero_toggles = &(buff[BOTTOM_UP_LAST]); 77 | *zero_toggles = 0xFF; 78 | 79 | int j = sizeof(judyvalue); 80 | for (int i = 0; i < sizeof(judyvalue); i++) { 81 | j--; 82 | if (buff[i] == 0x00) { 83 | *zero_toggles ^= (0x01 << j); 84 | buff[i] = 0x01; 85 | } 86 | } 87 | } 88 | 89 | judyvalue judyvalue_bottom_up_to_native(uchar *buff) { 90 | judyvalue index; 91 | 92 | uchar *zero_toggles = &(buff[BOTTOM_UP_LAST]); 93 | 94 | if (*zero_toggles == BOTTOM_UP_ALL_ZEROS) { 95 | return 0; 96 | } 97 | else if (*zero_toggles != 0xFF) { 98 | int j = sizeof(judyvalue); 99 | for (int i = 0; i < sizeof(judyvalue); i++) { 100 | j--; 101 | if ((*zero_toggles & 
(0x01 << j)) == 0x00) { 102 | buff[i] = 0x00; 103 | } 104 | } 105 | } 106 | 107 | index = judyvalue_bottom_up_bytes(*((judyvalue *)buff)); 108 | return index; 109 | } 110 | 111 | 112 | /* 113 | buff_size has to be >= buff_used_size + 2 114 | out_array should be a pointer to a uchar array of size 256. 115 | */ 116 | 117 | uint judy_key_chars_below_key(Judy *judy, uchar *buff, uint buff_used_size, uint buff_size, uchar *out_array) { 118 | uint count = 0; 119 | int key_index = 0; 120 | char this_letter; 121 | char next_letter; 122 | judyslot *cell; 123 | 124 | const uchar *orig_buff = buff; 125 | 126 | size_t temp_buff_size = (buff_used_size == 0) ? 1+2 : buff_used_size+2; 127 | uchar temp_buff_array[temp_buff_size]; // Allocate on stack 128 | uchar *temp_buff = temp_buff_array; 129 | memcpy(temp_buff, orig_buff, buff_used_size); 130 | 131 | if (buff_used_size == 0) { 132 | cell = judy_strt(judy, NULL, 0); 133 | judy_key(judy, temp_buff, temp_buff_size); 134 | if (temp_buff[0] == 0) { 135 | cell = judy_nxt(judy); 136 | } 137 | 138 | if (!cell) { 139 | return 0; 140 | } 141 | 142 | // Recursively search each branch of the trie 143 | do { 144 | judy_key(judy, temp_buff, temp_buff_size); 145 | next_letter = temp_buff[key_index]; 146 | 147 | if (next_letter != '\0') { 148 | out_array[count] = next_letter; 149 | count++; 150 | } 151 | 152 | next_letter += 1; 153 | temp_buff[key_index] = next_letter; 154 | temp_buff[key_index+1] = '\0'; 155 | 156 | cell = judy_strt(judy, temp_buff, key_index+1); 157 | 158 | } while (cell); 159 | } 160 | else { 161 | key_index = buff_used_size-1; 162 | this_letter = temp_buff[key_index]; 163 | int next_key_index = key_index + 1; 164 | 165 | cell = judy_strt(judy, temp_buff, key_index+1); 166 | 167 | //assert(next_key_index < temp_buff_size); 168 | 169 | do { 170 | judy_key(judy, temp_buff, temp_buff_size); 171 | 172 | if (temp_buff[key_index] == this_letter 173 | && memcmp(orig_buff, temp_buff, (size_t)key_index+1) == 0) { 174 | next_letter 
= temp_buff[next_key_index]; 175 | 176 | if (next_letter != '\0') { 177 | out_array[count] = next_letter; 178 | count++; 179 | } 180 | 181 | next_letter += 1; 182 | memcpy(temp_buff, orig_buff, next_key_index); 183 | temp_buff[next_key_index] = next_letter; 184 | temp_buff[next_key_index+1] = '\0'; 185 | 186 | cell = judy_strt(judy, temp_buff, next_key_index+1); 187 | } 188 | else { 189 | break; 190 | } 191 | 192 | } while (cell); 193 | } 194 | 195 | return count; 196 | } 197 | -------------------------------------------------------------------------------- /pairs-test.c: -------------------------------------------------------------------------------- 1 | /* 2 | * pairs-test.c 3 | * judy-arrays 4 | * 5 | * Created by Jan on 18.12.10. 6 | * Copyright 2010 geheimwerk.de. All rights reserved. 7 | * 8 | */ 9 | 10 | #include 11 | #include "judy-utilities.c" 12 | 13 | int main(int argc, char **argv) { 14 | uchar buff[1024]; 15 | uchar key[BOTTOM_UP_SIZE+1] = {0}; 16 | FILE *in, *out; 17 | 18 | judyvalue index; // array index 19 | judyvalue value; // array element value 20 | judyslot *cell; // pointer to array element value 21 | 22 | void *judy; // pointer to Judy array 23 | 24 | if( argc > 1 ) 25 | in = fopen(argv[1], "r"); 26 | else 27 | in = stdin; 28 | 29 | if( argc > 2 ) 30 | out = fopen(argv[2], "w"); 31 | else 32 | out = stdout; 33 | 34 | if( !in ) 35 | fprintf(stderr, "unable to open input file\n"); 36 | 37 | if( !out ) 38 | fprintf(stderr, "unable to open output file\n"); 39 | 40 | 41 | #if 0 42 | judyvalue test = 0; 43 | 44 | do { 45 | index = test; 46 | judyvalue_native_to_bottom_up(index, key); 47 | index = judyvalue_bottom_up_to_native(key); 48 | if (index != test) { 49 | printf("Encoding error: %"PRIjudyvalue "\n", test); 50 | } 51 | test++; 52 | } while (test != 0); 53 | #endif 54 | 55 | judy = judy_open(BOTTOM_UP_MAX_JUDY_STACK_LEVELS); 56 | 57 | while( fgets((char *)buff, sizeof(buff), in) ) { 58 | if (sscanf((char *)buff, "%"PRIjudyvalue " 
%"PRIjudyvalue, &index, &value)) { 59 | judyvalue_native_to_bottom_up(index, key); 60 | #define ENABLE_READ_LOGGING 0 61 | #if ENABLE_READ_LOGGING 62 | for (int i = 0; i < BOTTOM_UP_SIZE; i++) { 63 | printf("%02x", key[i]); 64 | } 65 | printf(" "); 66 | printf("%"PRIjudyvalue " %"PRIjudyvalue "\n", index, value); 67 | #endif 68 | cell = judy_cell(judy, key, BOTTOM_UP_SIZE); 69 | if (value) { 70 | *cell = value; // store new value 71 | } else { 72 | *cell = -1; 73 | } 74 | } 75 | } 76 | 77 | // Next, visit all the stored indexes in sorted order, first ascending, 78 | // then descending, and delete each index during the descending pass. 79 | 80 | index = 0; 81 | cell = judy_strt(judy, NULL, 0); 82 | while (cell != NULL) 83 | { 84 | judy_key(judy, key, sizeof(key)); 85 | index = judyvalue_bottom_up_to_native(key); 86 | 87 | value = *cell; 88 | if (value == -1) value = 0; 89 | printf("%"PRIjudyvalue " %"PRIjudyvalue "\n", index, value); 90 | 91 | cell = judy_nxt(judy); 92 | #define SYMMETRY_TEST 0 93 | #if SYMMETRY_TEST 94 | cell = judy_prv(judy); // This will work if judy_prv() and judy_nxt() are symmetric. 95 | cell = judy_nxt(judy); 96 | #endif 97 | } 98 | 99 | printf("\n"); 100 | 101 | cell = judy_end(judy); 102 | while (cell != NULL) 103 | { 104 | judy_key(judy, key, sizeof(key)); 105 | index = judyvalue_bottom_up_to_native(key); 106 | 107 | value = *cell; 108 | if (value == -1) value = 0; 109 | printf("%"PRIjudyvalue " %"PRIjudyvalue "\n", index, value); 110 | 111 | cell = judy_del(judy); 112 | //cell = judy_prv(judy); 113 | } 114 | 115 | return 0; 116 | } --------------------------------------------------------------------------------