├── CHANGES ├── COMPARE ├── biblio ├── dba.c ├── dbd.c ├── dbe.1 ├── dbe.c ├── dbm.c ├── dbm.h ├── dbu.c ├── grind ├── hash.c ├── makefile ├── pair.c ├── pair.h ├── readme.ms ├── readme.txt ├── sdbm.3 ├── sdbm.bun ├── sdbm.c ├── sdbm.h ├── tune.h └── util.c /CHANGES: -------------------------------------------------------------------------------- 1 | June 1997: 2 | 3 | o fixed a long-hidden memmove bug in delpair that causes database 4 | corruption in MEMMOVE versions of sdbm. [sdbm defaults to duff's 5 | device to move data, so memmove version is almost never used.] 6 | 7 | Changes from the earlier BETA releases. 8 | 9 | o dbm_prep does everything now, so dbm_open is just a simple 10 | wrapper that builds the default filenames. dbm_prep no longer 11 | requires a (DBM *) db parameter: it allocates one itself. It 12 | returns (DBM *) db or (DBM *) NULL. 13 | 14 | o makroom is now reliable. In the common-case optimization of the page 15 | split, the page into which the incoming key/value pair is to be inserted 16 | is write-deferred (if the split is successful), thereby saving a costly 17 | write. BUT, if the split does not make enough room (unsuccessful), the 18 | deferred page is written out, as the failure-window is now dependent on 19 | the number of split attempts. 20 | 21 | o if -DDUFF is defined, hash function will also use the DUFF construct. 22 | This may look like a micro-performance tweak (maybe it is), but in fact, 23 | the hash function is the third most-heavily used function, after read 24 | and write. 
25 | -------------------------------------------------------------------------------- /COMPARE: -------------------------------------------------------------------------------- 1 | 2 | Script started on Thu Sep 28 15:41:06 1989 3 | % uname -a 4 | titan titan 4_0 UMIPS mips 5 | % make all x-dbm 6 | cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dbm.c 7 | cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c sdbm.c 8 | cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c pair.c 9 | cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c hash.c 10 | ar cr libsdbm.a sdbm.o pair.o hash.o 11 | ranlib libsdbm.a 12 | cc -o dbm dbm.o libsdbm.a 13 | cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dba.c 14 | cc -o dba dba.o 15 | cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dbd.c 16 | cc -o dbd dbd.o 17 | cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -o x-dbm dbm.o 18 | % 19 | % 20 | % wc history 21 | 65110 218344 3204883 history 22 | % 23 | % /bin/time dbm build foo 6 | #include 7 | #include "sdbm.h" 8 | 9 | char *progname; 10 | extern void oops(); 11 | 12 | int 13 | main(argc, argv) 14 | char **argv; 15 | { 16 | int n; 17 | char *p; 18 | char *name; 19 | int pagf; 20 | 21 | progname = argv[0]; 22 | 23 | if (p = argv[1]) { 24 | name = (char *) malloc((n = strlen(p)) + 5); 25 | strcpy(name, p); 26 | strcpy(name + n, ".pag"); 27 | 28 | if ((pagf = open(name, O_RDONLY)) < 0) 29 | oops("cannot open %s.", name); 30 | 31 | sdump(pagf); 32 | } 33 | else 34 | oops("usage: %s dbname", progname); 35 | 36 | return 0; 37 | } 38 | 39 | sdump(pagf) 40 | int pagf; 41 | { 42 | register b; 43 | register n = 0; 44 | register t = 0; 45 | register o = 0; 46 | register e; 47 | char pag[PBLKSIZ]; 48 | 49 | while ((b = read(pagf, pag, PBLKSIZ)) > 0) { 50 | printf("#%d: ", n); 51 | if (!okpage(pag)) 52 | printf("bad\n"); 53 | else { 54 | printf("ok. 
"); 55 | if (!(e = pagestat(pag))) 56 | o++; 57 | else 58 | t += e; 59 | } 60 | n++; 61 | } 62 | 63 | if (b == 0) 64 | printf("%d pages (%d holes): %d entries\n", n, o, t); 65 | else 66 | oops("read failed: block %d", n); 67 | } 68 | 69 | pagestat(pag) 70 | char *pag; 71 | { 72 | register n; 73 | register free; 74 | register short *ino = (short *) pag; 75 | 76 | if (!(n = ino[0])) 77 | printf("no entries.\n"); 78 | else { 79 | free = ino[n] - (n + 1) * sizeof(short); 80 | printf("%3d entries %2d%% used free %d.\n", 81 | n / 2, ((PBLKSIZ - free) * 100) / PBLKSIZ, free); 82 | } 83 | return n / 2; 84 | } 85 | -------------------------------------------------------------------------------- /dbd.c: -------------------------------------------------------------------------------- 1 | /* 2 | * dbd - dump a dbm data file 3 | */ 4 | 5 | #include 6 | #include 7 | #include "sdbm.h" 8 | 9 | char *progname; 10 | extern void oops(); 11 | 12 | 13 | #define empty(page) (((short *) page)[0] == 0) 14 | 15 | int 16 | main(argc, argv) 17 | char **argv; 18 | { 19 | int n; 20 | char *p; 21 | char *name; 22 | int pagf; 23 | 24 | progname = argv[0]; 25 | 26 | if (p = argv[1]) { 27 | name = (char *) malloc((n = strlen(p)) + 5); 28 | strcpy(name, p); 29 | strcpy(name + n, ".pag"); 30 | 31 | if ((pagf = open(name, O_RDONLY)) < 0) 32 | oops("cannot open %s.", name); 33 | 34 | sdump(pagf); 35 | } 36 | else 37 | oops("usage: %s dbname", progname); 38 | return 0; 39 | } 40 | 41 | sdump(pagf) 42 | int pagf; 43 | { 44 | register r; 45 | register n = 0; 46 | register o = 0; 47 | char pag[PBLKSIZ]; 48 | 49 | while ((r = read(pagf, pag, PBLKSIZ)) > 0) { 50 | if (!okpage(pag)) 51 | fprintf(stderr, "%d: bad page.\n", n); 52 | else if (empty(pag)) 53 | o++; 54 | else 55 | dispage(pag); 56 | n++; 57 | } 58 | 59 | if (r == 0) 60 | fprintf(stderr, "%d pages (%d holes).\n", n, o); 61 | else 62 | oops("read failed: block %d", n); 63 | } 64 | 65 | 66 | #ifdef OLD 67 | dispage(pag) 68 | char *pag; 69 | { 70 | 
register i, n; 71 | register off; 72 | register short *ino = (short *) pag; 73 | 74 | off = PBLKSIZ; 75 | for (i = 1; i < ino[0]; i += 2) { 76 | printf("\t[%d]: ", ino[i]); 77 | for (n = ino[i]; n < off; n++) 78 | putchar(pag[n]); 79 | putchar(' '); 80 | off = ino[i]; 81 | printf("[%d]: ", ino[i + 1]); 82 | for (n = ino[i + 1]; n < off; n++) 83 | putchar(pag[n]); 84 | off = ino[i + 1]; 85 | putchar('\n'); 86 | } 87 | } 88 | #else 89 | dispage(pag) 90 | char *pag; 91 | { 92 | register i, n; 93 | register off; 94 | register short *ino = (short *) pag; 95 | 96 | off = PBLKSIZ; 97 | for (i = 1; i < ino[0]; i += 2) { 98 | for (n = ino[i]; n < off; n++) 99 | if (pag[n] != 0) 100 | putchar(pag[n]); 101 | putchar('\t'); 102 | off = ino[i]; 103 | for (n = ino[i + 1]; n < off; n++) 104 | if (pag[n] != 0) 105 | putchar(pag[n]); 106 | putchar('\n'); 107 | off = ino[i + 1]; 108 | } 109 | } 110 | #endif 111 | -------------------------------------------------------------------------------- /dbe.1: -------------------------------------------------------------------------------- 1 | .TH dbe 1 "ndbm(3) EDITOR" 2 | .SH NAME 3 | dbe \- Edit a ndbm(3) database 4 | .SH USAGE 5 | dbe [-m r|w|rw] [-crtvx] -a|-d|-f|-F|-s [ []] 6 | .SH DESCRIPTION 7 | \fIdbme\fP operates on ndbm(3) databases. 8 | It can be used to create them, look at them or change them. 9 | When specifying the value of a key or the content of its associated entry, 10 | \\nnn, \\0, \\n, \\t, \\f and \\r are interpreted as usual. 11 | When displaying key/content pairs, non-printable characters are displayed 12 | using the \\nnn notation. 13 | .SH OPTIONS 14 | .IP -a 15 | List all entries in the database. 16 | .IP -c 17 | Create the database if it does not exist. 18 | .IP -d 19 | Delete the entry associated with the specified key. 20 | .IP -f 21 | Fetch and display the entry associated with the specified key. 
22 | .IP -F 23 | Fetch and display all the entries whose key match the specified 24 | regular-expression 25 | .IP "-m r|w|rw" 26 | Open the database in read-only, write-only or read-write mode 27 | .IP -r 28 | Replace the entry associated with the specified key if it already exists. 29 | See option -s. 30 | .IP -s 31 | Store an entry under a specific key. 32 | An error occurs if the key already exists and the option -r was not specified. 33 | .IP -t 34 | Re-initialize the database before executing the command. 35 | .IP -v 36 | Verbose mode. 37 | Confirm stores and deletions. 38 | .IP -x 39 | If option -x is used with option -c, then if the database already exists, 40 | an error occurs. 41 | This can be used to implement a simple exclusive access locking mechanism. 42 | .SH SEE ALSO 43 | ndbm(3) 44 | .SH AUTHOR 45 | janick@bnr.ca 46 | 47 | -------------------------------------------------------------------------------- /dbe.c: -------------------------------------------------------------------------------- 1 | #include 2 | #ifndef VMS 3 | #include 4 | #include 5 | #else 6 | #include "file.h" 7 | #include "ndbm.h" 8 | #endif 9 | #include 10 | 11 | /***************************************************************************\ 12 | ** ** 13 | ** Function name: getopt() ** 14 | ** Author: Henry Spencer, UofT ** 15 | ** Coding date: 84/04/28 ** 16 | ** ** 17 | ** Description: ** 18 | ** ** 19 | ** Parses argv[] for arguments. ** 20 | ** Works with Whitesmith's C compiler. ** 21 | ** ** 22 | ** Inputs - The number of arguments ** 23 | ** - The base address of the array of arguments ** 24 | ** - A string listing the valid options (':' indicates an ** 25 | ** argument to the preceding option is required, a ';' ** 26 | ** indicates an argument to the preceding option is optional) ** 27 | ** ** 28 | ** Outputs - Returns the next option character, ** 29 | ** '?' for non '-' arguments ** 30 | ** or ':' when there is no more arguments. 
** 31 | ** ** 32 | ** Side Effects + The argument to an option is pointed to by 'optarg' ** 33 | ** ** 34 | ***************************************************************************** 35 | ** ** 36 | ** REVISION HISTORY: ** 37 | ** ** 38 | ** DATE NAME DESCRIPTION ** 39 | ** YY/MM/DD ------------------ ------------------------------------ ** 40 | ** 88/10/20 Janick Bergeron Returns '?' on unamed arguments ** 41 | ** returns '!' on unknown options ** 42 | ** and 'EOF' only when exhausted. ** 43 | ** 88/11/18 Janick Bergeron Return ':' when no more arguments ** 44 | ** 89/08/11 Janick Bergeron Optional optarg when ';' in optstring ** 45 | ** ** 46 | \***************************************************************************/ 47 | 48 | char *optarg; /* Global argument pointer. */ 49 | 50 | #ifdef VMS 51 | #define index strchr 52 | #endif 53 | 54 | char 55 | getopt(argc, argv, optstring) 56 | int argc; 57 | char **argv; 58 | char *optstring; 59 | { 60 | register int c; 61 | register char *place; 62 | extern char *index(); 63 | static int optind = 0; 64 | static char *scan = NULL; 65 | 66 | optarg = NULL; 67 | 68 | if (scan == NULL || *scan == '\0') { 69 | 70 | if (optind == 0) 71 | optind++; 72 | if (optind >= argc) 73 | return ':'; 74 | 75 | optarg = place = argv[optind++]; 76 | if (place[0] != '-' || place[1] == '\0') 77 | return '?'; 78 | if (place[1] == '-' && place[2] == '\0') 79 | return '?'; 80 | scan = place + 1; 81 | } 82 | 83 | c = *scan++; 84 | place = index(optstring, c); 85 | if (place == NULL || c == ':' || c == ';') { 86 | 87 | (void) fprintf(stderr, "%s: unknown option %c\n", argv[0], c); 88 | scan = NULL; 89 | return '!'; 90 | } 91 | if (*++place == ':') { 92 | 93 | if (*scan != '\0') { 94 | 95 | optarg = scan; 96 | scan = NULL; 97 | 98 | } 99 | else { 100 | 101 | if (optind >= argc) { 102 | 103 | (void) fprintf(stderr, "%s: %c requires an argument\n", 104 | argv[0], c); 105 | return '!'; 106 | } 107 | optarg = argv[optind]; 108 | optind++; 109 | } 
110 | } 111 | else if (*place == ';') { 112 | 113 | if (*scan != '\0') { 114 | 115 | optarg = scan; 116 | scan = NULL; 117 | 118 | } 119 | else { 120 | 121 | if (optind >= argc || *argv[optind] == '-') 122 | optarg = NULL; 123 | else { 124 | optarg = argv[optind]; 125 | optind++; 126 | } 127 | } 128 | } 129 | return c; 130 | } 131 | 132 | 133 | void 134 | print_datum(db) 135 | datum db; 136 | { 137 | int i; 138 | 139 | putchar('"'); 140 | for (i = 0; i < db.dsize; i++) { 141 | if (isprint(db.dptr[i])) 142 | putchar(db.dptr[i]); 143 | else { 144 | putchar('\\'); 145 | putchar('0' + ((db.dptr[i] >> 6) & 0x07)); 146 | putchar('0' + ((db.dptr[i] >> 3) & 0x07)); 147 | putchar('0' + (db.dptr[i] & 0x07)); 148 | } 149 | } 150 | putchar('"'); 151 | } 152 | 153 | 154 | datum 155 | read_datum(s) 156 | char *s; 157 | { 158 | datum db; 159 | char *p; 160 | int i; 161 | 162 | db.dsize = 0; 163 | db.dptr = (char *) malloc(strlen(s) * sizeof(char)); 164 | for (p = db.dptr; *s != '\0'; p++, db.dsize++, s++) { 165 | if (*s == '\\') { 166 | if (*++s == 'n') 167 | *p = '\n'; 168 | else if (*s == 'r') 169 | *p = '\r'; 170 | else if (*s == 'f') 171 | *p = '\f'; 172 | else if (*s == 't') 173 | *p = '\t'; 174 | else if (isdigit(*s) && isdigit(*(s + 1)) && isdigit(*(s + 2))) { 175 | i = (*s++ - '0') << 6; 176 | i |= (*s++ - '0') << 3; 177 | i |= *s - '0'; 178 | *p = i; 179 | } 180 | else if (*s == '0') 181 | *p = '\0'; 182 | else 183 | *p = *s; 184 | } 185 | else 186 | *p = *s; 187 | } 188 | 189 | return db; 190 | } 191 | 192 | 193 | char * 194 | key2s(db) 195 | datum db; 196 | { 197 | char *buf; 198 | char *p1, *p2; 199 | 200 | buf = (char *) malloc((db.dsize + 1) * sizeof(char)); 201 | for (p1 = buf, p2 = db.dptr; *p2 != '\0'; *p1++ = *p2++); 202 | *p1 = '\0'; 203 | return buf; 204 | } 205 | 206 | 207 | main(argc, argv) 208 | int argc; 209 | char **argv; 210 | { 211 | typedef enum { 212 | YOW, FETCH, STORE, DELETE, SCAN, REGEXP 213 | } commands; 214 | char opt; 215 | int flags; 216 | int 
giveusage = 0; 217 | int verbose = 0; 218 | commands what = YOW; 219 | char *comarg[3]; 220 | int st_flag = DBM_INSERT; 221 | int argn; 222 | DBM *db; 223 | datum key; 224 | datum content; 225 | 226 | flags = O_RDWR; 227 | argn = 0; 228 | 229 | while ((opt = getopt(argc, argv, "acdfFm:rstvx")) != ':') { 230 | switch (opt) { 231 | case 'a': 232 | what = SCAN; 233 | break; 234 | case 'c': 235 | flags |= O_CREAT; 236 | break; 237 | case 'd': 238 | what = DELETE; 239 | break; 240 | case 'f': 241 | what = FETCH; 242 | break; 243 | case 'F': 244 | what = REGEXP; 245 | break; 246 | case 'm': 247 | flags &= ~(000007); 248 | if (strcmp(optarg, "r") == 0) 249 | flags |= O_RDONLY; 250 | else if (strcmp(optarg, "w") == 0) 251 | flags |= O_WRONLY; 252 | else if (strcmp(optarg, "rw") == 0) 253 | flags |= O_RDWR; 254 | else { 255 | fprintf(stderr, "Invalid mode: \"%s\"\n", optarg); 256 | giveusage = 1; 257 | } 258 | break; 259 | case 'r': 260 | st_flag = DBM_REPLACE; 261 | break; 262 | case 's': 263 | what = STORE; 264 | break; 265 | case 't': 266 | flags |= O_TRUNC; 267 | break; 268 | case 'v': 269 | verbose = 1; 270 | break; 271 | case 'x': 272 | flags |= O_EXCL; 273 | break; 274 | case '!': 275 | giveusage = 1; 276 | break; 277 | case '?': 278 | if (argn < 3) 279 | comarg[argn++] = optarg; 280 | else { 281 | fprintf(stderr, "Too many arguments.\n"); 282 | giveusage = 1; 283 | } 284 | break; 285 | } 286 | } 287 | 288 | if (giveusage | what == YOW | argn < 1) { 289 | fprintf(stderr, "Usage: %s databse [-m r|w|rw] [-crtx] -a|-d|-f|-F|-s [key [content]]\n", argv[0]); 290 | exit(-1); 291 | } 292 | 293 | if ((db = dbm_open(comarg[0], flags, 0777)) == NULL) { 294 | fprintf(stderr, "Error opening database \"%s\"\n", comarg[0]); 295 | exit(-1); 296 | } 297 | 298 | if (argn > 1) 299 | key = read_datum(comarg[1]); 300 | if (argn > 2) 301 | content = read_datum(comarg[2]); 302 | 303 | switch (what) { 304 | 305 | case SCAN: 306 | key = dbm_firstkey(db); 307 | if (dbm_error(db)) { 308 | 
fprintf(stderr, "Error when fetching first key\n"); 309 | goto db_exit; 310 | } 311 | while (key.dptr != NULL) { 312 | content = dbm_fetch(db, key); 313 | if (dbm_error(db)) { 314 | fprintf(stderr, "Error when fetching "); 315 | print_datum(key); 316 | printf("\n"); 317 | goto db_exit; 318 | } 319 | print_datum(key); 320 | printf(": "); 321 | print_datum(content); 322 | printf("\n"); 323 | if (dbm_error(db)) { 324 | fprintf(stderr, "Error when fetching next key\n"); 325 | goto db_exit; 326 | } 327 | key = dbm_nextkey(db); 328 | } 329 | break; 330 | 331 | case REGEXP: 332 | if (argn < 2) { 333 | fprintf(stderr, "Missing regular expression.\n"); 334 | goto db_exit; 335 | } 336 | if (re_comp(comarg[1])) { 337 | fprintf(stderr, "Invalid regular expression\n"); 338 | goto db_exit; 339 | } 340 | key = dbm_firstkey(db); 341 | if (dbm_error(db)) { 342 | fprintf(stderr, "Error when fetching first key\n"); 343 | goto db_exit; 344 | } 345 | while (key.dptr != NULL) { 346 | if (re_exec(key2s(key))) { 347 | content = dbm_fetch(db, key); 348 | if (dbm_error(db)) { 349 | fprintf(stderr, "Error when fetching "); 350 | print_datum(key); 351 | printf("\n"); 352 | goto db_exit; 353 | } 354 | print_datum(key); 355 | printf(": "); 356 | print_datum(content); 357 | printf("\n"); 358 | if (dbm_error(db)) { 359 | fprintf(stderr, "Error when fetching next key\n"); 360 | goto db_exit; 361 | } 362 | } 363 | key = dbm_nextkey(db); 364 | } 365 | break; 366 | 367 | case FETCH: 368 | if (argn < 2) { 369 | fprintf(stderr, "Missing fetch key.\n"); 370 | goto db_exit; 371 | } 372 | content = dbm_fetch(db, key); 373 | if (dbm_error(db)) { 374 | fprintf(stderr, "Error when fetching "); 375 | print_datum(key); 376 | printf("\n"); 377 | goto db_exit; 378 | } 379 | if (content.dptr == NULL) { 380 | fprintf(stderr, "Cannot find "); 381 | print_datum(key); 382 | printf("\n"); 383 | goto db_exit; 384 | } 385 | print_datum(key); 386 | printf(": "); 387 | print_datum(content); 388 | printf("\n"); 389 | 
break; 390 | 391 | case DELETE: 392 | if (argn < 2) { 393 | fprintf(stderr, "Missing delete key.\n"); 394 | goto db_exit; 395 | } 396 | if (dbm_delete(db, key) || dbm_error(db)) { 397 | fprintf(stderr, "Error when deleting "); 398 | print_datum(key); 399 | printf("\n"); 400 | goto db_exit; 401 | } 402 | if (verbose) { 403 | print_datum(key); 404 | printf(": DELETED\n"); 405 | } 406 | break; 407 | 408 | case STORE: 409 | if (argn < 3) { 410 | fprintf(stderr, "Missing key and/or content.\n"); 411 | goto db_exit; 412 | } 413 | if (dbm_store(db, key, content, st_flag) || dbm_error(db)) { 414 | fprintf(stderr, "Error when storing "); 415 | print_datum(key); 416 | printf("\n"); 417 | goto db_exit; 418 | } 419 | if (verbose) { 420 | print_datum(key); 421 | printf(": "); 422 | print_datum(content); 423 | printf(" STORED\n"); 424 | } 425 | break; 426 | } 427 | 428 | db_exit: 429 | dbm_clearerr(db); 430 | dbm_close(db); 431 | if (dbm_error(db)) { 432 | fprintf(stderr, "Error closing database \"%s\"\n", comarg[0]); 433 | exit(-1); 434 | } 435 | } 436 | -------------------------------------------------------------------------------- /dbm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 1985 The Regents of the University of California. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms are permitted 6 | * provided that the above copyright notice and this paragraph are 7 | * duplicated in all such forms and that any documentation, 8 | * advertising materials, and other materials related to such 9 | * distribution and use acknowledge that the software was developed 10 | * by the University of California, Berkeley. The name of the 11 | * University may not be used to endorse or promote products derived 12 | * from this software without specific prior written permission. 
13 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 14 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 15 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 | */ 17 | 18 | #ifndef lint 19 | static char sccsid[] = "@(#)dbm.c 5.4 (Berkeley) 5/24/89"; 20 | #endif /* not lint */ 21 | 22 | #include "dbm.h" 23 | 24 | #define NODB ((DBM *)0) 25 | 26 | static DBM *cur_db = NODB; 27 | 28 | static char no_db[] = "dbm: no open database\n"; 29 | 30 | dbminit(file) 31 | char *file; 32 | { 33 | if (cur_db != NODB) 34 | dbm_close(cur_db); 35 | 36 | cur_db = dbm_open(file, 2, 0); 37 | if (cur_db == NODB) { 38 | cur_db = dbm_open(file, 0, 0); 39 | if (cur_db == NODB) 40 | return (-1); 41 | } 42 | return (0); 43 | } 44 | 45 | long 46 | forder(key) 47 | datum key; 48 | { 49 | if (cur_db == NODB) { 50 | printf(no_db); 51 | return (0L); 52 | } 53 | return (dbm_forder(cur_db, key)); 54 | } 55 | 56 | datum 57 | fetch(key) 58 | datum key; 59 | { 60 | datum item; 61 | 62 | if (cur_db == NODB) { 63 | printf(no_db); 64 | item.dptr = 0; 65 | return (item); 66 | } 67 | return (dbm_fetch(cur_db, key)); 68 | } 69 | 70 | delete(key) 71 | datum key; 72 | { 73 | if (cur_db == NODB) { 74 | printf(no_db); 75 | return (-1); 76 | } 77 | if (dbm_rdonly(cur_db)) 78 | return (-1); 79 | return (dbm_delete(cur_db, key)); 80 | } 81 | 82 | store(key, dat) 83 | datum key, dat; 84 | { 85 | if (cur_db == NODB) { 86 | printf(no_db); 87 | return (-1); 88 | } 89 | if (dbm_rdonly(cur_db)) 90 | return (-1); 91 | 92 | return (dbm_store(cur_db, key, dat, DBM_REPLACE)); 93 | } 94 | 95 | datum 96 | firstkey() 97 | { 98 | datum item; 99 | 100 | if (cur_db == NODB) { 101 | printf(no_db); 102 | item.dptr = 0; 103 | return (item); 104 | } 105 | return (dbm_firstkey(cur_db)); 106 | } 107 | 108 | datum 109 | nextkey(key) 110 | datum key; 111 | { 112 | datum item; 113 | 114 | if (cur_db == NODB) { 115 | printf(no_db); 116 | item.dptr = 0; 117 | return (item); 118 | } 
119 | return (dbm_nextkey(cur_db, key)); 120 | } 121 | -------------------------------------------------------------------------------- /dbm.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 1983 The Regents of the University of California. 3 | * All rights reserved. 4 | * 5 | * Redistribution and use in source and binary forms are permitted 6 | * provided that the above copyright notice and this paragraph are 7 | * duplicated in all such forms and that any documentation, 8 | * advertising materials, and other materials related to such 9 | * distribution and use acknowledge that the software was developed 10 | * by the University of California, Berkeley. The name of the 11 | * University may not be used to endorse or promote products derived 12 | * from this software without specific prior written permission. 13 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 14 | * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 15 | * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 16 | * 17 | * @(#)dbm.h 5.2 (Berkeley) 5/24/89 18 | */ 19 | 20 | #ifndef NULL 21 | /* 22 | * this is lunacy, we no longer use it (and never should have 23 | * unconditionally defined it), but, this whole file is for 24 | * backwards compatibility - someone may rely on this. 
25 | */ 26 | #define NULL ((char *) 0) 27 | #endif 28 | 29 | #include 30 | 31 | datum fetch(); 32 | datum firstkey(); 33 | datum nextkey(); 34 | -------------------------------------------------------------------------------- /dbu.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #ifdef SDBM 4 | #include "sdbm.h" 5 | #else 6 | #include 7 | #endif 8 | #include 9 | 10 | #ifdef BSD42 11 | #define strchr index 12 | #endif 13 | 14 | extern int getopt(); 15 | extern char *strchr(); 16 | extern void oops(); 17 | 18 | char *progname; 19 | 20 | static int rflag; 21 | static char *usage = "%s [-R] cat | look |... dbmname"; 22 | 23 | #define DERROR 0 24 | #define DLOOK 1 25 | #define DINSERT 2 26 | #define DDELETE 3 27 | #define DCAT 4 28 | #define DBUILD 5 29 | #define DPRESS 6 30 | #define DCREAT 7 31 | 32 | #define LINEMAX 8192 33 | 34 | typedef struct { 35 | char *sname; 36 | int scode; 37 | int flags; 38 | } cmd; 39 | 40 | static cmd cmds[] = { 41 | 42 | "fetch", DLOOK, O_RDONLY, 43 | "get", DLOOK, O_RDONLY, 44 | "look", DLOOK, O_RDONLY, 45 | "add", DINSERT, O_RDWR, 46 | "insert", DINSERT, O_RDWR, 47 | "store", DINSERT, O_RDWR, 48 | "delete", DDELETE, O_RDWR, 49 | "remove", DDELETE, O_RDWR, 50 | "dump", DCAT, O_RDONLY, 51 | "list", DCAT, O_RDONLY, 52 | "cat", DCAT, O_RDONLY, 53 | "creat", DCREAT, O_RDWR | O_CREAT | O_TRUNC, 54 | "new", DCREAT, O_RDWR | O_CREAT | O_TRUNC, 55 | "build", DBUILD, O_RDWR | O_CREAT, 56 | "squash", DPRESS, O_RDWR, 57 | "compact", DPRESS, O_RDWR, 58 | "compress", DPRESS, O_RDWR 59 | }; 60 | 61 | #define CTABSIZ (sizeof (cmds)/sizeof (cmd)) 62 | 63 | static cmd *parse(); 64 | static void badk(), doit(), prdatum(); 65 | 66 | int 67 | main(argc, argv) 68 | int argc; 69 | char *argv[]; 70 | { 71 | int c; 72 | register cmd *act; 73 | extern int optind; 74 | extern char *optarg; 75 | 76 | progname = argv[0]; 77 | 78 | while ((c = getopt(argc, argv, "R")) != EOF) 79 | switch (c) { 80 | 
case 'R': /* raw processing */ 81 | rflag++; 82 | break; 83 | 84 | default: 85 | oops("usage: %s", usage); 86 | break; 87 | } 88 | 89 | if ((argc -= optind) < 2) 90 | oops("usage: %s", usage); 91 | 92 | if ((act = parse(argv[optind])) == NULL) 93 | badk(argv[optind]); 94 | optind++; 95 | doit(act, argv[optind]); 96 | return 0; 97 | } 98 | 99 | static void 100 | doit(act, file) 101 | register cmd *act; 102 | char *file; 103 | { 104 | datum key; 105 | datum val; 106 | register DBM *db; 107 | register char *op; 108 | register int n; 109 | char *line; 110 | #ifdef TIME 111 | long start; 112 | extern long time(); 113 | #endif 114 | 115 | if ((db = dbm_open(file, act->flags, 0644)) == NULL) 116 | oops("cannot open: %s", file); 117 | 118 | if ((line = (char *) malloc(LINEMAX)) == NULL) 119 | oops("%s: cannot get memory", "line alloc"); 120 | 121 | switch (act->scode) { 122 | 123 | case DLOOK: 124 | while (fgets(line, LINEMAX, stdin) != NULL) { 125 | n = strlen(line) - 1; 126 | line[n] = 0; 127 | key.dptr = line; 128 | key.dsize = n; 129 | val = dbm_fetch(db, key); 130 | if (val.dptr != NULL) { 131 | prdatum(stdout, val); 132 | putchar('\n'); 133 | continue; 134 | } 135 | prdatum(stderr, key); 136 | fprintf(stderr, ": not found.\n"); 137 | } 138 | break; 139 | case DINSERT: 140 | break; 141 | case DDELETE: 142 | while (fgets(line, LINEMAX, stdin) != NULL) { 143 | n = strlen(line) - 1; 144 | line[n] = 0; 145 | key.dptr = line; 146 | key.dsize = n; 147 | if (dbm_delete(db, key) == -1) { 148 | prdatum(stderr, key); 149 | fprintf(stderr, ": not found.\n"); 150 | } 151 | } 152 | break; 153 | case DCAT: 154 | for (key = dbm_firstkey(db); key.dptr != 0; 155 | key = dbm_nextkey(db)) { 156 | prdatum(stdout, key); 157 | putchar('\t'); 158 | prdatum(stdout, dbm_fetch(db, key)); 159 | putchar('\n'); 160 | } 161 | break; 162 | case DBUILD: 163 | #ifdef TIME 164 | start = time(0); 165 | #endif 166 | while (fgets(line, LINEMAX, stdin) != NULL) { 167 | n = strlen(line) - 1; 168 | line[n] 
= 0; 169 | key.dptr = line; 170 | if ((op = strchr(line, '\t')) != 0) { 171 | key.dsize = op - line; 172 | *op++ = 0; 173 | val.dptr = op; 174 | val.dsize = line + n - op; 175 | } 176 | else 177 | oops("bad input; %s", line); 178 | 179 | if (dbm_store(db, key, val, DBM_REPLACE) < 0) { 180 | prdatum(stderr, key); 181 | fprintf(stderr, ": "); 182 | oops("store: %s", "failed"); 183 | } 184 | } 185 | #ifdef TIME 186 | printf("done: %d seconds.\n", time(0) - start); 187 | #endif 188 | break; 189 | case DPRESS: 190 | break; 191 | case DCREAT: 192 | break; 193 | } 194 | 195 | dbm_close(db); 196 | } 197 | 198 | static void 199 | badk(word) 200 | char *word; 201 | { 202 | register int i; 203 | 204 | if (progname) 205 | fprintf(stderr, "%s: ", progname); 206 | fprintf(stderr, "bad keywd %s. use one of\n", word); 207 | for (i = 0; i < (int)CTABSIZ; i++) 208 | fprintf(stderr, "%-8s%c", cmds[i].sname, 209 | ((i + 1) % 6 == 0) ? '\n' : ' '); 210 | fprintf(stderr, "\n"); 211 | exit(1); 212 | /*NOTREACHED*/ 213 | } 214 | 215 | static cmd * 216 | parse(str) 217 | register char *str; 218 | { 219 | register int i = CTABSIZ; 220 | register cmd *p; 221 | 222 | for (p = cmds; i--; p++) 223 | if (strcmp(p->sname, str) == 0) 224 | return p; 225 | return NULL; 226 | } 227 | 228 | static void 229 | prdatum(stream, d) 230 | FILE *stream; 231 | datum d; 232 | { 233 | register int c; 234 | register char *p = d.dptr; 235 | register int n = d.dsize; 236 | 237 | while (n--) { 238 | c = *p++ & 0377; 239 | if (c & 0200) { 240 | fprintf(stream, "M-"); 241 | c &= 0177; 242 | } 243 | if (c == 0177 || c < ' ') 244 | fprintf(stream, "^%c", (c == 0177) ? '?' 
: c + '@'); 245 | else 246 | putc(c, stream); 247 | } 248 | } 249 | 250 | 251 | -------------------------------------------------------------------------------- /grind: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | rm -f /tmp/*.dir /tmp/*.pag 3 | awk -e '{ 4 | printf "%s\t", $0 5 | for (i = 0; i < 40; i++) 6 | printf "%s.", $0 7 | printf "\n" 8 | }' < /usr/dict/words | $1 build /tmp/$2 9 | 10 | -------------------------------------------------------------------------------- /hash.c: -------------------------------------------------------------------------------- 1 | /* 2 | * sdbm - ndbm work-alike hashed database library 3 | * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). 4 | * author: oz@nexus.yorku.ca 5 | * status: public domain. keep it that way. 6 | * 7 | * hashing routine 8 | */ 9 | 10 | #include "sdbm.h" 11 | /* 12 | * polynomial conversion ignoring overflows 13 | * [this seems to work remarkably well, in fact better 14 | * than the ndbm hash function. Replace at your own risk] 15 | * use: 65599 nice. 16 | * 65587 even better. 
/*
 * dbm_hash - hash the first len bytes of str.
 *
 * Each byte is folded in as  sum = byte + 65599 * sum,  starting from
 * zero, in unsigned long arithmetic (overflow simply wraps). The result
 * is returned as a long. A len of zero yields 0.
 *
 * With -DDUFF the loop is unrolled eight bytes at a time; the bytes are
 * still consumed strictly front to back, so both builds produce the
 * same value.
 */
long
dbm_hash(str, len)
register char *str;
register int len;
{
	register unsigned long sum = 0;

#ifdef DUFF

#define HASHC	sum = *str++ + 65599 * sum

	if (len > 0) {
		register int groups = len >> 3;

		/* the (len mod 8) leading bytes come first ... */
		switch (len & 7) {
		case 7:	HASHC;	/* FALLTHROUGH */
		case 6:	HASHC;	/* FALLTHROUGH */
		case 5:	HASHC;	/* FALLTHROUGH */
		case 4:	HASHC;	/* FALLTHROUGH */
		case 3:	HASHC;	/* FALLTHROUGH */
		case 2:	HASHC;	/* FALLTHROUGH */
		case 1:	HASHC;
		}
		/* ... followed by the remaining whole groups of eight */
		for (; groups > 0; groups--) {
			HASHC; HASHC; HASHC; HASHC;
			HASHC; HASHC; HASHC; HASHC;
		}
	}
#else
	for (; len != 0; len--)
		sum = 65599 * sum + *str++;
#endif
	return sum;
}
dbe x-dbu *.dir *.pag 50 | 51 | shar: 52 | shar $(MISC) makefile $(SRCS) $(HDRS) >SDBM.SHAR 53 | 54 | readme: 55 | nroff -ms readme.ms | col -b >README 56 | -------------------------------------------------------------------------------- /pair.c: -------------------------------------------------------------------------------- 1 | /* 2 | * sdbm - ndbm work-alike hashed database library 3 | * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). 4 | * author: oz@nexus.yorku.ca 5 | * status: public domain. 6 | * 7 | * page-level routines 8 | */ 9 | 10 | #ifndef lint 11 | static char rcsid[] = "$Id: pair.c,v 1.10 90/12/13 13:00:35 oz Exp $"; 12 | #endif 13 | 14 | #include "sdbm.h" 15 | #include "tune.h" 16 | #include "pair.h" 17 | 18 | #ifndef BSD42 19 | #include /* NOTE(review): no header name follows this directive here; it looks lost in extraction -- confirm against the original pair.c */ 20 | #endif 21 | 22 | #define exhash(item) dbm_hash((item).dptr, (item).dsize) 23 | 24 | /* 25 | * forward 26 | */ 27 | static int seepair proto((char *, int, char *, int)); 28 | 29 | /* 30 | * page format: 31 | * +------------------------------+ 32 | * ino | n | keyoff | datoff | keyoff | 33 | * +------------+--------+--------+ 34 | * | datoff | - - - ----> | 35 | * +--------+---------------------+ 36 | * | F R E E A R E A | 37 | * +--------------+---------------+ 38 | * | <---- - - - | data | 39 | * +--------+-----+----+----------+ 40 | * | key | data | key | 41 | * +--------+----------+----------+ 42 | * 43 | * calculating the offsets for free area: if the number 44 | * of entries (ino[0]) is zero, the offset to the END of 45 | * the free area is the block size. Otherwise, it is the 46 | * nth (ino[ino[0]]) entry's offset. 47 | */ 48 | 49 | int 50 | fitpair(pag, need) 51 | char *pag; 52 | int need; 53 | { /* nonzero when a pair of need data bytes, plus its two offset slots, fits in the free area of pag */ 54 | register int n; 55 | register int off; 56 | register int free; 57 | register short *ino = (short *) pag; 58 | 59 | off = ((n = ino[0]) > 0) ?
ino[n] : PBLKSIZ; /* end of the free area: the last offset entry, or the block size for an empty page */ 60 | free = off - (n + 1) * sizeof(short); /* bytes between the end of the offset index and the stored data */ 61 | need += 2 * sizeof(short); /* a pair also consumes two offset slots */ 62 | 63 | debug(("free %d need %d\n", free, need)); 64 | 65 | return need <= free; 66 | } 67 | 68 | void 69 | putpair(pag, key, val) 70 | char *pag; 71 | datum key; 72 | datum val; 73 | { /* append key and val to pag; no room check is made here */ 74 | register int n; 75 | register int off; 76 | register short *ino = (short *) pag; 77 | 78 | off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ; 79 | /* 80 | * enter the key first 81 | */ 82 | off -= key.dsize; 83 | (void) memcpy(pag + off, key.dptr, key.dsize); 84 | ino[n + 1] = off; 85 | /* 86 | * now the data 87 | */ 88 | off -= val.dsize; 89 | (void) memcpy(pag + off, val.dptr, val.dsize); 90 | ino[n + 2] = off; 91 | /* 92 | * adjust item count 93 | */ 94 | ino[0] += 2; 95 | } 96 | 97 | datum 98 | getpair(pag, key) 99 | char *pag; 100 | datum key; 101 | { /* value stored under key in pag, or nullitem when the page is empty or the key is absent */ 102 | register int i; 103 | register int n; 104 | datum val; 105 | register short *ino = (short *) pag; 106 | 107 | if ((n = ino[0]) == 0) 108 | return nullitem; 109 | 110 | if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0) 111 | return nullitem; 112 | 113 | val.dptr = pag + ino[i + 1]; /* points into pag itself; nothing is copied */ 114 | val.dsize = ino[i] - ino[i + 1]; /* the value runs from its own offset up to the key offset */ 115 | return val; 116 | } 117 | 118 | #ifdef SEEDUPS 119 | int 120 | duppair(pag, key) 121 | char *pag; 122 | datum key; 123 | { /* nonzero when key is already stored in pag */ 124 | register short *ino = (short *) pag; 125 | return ino[0] > 0 && seepair(pag, ino[0], key.dptr, key.dsize) > 0; 126 | } 127 | #endif 128 | 129 | datum 130 | getnkey(pag, num) 131 | char *pag; 132 | int num; 133 | { /* num-th key of pag, counting from 1, or nullitem when there is no such key */ 134 | datum key; 135 | register int off; 136 | register short *ino = (short *) pag; 137 | 138 | num = num * 2 - 1; /* keys occupy the odd index slots */ 139 | if (ino[0] == 0 || num > ino[0]) /* NOTE(review): num < 1 is not rejected by this test */ 140 | return nullitem; 141 | 142 | off = (num > 1) ?
ino[num - 1] : PBLKSIZ; /* the key ends where the previous value starts, or at the block end for the first key */ 143 | 144 | key.dptr = pag + ino[num]; 145 | key.dsize = off - ino[num]; 146 | 147 | return key; 148 | } 149 | 150 | int 151 | delpair(pag, key) 152 | char *pag; 153 | datum key; 154 | { /* remove key and its value from pag: 1 when removed, 0 when the page is empty or the key is absent */ 155 | register int n; 156 | register int i; 157 | register short *ino = (short *) pag; 158 | 159 | if ((n = ino[0]) == 0) 160 | return 0; 161 | 162 | if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0) 163 | return 0; 164 | /* 165 | * found the key. if it is the last entry 166 | * [i.e. i == n - 1] we just adjust the entry count. 167 | * hard case: move all data down onto the deleted pair, 168 | * shift offsets onto deleted offsets, and adjust them. 169 | * [note: 0 < i < n] 170 | */ 171 | if (i < n - 1) { 172 | register int m; 173 | register char *dst = pag + (i == 1 ? PBLKSIZ : ino[i - 1]); /* upper end of the deleted pair */ 174 | register char *src = pag + ino[i + 1]; /* lower end of the deleted pair */ 175 | register int zoo = dst - src; /* size of the hole being closed */ 176 | 177 | debug(("free-up %d ", zoo)); 178 | /* 179 | * shift data/keys down 180 | */ 181 | m = ino[i + 1] - ino[n]; /* bytes stored below the deleted pair */ 182 | #ifdef DUFF 183 | #define MOVB *--dst = *--src 184 | 185 | if (m > 0) { 186 | register int loop = (m + 8 - 1) >> 3; /* passes through the 8-way unrolled copy */ 187 | 188 | switch (m & (8 - 1)) { 189 | case 0: do { 190 | MOVB; case 7: MOVB; 191 | case 6: MOVB; case 5: MOVB; 192 | case 4: MOVB; case 3: MOVB; 193 | case 2: MOVB; case 1: MOVB; 194 | } while (--loop); 195 | } 196 | } 197 | #else 198 | #ifdef MEMMOVE 199 | memmove(dst - m, src - m, m); /* same m bytes as the byte loops, addressed by the start of each region */ 200 | #else 201 | while (m--) 202 | *--dst = *--src; 203 | #endif 204 | #endif 205 | /* 206 | * adjust offset index up 207 | */ 208 | while (i < n - 1) { 209 | ino[i] = ino[i + 2] + zoo; /* later entries drop two slots; their data moved up by zoo bytes */ 210 | i++; 211 | } 212 | } 213 | ino[0] -= 2; 214 | return 1; 215 | } 216 | 217 | /* 218 | * search for the key in the page. 219 | * return offset index in the range 0 < i < n. 220 | * return 0 if not found.
221 | */ 222 | static int 223 | seepair(pag, n, key, siz) 224 | char *pag; 225 | register int n; 226 | register char *key; 227 | register int siz; 228 | { /* scan the key slots of pag for a key of siz bytes equal to key */ 229 | register int i; 230 | register int off = PBLKSIZ; /* end offset of the key under test; the first key ends at the block end */ 231 | register short *ino = (short *) pag; 232 | 233 | for (i = 1; i < n; i += 2) { /* odd slots hold key offsets */ 234 | if (siz == off - ino[i] && 235 | memcmp(key, pag + ino[i], siz) == 0) 236 | return i; 237 | off = ino[i + 1]; /* the next key ends where this value starts */ 238 | } 239 | return 0; 240 | } 241 | 242 | void 243 | splpage(pag, new, sbit) 244 | char *pag; 245 | char *new; 246 | long sbit; 247 | { /* redistribute the pairs of pag between pag and new according to hash bit sbit */ 248 | datum key; 249 | datum val; 250 | 251 | register int n; 252 | register int off = PBLKSIZ; 253 | char cur[PBLKSIZ]; 254 | register short *ino = (short *) cur; 255 | 256 | (void) memcpy(cur, pag, PBLKSIZ); /* work from a copy: both pages are cleared and refilled */ 257 | (void) memset(pag, 0, PBLKSIZ); 258 | (void) memset(new, 0, PBLKSIZ); 259 | 260 | n = ino[0]; 261 | for (ino++; n > 0; ino += 2) { /* ino[0] and ino[1] are the key and value offsets of the current pair */ 262 | key.dptr = cur + ino[0]; 263 | key.dsize = off - ino[0]; 264 | val.dptr = cur + ino[1]; 265 | val.dsize = ino[0] - ino[1]; 266 | /* 267 | * select the page pointer (by looking at sbit) and insert 268 | */ 269 | (void) putpair((exhash(key) & sbit) ? new : pag, key, val); 270 | 271 | off = ino[1]; /* the next key ends where this value starts */ 272 | n -= 2; 273 | } 274 | 275 | debug(("%d split %d/%d\n", ((short *) cur)[0] / 2, 276 | ((short *) new)[0] / 2, 277 | ((short *) pag)[0] / 2)); 278 | } 279 | 280 | /* 281 | * check page sanity: 282 | * number of entries should be something 283 | * reasonable, and all offsets in the index should be in order. 284 | * this could be made more rigorous.
285 | */ 286 | int 287 | chkpage(pag) 288 | char *pag; 289 | { /* 1 when the offset index of pag is self-consistent (an empty page included), 0 otherwise */ 290 | register int n; 291 | register int off; 292 | register short *ino = (short *) pag; 293 | register int low; /* first byte past the offset index: no key or value may start below it */ 294 | if ((n = ino[0]) < 0 || (n & 1) || n >= (int) (PBLKSIZ / sizeof(short))) /* entries come in pairs, and slot n itself must lie inside the page */ 295 | return 0; 296 | low = (n + 1) * (int) sizeof(short); 297 | if (n > 0) { 298 | off = PBLKSIZ; 299 | for (ino++; n > 0; ino += 2) { 300 | if (ino[0] > off || ino[1] > off || 301 | ino[1] > ino[0] || ino[1] < low) /* descending order, and never negative or inside the index */ 302 | return 0; 303 | off = ino[1]; 304 | n -= 2; 305 | } 306 | } 307 | return 1; 308 | } 309 | -------------------------------------------------------------------------------- /pair.h: -------------------------------------------------------------------------------- 1 | extern int fitpair proto((char *, int)); 2 | extern void putpair proto((char *, datum, datum)); 3 | extern datum getpair proto((char *, datum)); 4 | extern int delpair proto((char *, datum)); 5 | extern int chkpage proto((char *)); 6 | extern datum getnkey proto((char *, int)); 7 | extern void splpage proto((char *, char *, long)); 8 | #ifdef SEEDUPS 9 | extern int duppair proto((char *, datum)); 10 | #endif 11 | -------------------------------------------------------------------------------- /readme.ms: -------------------------------------------------------------------------------- 1 | .\" tbl | readme.ms | [tn]roff -ms | ... 2 | .\" note the "C" (courier) and "CB" fonts: you will probably have to 3 | .\" change these. 4 | .\" $Id: readme.ms,v 1.1 90/12/13 13:09:15 oz Exp Locker: oz $ 5 | 6 | .de P1 7 | .br 8 | .nr dT 4 9 | .nf 10 | .ft C 11 | .sp .5 12 | .nr t \\n(dT*\\w'x'u 13 | .ta 1u*\\ntu 2u*\\ntu 3u*\\ntu 4u*\\ntu 5u*\\ntu 6u*\\ntu 7u*\\ntu 8u*\\ntu 9u*\\ntu 10u*\\ntu 11u*\\ntu 12u*\\ntu 13u*\\ntu 14u*\\ntu 14 | .. 15 | .de P2 16 | .br 17 | .ft 1 18 | .br 19 | .sp .5 20 | .br 21 | .fi 22 | .. 23 | .\" CW uses the typewriter/courier font. 24 | .de CW 25 | \fC\\$1\\fP\\$2 26 | .. 27 | 28 | .\" Footnote numbering [by Henry Spencer] 29 | .\" \*f for a footnote number..
30 | .\" .FS 31 | .\" \*F 32 | .\" .FE 33 | .\" 34 | .ds f \\u\\s-2\\n+f\\s+2\\d 35 | .nr f 0 1 36 | .ds F \\n+F. 37 | .nr F 0 1 38 | 39 | .ND 40 | .LP 41 | .TL 42 | \fIsdbm\fP \(em Substitute DBM 43 | .br 44 | or 45 | .br 46 | Berkeley \fIndbm\fP for Every UN*X\** Made Simple 47 | .AU 48 | Ozan (oz) Yigit 49 | .AI 50 | The Guild of PD Software Toolmakers 51 | Toronto - Canada 52 | .sp 53 | oz@nexus.yorku.ca 54 | .LP 55 | .FS 56 | UN*X is not a trademark of any (dis)organization. 57 | .FE 58 | .sp 2 59 | \fIImplementation is the sincerest form of flattery. \(em L. Peter Deutsch\fP 60 | .SH 61 | A The Clone of the \fIndbm\fP library 62 | .PP 63 | The sources accompanying this notice \(em \fIsdbm\fP \(em constitute 64 | the first public release (Dec. 1990) of a complete clone of 65 | the Berkeley UN*X \fIndbm\fP library. The \fIsdbm\fP library is meant to 66 | clone the proven functionality of \fIndbm\fP as closely as possible, 67 | including a few improvements. It is practical, easy to understand, and 68 | compatible. 69 | The \fIsdbm\fP library is not derived from any licensed, proprietary or 70 | copyrighted software. 71 | .PP 72 | The \fIsdbm\fP implementation is based on a 1978 algorithm 73 | [Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''. 74 | In the course of searching for a substitute for \fIndbm\fP, I 75 | prototyped three different external-hashing algorithms [Lar78, Fag79, Lit80] 76 | and ultimately chose Larson's algorithm as a basis of the \fIsdbm\fP 77 | implementation. The Bell Labs 78 | \fIdbm\fP (and therefore \fIndbm\fP) is based on an algorithm invented by 79 | Ken Thompson, [Tho90, Tor87] and predates Larson's work. 80 | .PP 81 | The \fIsdbm\fR programming interface is totally compatible 82 | with \fIndbm\fP and includes a slight improvement in database initialization. 83 | It is also expected to be binary-compatible under most UN*X versions that 84 | support the \fIndbm\fP library. 
85 | .PP 86 | The \fIsdbm\fP implementation shares the shortcomings of the \fIndbm\fP 87 | library, as a side effect of various simplifications to the original Larson 88 | algorithm. It does produce \fIholes\fP in the page file as it writes 89 | pages past the end of file. (Larson's paper includes a clever solution to 90 | this problem that is a result of using the hash value directly as a block 91 | address.) On the other hand, extensive tests seem to indicate that \fIsdbm\fP 92 | creates fewer holes in general, and the resulting pagefiles are 93 | smaller. The \fIsdbm\fP implementation is also faster than \fIndbm\fP 94 | in database creation. 95 | Unlike the \fIndbm\fP, the \fIsdbm\fP 96 | .CW store 97 | operation will not ``wander away'' trying to split its 98 | data pages to insert a datum that \fIcannot\fP (due to elaborate worst-case 99 | situations) be inserted. (It will fail after a pre-defined number of attempts.) 100 | .SH 101 | Important Compatibility Warning 102 | .PP 103 | The \fIsdbm\fP and \fIndbm\fP 104 | libraries \fIcannot\fP share databases: one cannot read the (dir/pag) 105 | database created by the other. This is due to the differences 106 | between the \fIndbm\fP and \fIsdbm\fP algorithms\**, 107 | .FS 108 | Torek's discussion [Tor87] 109 | indicates that \fIdbm/ndbm\fP implementations use the hash 110 | value to traverse the radix trie differently than \fIsdbm\fP 111 | and as a result, the page indexes are generated in \fIdifferent\fP order. 112 | For more information, send e-mail to the author. 113 | .FE 114 | and the hash functions 115 | used. 116 | It is easy to convert between the \fIdbm/ndbm\fP databases and \fIsdbm\fP 117 | by ignoring the index completely: see 118 | .CW dbd , 119 | .CW dbu 120 | etc. 121 | .R 122 | .LP 123 | .SH 124 | Notice of Intellectual Property 125 | .LP 126 | \fIThe entire\fP sdbm \fIlibrary package, as authored by me,\fP Ozan S.
Yigit, 127 | \fIis hereby placed in the public domain.\fP As such, the author is not 128 | responsible for the consequences of use of this software, no matter how 129 | awful, even if they arise from defects in it. There is no expressed or 130 | implied warranty for the \fIsdbm\fP library. 131 | .PP 132 | Since the \fIsdbm\fP 133 | library package is in the public domain, this \fIoriginal\fP 134 | release or any additional public-domain releases of the modified original 135 | cannot possibly (by definition) be withheld from you. Also by definition, 136 | You (singular) have all the rights to this code (including the right to 137 | sell without permission, the right to hoard\** 138 | .FS 139 | You cannot really hoard something that is available to the public at 140 | large, but try if it makes you feel any better. 141 | .FE 142 | and the right to do other icky things as 143 | you see fit) but those rights are also granted to everyone else. 144 | .PP 145 | Please note that all previous distributions of this software contained 146 | a copyright (which is now dropped) to protect its 147 | origins and its current public domain status against any possible claims 148 | and/or challenges. 149 | .SH 150 | Acknowledgments 151 | .PP 152 | Many people have been very helpful and supportive. A partial list would 153 | necessarily include Rayan Zachariassen (who contributed the man page, 154 | and also hacked an MMAP version of \fIsdbm\fP), 155 | Arnold Robbins, Chris Lewis, 156 | Bill Davidsen, Henry Spencer, Geoff Collyer, Rich Salz (who got me started 157 | in the first place), Johannes Ruschein 158 | (who did the minix port) and David Tilbrook. I thank you all. 159 | .SH 160 | Distribution Manifest and Notes 161 | .LP 162 | This distribution of \fIsdbm\fP includes (at least) the following: 163 | .P1 164 | CHANGES change log 165 | README this file.
166 | biblio a small bibliography on external hashing 167 | dba.c a crude (n/s)dbm page file analyzer 168 | dbd.c a crude (n/s)dbm page file dumper (for conversion) 169 | dbe.1 man page for dbe.c 170 | dbe.c Janick's database editor 171 | dbm.c a dbm library emulation wrapper for ndbm/sdbm 172 | dbm.h header file for the above 173 | dbu.c a crude db management utility 174 | hash.c hashing function 175 | makefile guess. 176 | pair.c page-level routines (posted earlier) 177 | pair.h header file for the above 178 | readme.ms troff source for the README file 179 | sdbm.3 man page 180 | sdbm.c the real thing 181 | sdbm.h header file for the above 182 | tune.h place for tuning & portability thingies 183 | util.c miscellaneous 184 | .P2 185 | .PP 186 | .CW dbu 187 | is a simple database manipulation program\** that tries to look 188 | .FS 189 | The 190 | .CW dbd , 191 | .CW dba , 192 | .CW dbu 193 | utilities are quick hacks and are not fit for production use. They were 194 | developed late one night, just to test out \fIsdbm\fP, and convert some 195 | databases. 196 | .FE 197 | like Bell Labs' 198 | .CW cbt 199 | utility. It is currently incomplete in functionality. 200 | I use 201 | .CW dbu 202 | to test out the routines: it takes (from stdin) tab separated 203 | key/value pairs for commands like 204 | .CW build 205 | or 206 | .CW insert 207 | or takes keys for 208 | commands like 209 | .CW delete 210 | or 211 | .CW look . 212 | .P1 213 | dbu dbmfile 214 | .P2 215 | .PP 216 | .CW dba 217 | is a crude analyzer of \fIdbm/sdbm/ndbm\fP 218 | page files. It scans the entire 219 | page file, reporting page level statistics, and totals at the end. 220 | .PP 221 | .CW dbd 222 | is a crude dump program for \fIdbm/ndbm/sdbm\fP 223 | databases. It ignores the 224 | bitmap, and dumps the data pages in sequence. It can be used to create 225 | input for the 226 | .CW dbu 227 | utility. 
228 | Note that 229 | .CW dbd 230 | will skip any NULLs in the key and data 231 | fields, thus is unsuitable to convert some peculiar databases that 232 | insist on including the terminating null. 233 | .PP 234 | I have also included a copy of the 235 | .CW dbe 236 | (\fIndbm\fP DataBase Editor) by Janick Bergeron [janick@bnr.ca] for 237 | your pleasure. You may find it more useful than the little 238 | .CW dbu 239 | utility. 240 | .PP 241 | .CW dbm.[ch] 242 | is a \fIdbm\fP library emulation on top of \fIndbm\fP 243 | (and hence suitable for \fIsdbm\fP). Written by Robert Elz. 244 | .PP 245 | The \fIsdbm\fP 246 | library has been around in beta test for quite a long time, and from whatever 247 | little feedback I received (maybe no news is good news), I believe it has been 248 | functioning without any significant problems. I would, of course, appreciate 249 | all fixes and/or improvements. Portability enhancements would especially be 250 | useful. 251 | .SH 252 | Implementation Issues 253 | .PP 254 | Hash functions: 255 | The algorithm behind \fIsdbm\fP implementation needs a good bit-scrambling 256 | hash function to be effective. I ran into a set of constants for a simple 257 | hash function that seem to help \fIsdbm\fP perform better than \fIndbm\fP 258 | for various inputs: 259 | .P1 260 | /* 261 | * polynomial conversion ignoring overflows 262 | * 65599 nice. 65587 even better. 263 | */ 264 | long 265 | dbm_hash(char *str, int len) { 266 | register unsigned long n = 0; 267 | 268 | while (len--) 269 | n = n * 65599 + *str++; 270 | return n; 271 | } 272 | .P2 273 | .PP 274 | There may be better hash functions for the purposes of dynamic hashing. 275 | Try your favorite, and check the pagefile.
If it contains too many pages 276 | with too many holes, (in relation to this one for example) or if 277 | \fIsdbm\fP 278 | simply stops working (fails after 279 | .CW SPLTMAX 280 | attempts to split) when you feed your 281 | NEWS 282 | .CW history 283 | file to it, you probably do not have a good hashing function. 284 | If you do better (for different types of input), I would like to know 285 | about the function you use. 286 | .PP 287 | Block sizes: It seems (from various tests on a few machines) that a page 288 | file block size 289 | .CW PBLKSIZ 290 | of 1024 is by far the best for performance, but 291 | this also happens to limit the size of a key/value pair. Depending on your 292 | needs, you may wish to increase the page size, and also adjust 293 | .CW PAIRMAX 294 | (the maximum size of a key/value pair allowed: should always be at least 295 | three words smaller than 296 | .CW PBLKSIZ .) 297 | accordingly. The system-wide version of the library 298 | should probably be 299 | configured with 1024 (distribution default), as this appears to be sufficient 300 | for most common uses of \fIsdbm\fP. 301 | .SH 302 | Portability 303 | .PP 304 | This package has been tested in many different UN*Xes even including minix, 305 | and appears to be reasonably portable. This does not mean it will port 306 | easily to non-UN*X systems. 307 | .SH 308 | Notes and Miscellaneous 309 | .PP 310 | The \fIsdbm\fP is not a very complicated package, at least not after you 311 | familiarize yourself with the literature on external hashing. There are 312 | other interesting algorithms in existence that ensure (approximately) 313 | single-read access to a data value associated with any key. These are 314 | directory-less schemes such as \fIlinear hashing\fP [Lit80] (+ Larson 315 | variations), \fIspiral storage\fP [Mar79] or directory schemes such as 316 | \fIextendible hashing\fP [Fag79] by Fagin et al.
I do hope these sources 317 | provide a reasonable playground for experimentation with other algorithms. 318 | See the June 1988 issue of ACM Computing Surveys [Enb88] for an 319 | excellent overview of the field. 320 | .PG 321 | .SH 322 | References 323 | .LP 324 | .IP [Lar78] 4m 325 | P.-A. Larson, 326 | ``Dynamic Hashing'', \fIBIT\fP, vol. 18, pp. 184-201, 1978. 327 | .IP [Tho90] 4m 328 | Ken Thompson, \fIprivate communication\fP, Nov. 1990 329 | .IP [Lit80] 4m 330 | W. Litwin, 331 | ``Linear Hashing: A new tool for file and table addressing'', 332 | \fIProceedings of the 6th Conference on Very Large Databases (Montreal)\fP, 333 | pp. 212-223, Very Large Database Foundation, Saratoga, Calif., 1980. 334 | .IP [Fag79] 4m 335 | R. Fagin, J. Nievergelt, N. Pippenger, and H. R. Strong, 336 | ``Extendible Hashing - A Fast Access Method for Dynamic Files'', 337 | \fIACM Trans. Database Syst.\fP, vol. 4, no. 3, pp. 315-344, Sept. 1979. 338 | .IP [Wal84] 4m 339 | Rich Wales, 340 | ``Discussion of "dbm" data base system'', \fIUSENET newsgroup unix.wizards\fP, 341 | Jan. 1984. 342 | .IP [Tor87] 4m 343 | Chris Torek, 344 | ``Re: dbm.a and ndbm.a archives'', \fIUSENET newsgroup comp.unix\fP, 345 | 1987. 346 | .IP [Mar79] 4m 347 | G. N. Martin, 348 | ``Spiral Storage: Incrementally Augmentable Hash Addressed Storage'', 349 | \fITechnical Report #27\fP, University of Warwick, Coventry, U.K., 1979. 350 | .IP [Enb88] 4m 351 | R. J. Enbody and H. C. Du, 352 | ``Dynamic Hashing Schemes'', \fIACM Computing Surveys\fP, 353 | vol. 20, no. 2, pp. 85-113, June 1988.
354 | -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | sdbm - Substitute DBM 8 | or 9 | Berkeley ndbm for Every UN*X[1] Made Simple 10 | 11 | Ozan (oz) Yigit 12 | 13 | The Guild of PD Software Toolmakers 14 | Toronto - Canada 15 | 16 | oz@nexus.yorku.ca 17 | 18 | 19 | 20 | Implementation is the sincerest form of flattery. - L. Peter 21 | Deutsch 22 | 23 | A The Clone of the ndbm library 24 | 25 | The sources accompanying this notice - sdbm - consti- 26 | tute the first public release (Dec. 1990) of a complete 27 | clone of the Berkeley UN*X ndbm library. The sdbm library is 28 | meant to clone the proven functionality of ndbm as closely 29 | as possible, including a few improvements. It is practical, 30 | easy to understand, and compatible. The sdbm library is not 31 | derived from any licensed, proprietary or copyrighted 32 | software. 33 | 34 | The sdbm implementation is based on a 1978 algorithm 35 | [Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''. 36 | In the course of searching for a substitute for ndbm, I pro- 37 | totyped three different external-hashing algorithms [Lar78, 38 | Fag79, Lit80] and ultimately chose Larson's algorithm as a 39 | basis of the sdbm implementation. The Bell Labs dbm (and 40 | therefore ndbm) is based on an algorithm invented by Ken 41 | Thompson, [Tho90, Tor87] and predates Larson's work. 42 | 43 | The sdbm programming interface is totally compatible 44 | with ndbm and includes a slight improvement in database ini- 45 | tialization. It is also expected to be binary-compatible 46 | under most UN*X versions that support the ndbm library. 47 | 48 | The sdbm implementation shares the shortcomings of the 49 | ndbm library, as a side effect of various simplifications to 50 | the original Larson algorithm. 
It does produce holes in the 51 | page file as it writes pages past the end of file. (Larson's 52 | paper includes a clever solution to this problem that is a 53 | result of using the hash value directly as a block address.) 54 | On the other hand, extensive tests seem to indicate that 55 | sdbm creates fewer holes in general, and the resulting page- 56 | files are smaller. The sdbm implementation is also faster 57 | than ndbm in database creation. Unlike the ndbm, the sdbm 58 | _________________________ 59 | 60 | [1] UN*X is not a trademark of any (dis)organization. 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | - 2 - 71 | 72 | 73 | store operation will not ``wander away'' trying to split its 74 | data pages to insert a datum that cannot (due to elaborate 75 | worst-case situations) be inserted. (It will fail after a 76 | pre-defined number of attempts.) 77 | 78 | Important Compatibility Warning 79 | 80 | The sdbm and ndbm libraries cannot share databases: one 81 | cannot read the (dir/pag) database created by the other. 82 | This is due to the differences between the ndbm and sdbm 83 | algorithms[2], and the hash functions used. It is easy to 84 | convert between the dbm/ndbm databases and sdbm by ignoring 85 | the index completely: see dbd, dbu etc. 86 | 87 | 88 | Notice of Intellectual Property 89 | 90 | The entire sdbm library package, as authored by me, Ozan S. 91 | Yigit, is hereby placed in the public domain. As such, the 92 | author is not responsible for the consequences of use of 93 | this software, no matter how awful, even if they arise from 94 | defects in it. There is no expressed or implied warranty for 95 | the sdbm library. 96 | 97 | Since the sdbm library package is in the public domain, 98 | this original release or any additional public-domain 99 | releases of the modified original cannot possibly (by defin- 100 | ition) be withheld from you.
Also by definition, You (singu- 101 | lar) have all the rights to this code (including the right 102 | to sell without permission, the right to hoard[3] and the 103 | right to do other icky things as you see fit) but those 104 | rights are also granted to everyone else. 105 | 106 | Please note that all previous distributions of this 107 | software contained a copyright (which is now dropped) to 108 | protect its origins and its current public domain status 109 | against any possible claims and/or challenges. 110 | 111 | Acknowledgments 112 | 113 | Many people have been very helpful and supportive. A 114 | partial list would necessarily include Rayan Zachariassen 115 | (who contributed the man page, and also hacked an MMAP 116 | _________________________ 117 | 118 | [2] Torek's discussion [Tor87] indicates that 119 | dbm/ndbm implementations use the hash value to traverse 120 | the radix trie differently than sdbm and as a result, 121 | the page indexes are generated in different order. For 122 | more information, send e-mail to the author. 123 | [3] You cannot really hoard something that is avail- 124 | able to the public at large, but try if it makes you 125 | feel any better. 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | - 3 - 137 | 138 | 139 | version of sdbm), Arnold Robbins, Chris Lewis, Bill David- 140 | sen, Henry Spencer, Geoff Collyer, Rich Salz (who got me 141 | started in the first place), Johannes Ruschein (who did the 142 | minix port) and David Tilbrook. I thank you all. 143 | 144 | Distribution Manifest and Notes 145 | 146 | This distribution of sdbm includes (at least) the following: 147 | 148 | CHANGES change log 149 | README this file.
150 | biblio a small bibliography on external hashing 151 | dba.c a crude (n/s)dbm page file analyzer 152 | dbd.c a crude (n/s)dbm page file dumper (for conversion) 153 | dbe.1 man page for dbe.c 154 | dbe.c Janick's database editor 155 | dbm.c a dbm library emulation wrapper for ndbm/sdbm 156 | dbm.h header file for the above 157 | dbu.c a crude db management utility 158 | hash.c hashing function 159 | makefile guess. 160 | pair.c page-level routines (posted earlier) 161 | pair.h header file for the above 162 | readme.ms troff source for the README file 163 | sdbm.3 man page 164 | sdbm.c the real thing 165 | sdbm.h header file for the above 166 | tune.h place for tuning & portability thingies 167 | util.c miscellaneous 168 | 169 | dbu is a simple database manipulation program[4] that 170 | tries to look like Bell Labs' cbt utility. It is currently 171 | incomplete in functionality. I use dbu to test out the rou- 172 | tines: it takes (from stdin) tab separated key/value pairs 173 | for commands like build or insert or takes keys for commands 174 | like delete or look. 175 | 176 | dbu dbmfile 177 | 178 | dba is a crude analyzer of dbm/sdbm/ndbm page files. It 179 | scans the entire page file, reporting page level statistics, 180 | and totals at the end. 181 | 182 | dbd is a crude dump program for dbm/ndbm/sdbm data- 183 | bases. It ignores the bitmap, and dumps the data pages in 184 | sequence. It can be used to create input for the dbu util- 185 | ity. Note that dbd will skip any NULLs in the key and data 186 | fields, thus is unsuitable to convert some peculiar 187 | _________________________ 188 | 189 | [4] The dbd, dba, dbu utilities are quick hacks and 190 | are not fit for production use. They were developed 191 | late one night, just to test out sdbm, and convert some 192 | databases. 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | - 4 - 203 | 204 | 205 | databases that insist on including the terminating null.
206 | 207 | I have also included a copy of the dbe (ndbm DataBase 208 | Editor) by Janick Bergeron [janick@bnr.ca] for your pleas- 209 | ure. You may find it more useful than the little dbu util- 210 | ity. 211 | 212 | dbm.[ch] is a dbm library emulation on top of ndbm (and 213 | hence suitable for sdbm). Written by Robert Elz. 214 | 215 | The sdbm library has been around in beta test for quite 216 | a long time, and from whatever little feedback I received 217 | (maybe no news is good news), I believe it has been func- 218 | tioning without any significant problems. I would, of 219 | course, appreciate all fixes and/or improvements. Portabil- 220 | ity enhancements would especially be useful. 221 | 222 | Implementation Issues 223 | 224 | Hash functions: The algorithm behind sdbm implementa- 225 | tion needs a good bit-scrambling hash function to be effec- 226 | tive. I ran into a set of constants for a simple hash func- 227 | tion that seem to help sdbm perform better than ndbm for 228 | various inputs: 229 | 230 | /* 231 | * polynomial conversion ignoring overflows 232 | * 65599 nice. 65587 even better. 233 | */ 234 | long 235 | dbm_hash(char *str, int len) { 236 | register unsigned long n = 0; 237 | 238 | while (len--) 239 | n = n * 65599 + *str++; 240 | return n; 241 | } 242 | 243 | There may be better hash functions for the purposes of 244 | dynamic hashing. Try your favorite, and check the pagefile. 245 | If it contains too many pages with too many holes, (in rela- 246 | tion to this one for example) or if sdbm simply stops work- 247 | ing (fails after SPLTMAX attempts to split) when you feed 248 | your NEWS history file to it, you probably do not have a 249 | good hashing function. If you do better (for different 250 | types of input), I would like to know about the function you 251 | use. 
252 | 253 | Block sizes: It seems (from various tests on a few 254 | machines) that a page file block size PBLKSIZ of 1024 is by 255 | far the best for performance, but this also happens to limit 256 | the size of a key/value pair. Depending on your needs, you 257 | may wish to increase the page size, and also adjust PAIRMAX 258 | (the maximum size of a key/value pair allowed: should always 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | - 5 - 269 | 270 | 271 | be at least three words smaller than PBLKSIZ.) accordingly. 272 | The system-wide version of the library should probably be 273 | configured with 1024 (distribution default), as this appears 274 | to be sufficient for most common uses of sdbm. 275 | 276 | Portability 277 | 278 | This package has been tested in many different UN*Xes 279 | even including minix, and appears to be reasonably portable. 280 | This does not mean it will port easily to non-UN*X systems. 281 | 282 | Notes and Miscellaneous 283 | 284 | The sdbm is not a very complicated package, at least 285 | not after you familiarize yourself with the literature on 286 | external hashing. There are other interesting algorithms in 287 | existence that ensure (approximately) single-read access to 288 | a data value associated with any key. These are directory- 289 | less schemes such as linear hashing [Lit80] (+ Larson varia- 290 | tions), spiral storage [Mar79] or directory schemes such as 291 | extendible hashing [Fag79] by Fagin et al. I do hope these 292 | sources provide a reasonable playground for experimentation 293 | with other algorithms. See the June 1988 issue of ACM Com- 294 | puting Surveys [Enb88] for an excellent overview of the 295 | field. 296 | 297 | References 298 | 299 | 300 | [Lar78] 301 | P.-A. Larson, ``Dynamic Hashing'', BIT, vol. 18, pp. 302 | 184-201, 1978. 303 | 304 | [Tho90] 305 | Ken Thompson, private communication, Nov. 1990 306 | 307 | [Lit80] 308 | W.
Litwin, ``Linear Hashing: A new tool for file and 309 | table addressing'', Proceedings of the 6th Conference on 310 | Very Large Databases (Montreal), pp. 212-223, Very 311 | Large Database Foundation, Saratoga, Calif., 1980. 312 | 313 | [Fag79] 314 | R. Fagin, J. Nievergelt, N. Pippenger, and H. R. 315 | Strong, ``Extendible Hashing - A Fast Access Method for 316 | Dynamic Files'', ACM Trans. Database Syst., vol. 4, 317 | no. 3, pp. 315-344, Sept. 1979. 318 | 319 | [Wal84] 320 | Rich Wales, ``Discussion of "dbm" data base system'', 321 | USENET newsgroup unix.wizards, Jan. 1984. 322 | 323 | [Tor87] 324 | Chris Torek, ``Re: dbm.a and ndbm.a archives'', 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | - 6 - 335 | 336 | 337 | USENET newsgroup comp.unix, 1987. 338 | 339 | [Mar79] 340 | G. N. Martin, ``Spiral Storage: Incrementally Augment- 341 | able Hash Addressed Storage'', Technical Report #27, 342 | University of Warwick, Coventry, U.K., 1979. 343 | 344 | [Enb88] 345 | R. J. Enbody and H. C. Du, ``Dynamic Hashing 346 | Schemes'', ACM Computing Surveys, vol. 20, no. 2, pp. 347 | 85-113, June 1988.
348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | -------------------------------------------------------------------------------- /sdbm.3: -------------------------------------------------------------------------------- 1 | .\" $Id: sdbm.3,v 1.2 90/12/13 13:00:57 oz Exp $ 2 | .TH SDBM 3 "1 March 1990" 3 | .SH NAME 4 | sdbm, dbm_open, dbm_prep, dbm_close, dbm_fetch, dbm_store, dbm_delete, dbm_firstkey, dbm_nextkey, dbm_hash, dbm_rdonly, dbm_error, dbm_clearerr, dbm_dirfno, dbm_pagfno \- data base subroutines 5 | .SH SYNOPSIS 6 | .nf 7 | .ft B 8 | #include <sdbm.h> 9 | .sp 10 | typedef struct { 11 | char *dptr; 12 | int dsize; 13 | } datum; 14 | .sp 15 | datum nullitem = { NULL, 0 }; 16 | .sp 17 | \s-1DBM\s0 *dbm_open(char *file, int flags, int mode) 18 | .sp 19 | \s-1DBM\s0 *dbm_prep(char *dirname, char *pagname, int flags, int mode) 20 | .sp 21 | void dbm_close(\s-1DBM\s0 *db) 22 | .sp 23 | datum dbm_fetch(\s-1DBM\s0 *db, datum key) 24 | .sp 25 | int dbm_store(\s-1DBM\s0 *db, datum key, datum val, int flags) 26 | .sp 27 | int dbm_delete(\s-1DBM\s0 *db, datum key) 28 | .sp 29 | datum dbm_firstkey(\s-1DBM\s0 *db) 30 | .sp 31 | datum dbm_nextkey(\s-1DBM\s0 *db) 32 | .sp 33 | long dbm_hash(char *string, int len) 34 | .sp 35 | int dbm_rdonly(\s-1DBM\s0 *db) 36 | int dbm_error(\s-1DBM\s0 *db) 37 | dbm_clearerr(\s-1DBM\s0 *db) 38 | int dbm_dirfno(\s-1DBM\s0 *db) 39 | int dbm_pagfno(\s-1DBM\s0 *db) 40 | .ft R 41 | .fi 42 | .SH DESCRIPTION 43 | .IX "database library" sdbm "" "\fLsdbm\fR" 44 | .IX dbm_open "" "\fLdbm_open\fR \(em open \fLsdbm\fR database" 45 | .IX dbm_prep "" "\fLdbm_prep\fR \(em prepare \fLsdbm\fR database" 46 | .IX dbm_close "" "\fLdbm_close\fR \(em close \fLsdbm\fR routine" 47 | .IX dbm_fetch "" "\fLdbm_fetch\fR \(em fetch 
\fLsdbm\fR database data" 48 | .IX dbm_store "" "\fLdbm_store\fR \(em add data to \fLsdbm\fR database" 49 | .IX dbm_delete "" "\fLdbm_delete\fR \(em remove data from \fLsdbm\fR database" 50 | .IX dbm_firstkey "" "\fLdbm_firstkey\fR \(em access \fLsdbm\fR database" 51 | .IX dbm_nextkey "" "\fLdbm_nextkey\fR \(em access \fLsdbm\fR database" 52 | .IX dbm_hash "" "\fLdbm_hash\fR \(em string hash for \fLsdbm\fR database" 53 | .IX dbm_rdonly "" "\fLdbm_rdonly\fR \(em return \fLsdbm\fR database read-only mode" 54 | .IX dbm_error "" "\fLdbm_error\fR \(em return \fLsdbm\fR database error condition" 55 | .IX dbm_clearerr "" "\fLdbm_clearerr\fR \(em clear \fLsdbm\fR database error condition" 56 | .IX dbm_dirfno "" "\fLdbm_dirfno\fR \(em return \fLsdbm\fR database bitmap file descriptor" 57 | .IX dbm_pagfno "" "\fLdbm_pagfno\fR \(em return \fLsdbm\fR database data file descriptor" 58 | .IX "database functions \(em \fLsdbm\fR" dbm_open "" \fLdbm_open\fP 59 | .IX "database functions \(em \fLsdbm\fR" dbm_prep "" \fLdbm_prep\fP 60 | .IX "database functions \(em \fLsdbm\fR" dbm_close "" \fLdbm_close\fP 61 | .IX "database functions \(em \fLsdbm\fR" dbm_fetch "" \fLdbm_fetch\fP 62 | .IX "database functions \(em \fLsdbm\fR" dbm_store "" \fLdbm_store\fP 63 | .IX "database functions \(em \fLsdbm\fR" dbm_delete "" \fLdbm_delete\fP 64 | .IX "database functions \(em \fLsdbm\fR" dbm_firstkey "" \fLdbm_firstkey\fP 65 | .IX "database functions \(em \fLsdbm\fR" dbm_nextkey "" \fLdbm_nextkey\fP 66 | .IX "database functions \(em \fLsdbm\fR" dbm_rdonly "" \fLdbm_rdonly\fP 67 | .IX "database functions \(em \fLsdbm\fR" dbm_error "" \fLdbm_error\fP 68 | .IX "database functions \(em \fLsdbm\fR" dbm_clearerr "" \fLdbm_clearerr\fP 69 | .IX "database functions \(em \fLsdbm\fR" dbm_dirfno "" \fLdbm_dirfno\fP 70 | .IX "database functions \(em \fLsdbm\fR" dbm_pagfno "" \fLdbm_pagfno\fP 71 | .LP 72 | This package allows an application to maintain a mapping of pairs 73 | in disk files. 
This is not to be considered a real database system, but is 74 | still useful in many simple applications built around fast retrieval of a data 75 | value from a key. This implementation uses an external hashing scheme, 76 | called Dynamic Hashing, as described by Per-Aake Larson in BIT 18 (1978) pp. 77 | 184-201. Retrieval of any item usually requires a single disk access. 78 | The application interface is compatible with the 79 | .IR ndbm (3) 80 | library. 81 | .LP 82 | An 83 | .B sdbm 84 | database is kept in two files usually given the extensions 85 | .B \.dir 86 | and 87 | .BR \.pag . 88 | The 89 | .B \.dir 90 | file contains a bitmap representing a forest of binary hash trees, the leaves 91 | of which indicate data pages in the 92 | .B \.pag 93 | file. 94 | .LP 95 | The application interface uses the 96 | .B datum 97 | structure to describe both 98 | .I keys 99 | and 100 | .IR value s. 101 | A 102 | .B datum 103 | specifies a byte sequence of 104 | .I dsize 105 | size pointed to by 106 | .IR dptr . 107 | If you use 108 | .SM ASCII 109 | strings as 110 | .IR key s 111 | or 112 | .IR value s, 113 | then you must decide whether or not to include the terminating 114 | .SM NUL 115 | byte which sometimes defines strings. Including it will require larger 116 | database files, but it will be possible to get sensible output from a 117 | .IR strings (1) 118 | command applied to the data file. 119 | .LP 120 | In order to allow a process using this package to manipulate multiple 121 | databases, the applications interface always requires a 122 | .IR handle , 123 | a 124 | .BR "DBM *" , 125 | to identify the database to be manipulated. Such a handle can be obtained 126 | from the only routines that do not require it, namely 127 | .BR dbm_open (\|) 128 | or 129 | .BR dbm_prep (\|). 130 | Either of these will open or create the two necessary files. 
The 131 | difference is that the latter allows explicitly naming the bitmap and data 132 | files whereas 133 | .BR dbm_open (\|) 134 | will take a base file name and call 135 | .BR dbm_prep (\|) 136 | with the default extensions. 137 | The 138 | .I flags 139 | and 140 | .I mode 141 | parameters are the same as for 142 | .BR open (2). 143 | .LP 144 | To free the resources occupied while a database handle is active, call 145 | .BR dbm_close (\|). 146 | .LP 147 | Given a handle, one can retrieve data associated with a key by using the 148 | .BR dbm_fetch (\|) 149 | routine, and associate data with a key by using the 150 | .BR dbm_store (\|) 151 | routine. 152 | .LP 153 | The values of the 154 | .I flags 155 | parameter for 156 | .BR dbm_store (\|) 157 | can be either 158 | .BR \s-1DBM_INSERT\s0 , 159 | which will not change an existing entry with the same key, or 160 | .BR \s-1DBM_REPLACE\s0 , 161 | which will replace an existing entry with the same key. 162 | Keys are unique within the database. 163 | .LP 164 | To delete a key and its associated value use the 165 | .BR dbm_delete (\|) 166 | routine. 167 | .LP 168 | To retrieve every key in the database, use a loop like: 169 | .sp 170 | .nf 171 | .ft B 172 | for (key = dbm_firstkey(db); key.dptr != NULL; key = dbm_nextkey(db)) 173 | ; 174 | .ft R 175 | .fi 176 | .LP 177 | The order of retrieval is unspecified. 178 | .LP 179 | If you determine that the performance of the database is inadequate or 180 | you notice clustering or other effects that may be due to the hashing 181 | algorithm used by this package, you can override it by supplying your 182 | own 183 | .BR dbm_hash (\|) 184 | routine. Doing so will make the database unintelligible to any other 185 | applications that do not use your specialized hash function. 186 | .sp 187 | .LP 188 | The following macros are defined in the header file: 189 | .IP 190 | .BR dbm_rdonly (\|) 191 | returns true if the database has been opened read\-only. 
192 | .IP 193 | .BR dbm_error (\|) 194 | returns true if an I/O error has occurred. 195 | .IP 196 | .BR dbm_clearerr (\|) 197 | allows you to clear the error flag if you think you know what the error 198 | was and insist on ignoring it. 199 | .IP 200 | .BR dbm_dirfno (\|) 201 | returns the file descriptor associated with the bitmap file. 202 | .IP 203 | .BR dbm_pagfno (\|) 204 | returns the file descriptor associated with the data file. 205 | .SH SEE ALSO 206 | .IR open (2). 207 | .SH DIAGNOSTICS 208 | Functions that return a 209 | .B "DBM *" 210 | handle will use 211 | .SM NULL 212 | to indicate an error. 213 | Functions that return an 214 | .B int 215 | will use \-1 to indicate an error. The normal (successful) return value of these functions is 0. 216 | Functions that return a 217 | .B datum 218 | will return 219 | .B nullitem 220 | to indicate an error. 221 | .LP 222 | As a special case of 223 | .BR dbm_store (\|), 224 | if it is called with the 225 | .B \s-1DBM_INSERT\s0 226 | flag and the key already exists in the database, the return value will be 1. 227 | .LP 228 | In general, if a function parameter is invalid, 229 | .B errno 230 | will be set to 231 | .BR \s-1EINVAL\s0 . 232 | If a write operation is requested on a read-only database, 233 | .B errno 234 | will be set to 235 | .BR \s-1EPERM\s0 . 236 | If a memory allocation (using 237 | .IR malloc (3)) 238 | failed, 239 | .B errno 240 | will be set to 241 | .BR \s-1ENOMEM\s0 . 242 | For I/O operation failures 243 | .B errno 244 | will contain the value set by the relevant failed system call, either 245 | .IR read (2), 246 | .IR write (2), 247 | or 248 | .IR lseek (2). 249 | .SH AUTHOR 250 | .IP "Ozan S. Yigit" (oz@nexus.yorku.ca) 251 | .SH BUGS 252 | The sum of key and value data sizes must not exceed 253 | .B \s-1PAIRMAX\s0 254 | (1008 bytes). 255 | .LP 256 | The sum of the key and value data sizes where several keys hash to the 257 | same value must fit within one data page. 
258 | .LP 259 | The 260 | .B \.pag 261 | file will contain holes, so its apparent size is larger than its contents. 262 | When copied through the filesystem the holes will be filled. 263 | .LP 264 | The contents of 265 | .B datum 266 | values returned are in volatile storage. If you want to retain the values 267 | pointed to, you must copy them immediately before another call to this package. 268 | .LP 269 | The only safe way for multiple processes to (read and) update a database at 270 | the same time, is to implement a private locking scheme outside this package 271 | and open and close the database between lock acquisitions. It is safe for 272 | multiple processes to concurrently access a database read-only. 273 | .SH APPLICATIONS PORTABILITY 274 | For complete source code compatibility with the Berkeley Unix 275 | .IR ndbm (3) 276 | library, the 277 | .B sdbm.h 278 | header file should be installed in 279 | .BR /usr/include/ndbm.h . 280 | .LP 281 | The 282 | .B nullitem 283 | data item, and the 284 | .BR dbm_prep (\|), 285 | .BR dbm_hash (\|), 286 | .BR dbm_rdonly (\|), 287 | .BR dbm_dirfno (\|), 288 | and 289 | .BR dbm_pagfno (\|) 290 | functions are unique to this package. 291 | -------------------------------------------------------------------------------- /sdbm.bun: -------------------------------------------------------------------------------- 1 | 2 | : to unbundle, sh this file 3 | echo x - CHANGES 1>&2 4 | sed 's/^X//' >CHANGES <<'@@@End of CHANGES' 5 | XJune 1997: 6 | X 7 | Xo fixed a long-hidden memmove bug in delpair that causes database 8 | X corruption in MEMMOVE versions of sdbm. [sdbm defaults to duff's 9 | X device to move data, so memmove version is almost never used.] 10 | X 11 | XChanges from the earlier BETA releases. 12 | X 13 | Xo dbm_prep does everything now, so dbm_open is just a simple 14 | X wrapper that builds the default filenames. dbm_prep no longer 15 | X requires a (DBM *) db parameter: it allocates one itself. 
It 16 | X returns (DBM *) db or (DBM *) NULL. 17 | X 18 | Xo makroom is now reliable. In the common-case optimization of the page 19 | X split, the page into which the incoming key/value pair is to be inserted 20 | X is write-deferred (if the split is successful), thereby saving a cosly 21 | X write. BUT, if the split does not make enough room (unsuccessful), the 22 | X deferred page is written out, as the failure-window is now dependent on 23 | X the number of split attempts. 24 | X 25 | Xo if -DDUFF is defined, hash function will also use the DUFF construct. 26 | X This may look like a micro-performance tweak (maybe it is), but in fact, 27 | X the hash function is the third most-heavily used function, after read 28 | X and write. 29 | @@@End of CHANGES 30 | echo x - COMPARE 1>&2 31 | sed 's/^X//' >COMPARE <<'@@@End of COMPARE' 32 | X 33 | XScript started on Thu Sep 28 15:41:06 1989 34 | X% uname -a 35 | Xtitan titan 4_0 UMIPS mips 36 | X% make all x-dbm 37 | X cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dbm.c 38 | X cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c sdbm.c 39 | X cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c pair.c 40 | X cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c hash.c 41 | X ar cr libsdbm.a sdbm.o pair.o hash.o 42 | X ranlib libsdbm.a 43 | X cc -o dbm dbm.o libsdbm.a 44 | X cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dba.c 45 | X cc -o dba dba.o 46 | X cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dbd.c 47 | X cc -o dbd dbd.o 48 | X cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -o x-dbm dbm.o 49 | X% 50 | X% 51 | X% wc history 52 | X 65110 218344 3204883 history 53 | X% 54 | X% /bin/time dbm build foo &2 122 | sed 's/^X//' >biblio <<'@@@End of biblio' 123 | X%A R. J. Enbody 124 | X%A H. C. Du 125 | X%T Dynamic Hashing Schemes 126 | X%J ACM Computing Surveys 127 | X%V 20 128 | X%N 2 129 | X%D June 1988 130 | X%P 85-113 131 | X%K surveys 132 | X 133 | X%A P.-A. 
Larson 134 | X%T Dynamic Hashing 135 | X%J BIT 136 | X%V 18 137 | X%P 184-201 138 | X%D 1978 139 | X%K dynamic 140 | X 141 | X%A W. Litwin 142 | X%T Linear Hashing: A new tool for file and table addressing 143 | X%J Proceedings of the 6th Conference on Very Large Databases (Montreal) 144 | X%I Very Large Database Foundation 145 | X%C Saratoga, Calif. 146 | X%P 212-223 147 | X%D 1980 148 | X%K linear 149 | X 150 | X%A R. Fagin 151 | X%A J. Nievergelt 152 | X%A N. Pippenger 153 | X%A H. R. Strong 154 | X%T Extendible Hashing - A Fast Access Method for Dynamic Files 155 | X%J ACM Trans. Database Syst. 156 | X%V 4 157 | X%N 3 158 | X%D Sept. 1979 159 | X%P 315-344 160 | X%K extend 161 | X 162 | X%A G. N. Martin 163 | X%T Spiral Storage: Incrementally Augmentable Hash Addressed Storage 164 | X%J Technical Report #27 165 | X%I University of Warwick 166 | X%C Coventry, U.K. 167 | X%D 1979 168 | X%K spiral 169 | X 170 | X%A Chris Torek 171 | X%T Re: dbm.a and ndbm.a archives 172 | X%B USENET newsgroup comp.unix 173 | X%D 1987 174 | X%K torek 175 | X 176 | X%A Rich Wales 177 | X%T Discussion of "dbm" data base system 178 | X%B USENET newsgroup unix.wizards 179 | X%D Jan. 
1984 180 | X%K rich 181 | X 182 | X 183 | X 184 | X 185 | X 186 | X 187 | @@@End of biblio 188 | echo x - dba.c 1>&2 189 | sed 's/^X//' >dba.c <<'@@@End of dba.c' 190 | X/* 191 | X * dba dbm analysis/recovery 192 | X */ 193 | X 194 | X#include 195 | X#include 196 | X#include "sdbm.h" 197 | X 198 | Xchar *progname; 199 | Xextern void oops(); 200 | X 201 | Xint 202 | Xmain(argc, argv) 203 | Xchar **argv; 204 | X{ 205 | X int n; 206 | X char *p; 207 | X char *name; 208 | X int pagf; 209 | X 210 | X progname = argv[0]; 211 | X 212 | X if (p = argv[1]) { 213 | X name = (char *) malloc((n = strlen(p)) + 5); 214 | X strcpy(name, p); 215 | X strcpy(name + n, ".pag"); 216 | X 217 | X if ((pagf = open(name, O_RDONLY)) < 0) 218 | X oops("cannot open %s.", name); 219 | X 220 | X sdump(pagf); 221 | X } 222 | X else 223 | X oops("usage: %s dbname", progname); 224 | X 225 | X return 0; 226 | X} 227 | X 228 | Xsdump(pagf) 229 | Xint pagf; 230 | X{ 231 | X register b; 232 | X register n = 0; 233 | X register t = 0; 234 | X register o = 0; 235 | X register e; 236 | X char pag[PBLKSIZ]; 237 | X 238 | X while ((b = read(pagf, pag, PBLKSIZ)) > 0) { 239 | X printf("#%d: ", n); 240 | X if (!okpage(pag)) 241 | X printf("bad\n"); 242 | X else { 243 | X printf("ok. 
"); 244 | X if (!(e = pagestat(pag))) 245 | X o++; 246 | X else 247 | X t += e; 248 | X } 249 | X n++; 250 | X } 251 | X 252 | X if (b == 0) 253 | X printf("%d pages (%d holes): %d entries\n", n, o, t); 254 | X else 255 | X oops("read failed: block %d", n); 256 | X} 257 | X 258 | Xpagestat(pag) 259 | Xchar *pag; 260 | X{ 261 | X register n; 262 | X register free; 263 | X register short *ino = (short *) pag; 264 | X 265 | X if (!(n = ino[0])) 266 | X printf("no entries.\n"); 267 | X else { 268 | X free = ino[n] - (n + 1) * sizeof(short); 269 | X printf("%3d entries %2d%% used free %d.\n", 270 | X n / 2, ((PBLKSIZ - free) * 100) / PBLKSIZ, free); 271 | X } 272 | X return n / 2; 273 | X} 274 | @@@End of dba.c 275 | echo x - dbd.c 1>&2 276 | sed 's/^X//' >dbd.c <<'@@@End of dbd.c' 277 | X/* 278 | X * dbd - dump a dbm data file 279 | X */ 280 | X 281 | X#include 282 | X#include 283 | X#include "sdbm.h" 284 | X 285 | Xchar *progname; 286 | Xextern void oops(); 287 | X 288 | X 289 | X#define empty(page) (((short *) page)[0] == 0) 290 | X 291 | Xint 292 | Xmain(argc, argv) 293 | Xchar **argv; 294 | X{ 295 | X int n; 296 | X char *p; 297 | X char *name; 298 | X int pagf; 299 | X 300 | X progname = argv[0]; 301 | X 302 | X if (p = argv[1]) { 303 | X name = (char *) malloc((n = strlen(p)) + 5); 304 | X strcpy(name, p); 305 | X strcpy(name + n, ".pag"); 306 | X 307 | X if ((pagf = open(name, O_RDONLY)) < 0) 308 | X oops("cannot open %s.", name); 309 | X 310 | X sdump(pagf); 311 | X } 312 | X else 313 | X oops("usage: %s dbname", progname); 314 | X return 0; 315 | X} 316 | X 317 | Xsdump(pagf) 318 | Xint pagf; 319 | X{ 320 | X register r; 321 | X register n = 0; 322 | X register o = 0; 323 | X char pag[PBLKSIZ]; 324 | X 325 | X while ((r = read(pagf, pag, PBLKSIZ)) > 0) { 326 | X if (!okpage(pag)) 327 | X fprintf(stderr, "%d: bad page.\n", n); 328 | X else if (empty(pag)) 329 | X o++; 330 | X else 331 | X dispage(pag); 332 | X n++; 333 | X } 334 | X 335 | X if (r == 0) 336 | X 
fprintf(stderr, "%d pages (%d holes).\n", n, o); 337 | X else 338 | X oops("read failed: block %d", n); 339 | X} 340 | X 341 | X 342 | X#ifdef OLD 343 | Xdispage(pag) 344 | Xchar *pag; 345 | X{ 346 | X register i, n; 347 | X register off; 348 | X register short *ino = (short *) pag; 349 | X 350 | X off = PBLKSIZ; 351 | X for (i = 1; i < ino[0]; i += 2) { 352 | X printf("\t[%d]: ", ino[i]); 353 | X for (n = ino[i]; n < off; n++) 354 | X putchar(pag[n]); 355 | X putchar(' '); 356 | X off = ino[i]; 357 | X printf("[%d]: ", ino[i + 1]); 358 | X for (n = ino[i + 1]; n < off; n++) 359 | X putchar(pag[n]); 360 | X off = ino[i + 1]; 361 | X putchar('\n'); 362 | X } 363 | X} 364 | X#else 365 | Xdispage(pag) 366 | Xchar *pag; 367 | X{ 368 | X register i, n; 369 | X register off; 370 | X register short *ino = (short *) pag; 371 | X 372 | X off = PBLKSIZ; 373 | X for (i = 1; i < ino[0]; i += 2) { 374 | X for (n = ino[i]; n < off; n++) 375 | X if (pag[n] != 0) 376 | X putchar(pag[n]); 377 | X putchar('\t'); 378 | X off = ino[i]; 379 | X for (n = ino[i + 1]; n < off; n++) 380 | X if (pag[n] != 0) 381 | X putchar(pag[n]); 382 | X putchar('\n'); 383 | X off = ino[i + 1]; 384 | X } 385 | X} 386 | X#endif 387 | @@@End of dbd.c 388 | echo x - dbe.1 1>&2 389 | sed 's/^X//' >dbe.1 <<'@@@End of dbe.1' 390 | X.TH dbe 1 "ndbm(3) EDITOR" 391 | X.SH NAME 392 | Xdbe \- Edit a ndbm(3) database 393 | X.SH USAGE 394 | Xdbe [-m r|w|rw] [-crtvx] -a|-d|-f|-F|-s [ []] 395 | X.SH DESCRIPTION 396 | X\fIdbme\fP operates on ndbm(3) databases. 397 | XIt can be used to create them, look at them or change them. 398 | XWhen specifying the value of a key or the content of its associated entry, 399 | X\\nnn, \\0, \\n, \\t, \\f and \\r are interpreted as usual. 400 | XWhen displaying key/content pairs, non-printable characters are displayed 401 | Xusing the \\nnn notation. 402 | X.SH OPTIONS 403 | X.IP -a 404 | XList all entries in the database. 405 | X.IP -c 406 | XCreate the database if it does not exist. 
407 | X.IP -d 408 | XDelete the entry associated with the specified key. 409 | X.IP -f 410 | XFetch and display the entry associated with the specified key. 411 | X.IP -F 412 | XFetch and display all the entries whose key match the specified 413 | Xregular-expression 414 | X.IP "-m r|w|rw" 415 | XOpen the database in read-only, write-only or read-write mode 416 | X.IP -r 417 | XReplace the entry associated with the specified key if it already exists. 418 | XSee option -s. 419 | X.IP -s 420 | XStore an entry under a specific key. 421 | XAn error occurs if the key already exists and the option -r was not specified. 422 | X.IP -t 423 | XRe-initialize the database before executing the command. 424 | X.IP -v 425 | XVerbose mode. 426 | XConfirm stores and deletions. 427 | X.IP -x 428 | XIf option -x is used with option -c, then if the database already exists, 429 | Xan error occurs. 430 | XThis can be used to implement a simple exclusive access locking mechanism. 431 | X.SH SEE ALSO 432 | Xndbm(3) 433 | X.SH AUTHOR 434 | Xjanick@bnr.ca 435 | X 436 | @@@End of dbe.1 437 | echo x - dbe.c 1>&2 438 | sed 's/^X//' >dbe.c <<'@@@End of dbe.c' 439 | X#include 440 | X#ifndef VMS 441 | X#include 442 | X#include 443 | X#else 444 | X#include "file.h" 445 | X#include "ndbm.h" 446 | X#endif 447 | X#include 448 | X 449 | X/***************************************************************************\ 450 | X** ** 451 | X** Function name: getopt() ** 452 | X** Author: Henry Spencer, UofT ** 453 | X** Coding date: 84/04/28 ** 454 | X** ** 455 | X** Description: ** 456 | X** ** 457 | X** Parses argv[] for arguments. ** 458 | X** Works with Whitesmith's C compiler. 
** 459 | X** ** 460 | X** Inputs - The number of arguments ** 461 | X** - The base address of the array of arguments ** 462 | X** - A string listing the valid options (':' indicates an ** 463 | X** argument to the preceding option is required, a ';' ** 464 | X** indicates an argument to the preceding option is optional) ** 465 | X** ** 466 | X** Outputs - Returns the next option character, ** 467 | X** '?' for non '-' arguments ** 468 | X** or ':' when there is no more arguments. ** 469 | X** ** 470 | X** Side Effects + The argument to an option is pointed to by 'optarg' ** 471 | X** ** 472 | X***************************************************************************** 473 | X** ** 474 | X** REVISION HISTORY: ** 475 | X** ** 476 | X** DATE NAME DESCRIPTION ** 477 | X** YY/MM/DD ------------------ ------------------------------------ ** 478 | X** 88/10/20 Janick Bergeron Returns '?' on unamed arguments ** 479 | X** returns '!' on unknown options ** 480 | X** and 'EOF' only when exhausted. ** 481 | X** 88/11/18 Janick Bergeron Return ':' when no more arguments ** 482 | X** 89/08/11 Janick Bergeron Optional optarg when ';' in optstring ** 483 | X** ** 484 | X\***************************************************************************/ 485 | X 486 | Xchar *optarg; /* Global argument pointer. 
*/ 487 | X 488 | X#ifdef VMS 489 | X#define index strchr 490 | X#endif 491 | X 492 | Xchar 493 | Xgetopt(argc, argv, optstring) 494 | Xint argc; 495 | Xchar **argv; 496 | Xchar *optstring; 497 | X{ 498 | X register int c; 499 | X register char *place; 500 | X extern char *index(); 501 | X static int optind = 0; 502 | X static char *scan = NULL; 503 | X 504 | X optarg = NULL; 505 | X 506 | X if (scan == NULL || *scan == '\0') { 507 | X 508 | X if (optind == 0) 509 | X optind++; 510 | X if (optind >= argc) 511 | X return ':'; 512 | X 513 | X optarg = place = argv[optind++]; 514 | X if (place[0] != '-' || place[1] == '\0') 515 | X return '?'; 516 | X if (place[1] == '-' && place[2] == '\0') 517 | X return '?'; 518 | X scan = place + 1; 519 | X } 520 | X 521 | X c = *scan++; 522 | X place = index(optstring, c); 523 | X if (place == NULL || c == ':' || c == ';') { 524 | X 525 | X (void) fprintf(stderr, "%s: unknown option %c\n", argv[0], c); 526 | X scan = NULL; 527 | X return '!'; 528 | X } 529 | X if (*++place == ':') { 530 | X 531 | X if (*scan != '\0') { 532 | X 533 | X optarg = scan; 534 | X scan = NULL; 535 | X 536 | X } 537 | X else { 538 | X 539 | X if (optind >= argc) { 540 | X 541 | X (void) fprintf(stderr, "%s: %c requires an argument\n", 542 | X argv[0], c); 543 | X return '!'; 544 | X } 545 | X optarg = argv[optind]; 546 | X optind++; 547 | X } 548 | X } 549 | X else if (*place == ';') { 550 | X 551 | X if (*scan != '\0') { 552 | X 553 | X optarg = scan; 554 | X scan = NULL; 555 | X 556 | X } 557 | X else { 558 | X 559 | X if (optind >= argc || *argv[optind] == '-') 560 | X optarg = NULL; 561 | X else { 562 | X optarg = argv[optind]; 563 | X optind++; 564 | X } 565 | X } 566 | X } 567 | X return c; 568 | X} 569 | X 570 | X 571 | Xvoid 572 | Xprint_datum(db) 573 | Xdatum db; 574 | X{ 575 | X int i; 576 | X 577 | X putchar('"'); 578 | X for (i = 0; i < db.dsize; i++) { 579 | X if (isprint(db.dptr[i])) 580 | X putchar(db.dptr[i]); 581 | X else { 582 | X 
putchar('\\'); 583 | X putchar('0' + ((db.dptr[i] >> 6) & 0x07)); 584 | X putchar('0' + ((db.dptr[i] >> 3) & 0x07)); 585 | X putchar('0' + (db.dptr[i] & 0x07)); 586 | X } 587 | X } 588 | X putchar('"'); 589 | X} 590 | X 591 | X 592 | Xdatum 593 | Xread_datum(s) 594 | Xchar *s; 595 | X{ 596 | X datum db; 597 | X char *p; 598 | X int i; 599 | X 600 | X db.dsize = 0; 601 | X db.dptr = (char *) malloc(strlen(s) * sizeof(char)); 602 | X for (p = db.dptr; *s != '\0'; p++, db.dsize++, s++) { 603 | X if (*s == '\\') { 604 | X if (*++s == 'n') 605 | X *p = '\n'; 606 | X else if (*s == 'r') 607 | X *p = '\r'; 608 | X else if (*s == 'f') 609 | X *p = '\f'; 610 | X else if (*s == 't') 611 | X *p = '\t'; 612 | X else if (isdigit(*s) && isdigit(*(s + 1)) && isdigit(*(s + 2))) { 613 | X i = (*s++ - '0') << 6; 614 | X i |= (*s++ - '0') << 3; 615 | X i |= *s - '0'; 616 | X *p = i; 617 | X } 618 | X else if (*s == '0') 619 | X *p = '\0'; 620 | X else 621 | X *p = *s; 622 | X } 623 | X else 624 | X *p = *s; 625 | X } 626 | X 627 | X return db; 628 | X} 629 | X 630 | X 631 | Xchar * 632 | Xkey2s(db) 633 | Xdatum db; 634 | X{ 635 | X char *buf; 636 | X char *p1, *p2; 637 | X 638 | X buf = (char *) malloc((db.dsize + 1) * sizeof(char)); 639 | X for (p1 = buf, p2 = db.dptr; *p2 != '\0'; *p1++ = *p2++); 640 | X *p1 = '\0'; 641 | X return buf; 642 | X} 643 | X 644 | X 645 | Xmain(argc, argv) 646 | Xint argc; 647 | Xchar **argv; 648 | X{ 649 | X typedef enum { 650 | X YOW, FETCH, STORE, DELETE, SCAN, REGEXP 651 | X } commands; 652 | X char opt; 653 | X int flags; 654 | X int giveusage = 0; 655 | X int verbose = 0; 656 | X commands what = YOW; 657 | X char *comarg[3]; 658 | X int st_flag = DBM_INSERT; 659 | X int argn; 660 | X DBM *db; 661 | X datum key; 662 | X datum content; 663 | X 664 | X flags = O_RDWR; 665 | X argn = 0; 666 | X 667 | X while ((opt = getopt(argc, argv, "acdfFm:rstvx")) != ':') { 668 | X switch (opt) { 669 | X case 'a': 670 | X what = SCAN; 671 | X break; 672 | X case 
'c': 673 | X flags |= O_CREAT; 674 | X break; 675 | X case 'd': 676 | X what = DELETE; 677 | X break; 678 | X case 'f': 679 | X what = FETCH; 680 | X break; 681 | X case 'F': 682 | X what = REGEXP; 683 | X break; 684 | X case 'm': 685 | X flags &= ~(000007); 686 | X if (strcmp(optarg, "r") == 0) 687 | X flags |= O_RDONLY; 688 | X else if (strcmp(optarg, "w") == 0) 689 | X flags |= O_WRONLY; 690 | X else if (strcmp(optarg, "rw") == 0) 691 | X flags |= O_RDWR; 692 | X else { 693 | X fprintf(stderr, "Invalid mode: \"%s\"\n", optarg); 694 | X giveusage = 1; 695 | X } 696 | X break; 697 | X case 'r': 698 | X st_flag = DBM_REPLACE; 699 | X break; 700 | X case 's': 701 | X what = STORE; 702 | X break; 703 | X case 't': 704 | X flags |= O_TRUNC; 705 | X break; 706 | X case 'v': 707 | X verbose = 1; 708 | X break; 709 | X case 'x': 710 | X flags |= O_EXCL; 711 | X break; 712 | X case '!': 713 | X giveusage = 1; 714 | X break; 715 | X case '?': 716 | X if (argn < 3) 717 | X comarg[argn++] = optarg; 718 | X else { 719 | X fprintf(stderr, "Too many arguments.\n"); 720 | X giveusage = 1; 721 | X } 722 | X break; 723 | X } 724 | X } 725 | X 726 | X if (giveusage | what == YOW | argn < 1) { 727 | X fprintf(stderr, "Usage: %s databse [-m r|w|rw] [-crtx] -a|-d|-f|-F|-s [key [content]]\n", argv[0]); 728 | X exit(-1); 729 | X } 730 | X 731 | X if ((db = dbm_open(comarg[0], flags, 0777)) == NULL) { 732 | X fprintf(stderr, "Error opening database \"%s\"\n", comarg[0]); 733 | X exit(-1); 734 | X } 735 | X 736 | X if (argn > 1) 737 | X key = read_datum(comarg[1]); 738 | X if (argn > 2) 739 | X content = read_datum(comarg[2]); 740 | X 741 | X switch (what) { 742 | X 743 | X case SCAN: 744 | X key = dbm_firstkey(db); 745 | X if (dbm_error(db)) { 746 | X fprintf(stderr, "Error when fetching first key\n"); 747 | X goto db_exit; 748 | X } 749 | X while (key.dptr != NULL) { 750 | X content = dbm_fetch(db, key); 751 | X if (dbm_error(db)) { 752 | X fprintf(stderr, "Error when fetching "); 753 | 
X print_datum(key); 754 | X printf("\n"); 755 | X goto db_exit; 756 | X } 757 | X print_datum(key); 758 | X printf(": "); 759 | X print_datum(content); 760 | X printf("\n"); 761 | X if (dbm_error(db)) { 762 | X fprintf(stderr, "Error when fetching next key\n"); 763 | X goto db_exit; 764 | X } 765 | X key = dbm_nextkey(db); 766 | X } 767 | X break; 768 | X 769 | X case REGEXP: 770 | X if (argn < 2) { 771 | X fprintf(stderr, "Missing regular expression.\n"); 772 | X goto db_exit; 773 | X } 774 | X if (re_comp(comarg[1])) { 775 | X fprintf(stderr, "Invalid regular expression\n"); 776 | X goto db_exit; 777 | X } 778 | X key = dbm_firstkey(db); 779 | X if (dbm_error(db)) { 780 | X fprintf(stderr, "Error when fetching first key\n"); 781 | X goto db_exit; 782 | X } 783 | X while (key.dptr != NULL) { 784 | X if (re_exec(key2s(key))) { 785 | X content = dbm_fetch(db, key); 786 | X if (dbm_error(db)) { 787 | X fprintf(stderr, "Error when fetching "); 788 | X print_datum(key); 789 | X printf("\n"); 790 | X goto db_exit; 791 | X } 792 | X print_datum(key); 793 | X printf(": "); 794 | X print_datum(content); 795 | X printf("\n"); 796 | X if (dbm_error(db)) { 797 | X fprintf(stderr, "Error when fetching next key\n"); 798 | X goto db_exit; 799 | X } 800 | X } 801 | X key = dbm_nextkey(db); 802 | X } 803 | X break; 804 | X 805 | X case FETCH: 806 | X if (argn < 2) { 807 | X fprintf(stderr, "Missing fetch key.\n"); 808 | X goto db_exit; 809 | X } 810 | X content = dbm_fetch(db, key); 811 | X if (dbm_error(db)) { 812 | X fprintf(stderr, "Error when fetching "); 813 | X print_datum(key); 814 | X printf("\n"); 815 | X goto db_exit; 816 | X } 817 | X if (content.dptr == NULL) { 818 | X fprintf(stderr, "Cannot find "); 819 | X print_datum(key); 820 | X printf("\n"); 821 | X goto db_exit; 822 | X } 823 | X print_datum(key); 824 | X printf(": "); 825 | X print_datum(content); 826 | X printf("\n"); 827 | X break; 828 | X 829 | X case DELETE: 830 | X if (argn < 2) { 831 | X fprintf(stderr, 
"Missing delete key.\n"); 832 | X goto db_exit; 833 | X } 834 | X if (dbm_delete(db, key) || dbm_error(db)) { 835 | X fprintf(stderr, "Error when deleting "); 836 | X print_datum(key); 837 | X printf("\n"); 838 | X goto db_exit; 839 | X } 840 | X if (verbose) { 841 | X print_datum(key); 842 | X printf(": DELETED\n"); 843 | X } 844 | X break; 845 | X 846 | X case STORE: 847 | X if (argn < 3) { 848 | X fprintf(stderr, "Missing key and/or content.\n"); 849 | X goto db_exit; 850 | X } 851 | X if (dbm_store(db, key, content, st_flag) || dbm_error(db)) { 852 | X fprintf(stderr, "Error when storing "); 853 | X print_datum(key); 854 | X printf("\n"); 855 | X goto db_exit; 856 | X } 857 | X if (verbose) { 858 | X print_datum(key); 859 | X printf(": "); 860 | X print_datum(content); 861 | X printf(" STORED\n"); 862 | X } 863 | X break; 864 | X } 865 | X 866 | Xdb_exit: 867 | X dbm_clearerr(db); 868 | X dbm_close(db); 869 | X if (dbm_error(db)) { 870 | X fprintf(stderr, "Error closing database \"%s\"\n", comarg[0]); 871 | X exit(-1); 872 | X } 873 | X} 874 | @@@End of dbe.c 875 | echo x - dbm.c 1>&2 876 | sed 's/^X//' >dbm.c <<'@@@End of dbm.c' 877 | X/* 878 | X * Copyright (c) 1985 The Regents of the University of California. 879 | X * All rights reserved. 880 | X * 881 | X * Redistribution and use in source and binary forms are permitted 882 | X * provided that the above copyright notice and this paragraph are 883 | X * duplicated in all such forms and that any documentation, 884 | X * advertising materials, and other materials related to such 885 | X * distribution and use acknowledge that the software was developed 886 | X * by the University of California, Berkeley. The name of the 887 | X * University may not be used to endorse or promote products derived 888 | X * from this software without specific prior written permission. 
889 | X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 890 | X * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 891 | X * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 892 | X */ 893 | X 894 | X#ifndef lint 895 | Xstatic char sccsid[] = "@(#)dbm.c 5.4 (Berkeley) 5/24/89"; 896 | X#endif /* not lint */ 897 | X 898 | X#include "dbm.h" 899 | X 900 | X#define NODB ((DBM *)0) 901 | X 902 | Xstatic DBM *cur_db = NODB; 903 | X 904 | Xstatic char no_db[] = "dbm: no open database\n"; 905 | X 906 | Xdbminit(file) 907 | X char *file; 908 | X{ 909 | X if (cur_db != NODB) 910 | X dbm_close(cur_db); 911 | X 912 | X cur_db = dbm_open(file, 2, 0); 913 | X if (cur_db == NODB) { 914 | X cur_db = dbm_open(file, 0, 0); 915 | X if (cur_db == NODB) 916 | X return (-1); 917 | X } 918 | X return (0); 919 | X} 920 | X 921 | Xlong 922 | Xforder(key) 923 | Xdatum key; 924 | X{ 925 | X if (cur_db == NODB) { 926 | X printf(no_db); 927 | X return (0L); 928 | X } 929 | X return (dbm_forder(cur_db, key)); 930 | X} 931 | X 932 | Xdatum 933 | Xfetch(key) 934 | Xdatum key; 935 | X{ 936 | X datum item; 937 | X 938 | X if (cur_db == NODB) { 939 | X printf(no_db); 940 | X item.dptr = 0; 941 | X return (item); 942 | X } 943 | X return (dbm_fetch(cur_db, key)); 944 | X} 945 | X 946 | Xdelete(key) 947 | Xdatum key; 948 | X{ 949 | X if (cur_db == NODB) { 950 | X printf(no_db); 951 | X return (-1); 952 | X } 953 | X if (dbm_rdonly(cur_db)) 954 | X return (-1); 955 | X return (dbm_delete(cur_db, key)); 956 | X} 957 | X 958 | Xstore(key, dat) 959 | Xdatum key, dat; 960 | X{ 961 | X if (cur_db == NODB) { 962 | X printf(no_db); 963 | X return (-1); 964 | X } 965 | X if (dbm_rdonly(cur_db)) 966 | X return (-1); 967 | X 968 | X return (dbm_store(cur_db, key, dat, DBM_REPLACE)); 969 | X} 970 | X 971 | Xdatum 972 | Xfirstkey() 973 | X{ 974 | X datum item; 975 | X 976 | X if (cur_db == NODB) { 977 | X printf(no_db); 978 | X item.dptr = 0; 979 | X return (item); 
980 | X } 981 | X return (dbm_firstkey(cur_db)); 982 | X} 983 | X 984 | Xdatum 985 | Xnextkey(key) 986 | Xdatum key; 987 | X{ 988 | X datum item; 989 | X 990 | X if (cur_db == NODB) { 991 | X printf(no_db); 992 | X item.dptr = 0; 993 | X return (item); 994 | X } 995 | X return (dbm_nextkey(cur_db, key)); 996 | X} 997 | @@@End of dbm.c 998 | echo x - dbm.h 1>&2 999 | sed 's/^X//' >dbm.h <<'@@@End of dbm.h' 1000 | X/* 1001 | X * Copyright (c) 1983 The Regents of the University of California. 1002 | X * All rights reserved. 1003 | X * 1004 | X * Redistribution and use in source and binary forms are permitted 1005 | X * provided that the above copyright notice and this paragraph are 1006 | X * duplicated in all such forms and that any documentation, 1007 | X * advertising materials, and other materials related to such 1008 | X * distribution and use acknowledge that the software was developed 1009 | X * by the University of California, Berkeley. The name of the 1010 | X * University may not be used to endorse or promote products derived 1011 | X * from this software without specific prior written permission. 1012 | X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR 1013 | X * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED 1014 | X * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. 1015 | X * 1016 | X * @(#)dbm.h 5.2 (Berkeley) 5/24/89 1017 | X */ 1018 | X 1019 | X#ifndef NULL 1020 | X/* 1021 | X * this is lunacy, we no longer use it (and never should have 1022 | X * unconditionally defined it), but, this whole file is for 1023 | X * backwards compatability - someone may rely on this. 
1024 | X */ 1025 | X#define NULL ((char *) 0) 1026 | X#endif 1027 | X 1028 | X#include 1029 | X 1030 | Xdatum fetch(); 1031 | Xdatum firstkey(); 1032 | Xdatum nextkey(); 1033 | @@@End of dbm.h 1034 | echo x - dbu.c 1>&2 1035 | sed 's/^X//' >dbu.c <<'@@@End of dbu.c' 1036 | X#include 1037 | X#include 1038 | X#ifdef SDBM 1039 | X#include "sdbm.h" 1040 | X#else 1041 | X#include 1042 | X#endif 1043 | X#include 1044 | X 1045 | X#ifdef BSD42 1046 | X#define strchr index 1047 | X#endif 1048 | X 1049 | Xextern int getopt(); 1050 | Xextern char *strchr(); 1051 | Xextern void oops(); 1052 | X 1053 | Xchar *progname; 1054 | X 1055 | Xstatic int rflag; 1056 | Xstatic char *usage = "%s [-R] cat | look |... dbmname"; 1057 | X 1058 | X#define DERROR 0 1059 | X#define DLOOK 1 1060 | X#define DINSERT 2 1061 | X#define DDELETE 3 1062 | X#define DCAT 4 1063 | X#define DBUILD 5 1064 | X#define DPRESS 6 1065 | X#define DCREAT 7 1066 | X 1067 | X#define LINEMAX 8192 1068 | X 1069 | Xtypedef struct { 1070 | X char *sname; 1071 | X int scode; 1072 | X int flags; 1073 | X} cmd; 1074 | X 1075 | Xstatic cmd cmds[] = { 1076 | X 1077 | X "fetch", DLOOK, O_RDONLY, 1078 | X "get", DLOOK, O_RDONLY, 1079 | X "look", DLOOK, O_RDONLY, 1080 | X "add", DINSERT, O_RDWR, 1081 | X "insert", DINSERT, O_RDWR, 1082 | X "store", DINSERT, O_RDWR, 1083 | X "delete", DDELETE, O_RDWR, 1084 | X "remove", DDELETE, O_RDWR, 1085 | X "dump", DCAT, O_RDONLY, 1086 | X "list", DCAT, O_RDONLY, 1087 | X "cat", DCAT, O_RDONLY, 1088 | X "creat", DCREAT, O_RDWR | O_CREAT | O_TRUNC, 1089 | X "new", DCREAT, O_RDWR | O_CREAT | O_TRUNC, 1090 | X "build", DBUILD, O_RDWR | O_CREAT, 1091 | X "squash", DPRESS, O_RDWR, 1092 | X "compact", DPRESS, O_RDWR, 1093 | X "compress", DPRESS, O_RDWR 1094 | X}; 1095 | X 1096 | X#define CTABSIZ (sizeof (cmds)/sizeof (cmd)) 1097 | X 1098 | Xstatic cmd *parse(); 1099 | Xstatic void badk(), doit(), prdatum(); 1100 | X 1101 | Xint 1102 | Xmain(argc, argv) 1103 | Xint argc; 1104 | Xchar *argv[]; 1105 
| X{ 1106 | X int c; 1107 | X register cmd *act; 1108 | X extern int optind; 1109 | X extern char *optarg; 1110 | X 1111 | X progname = argv[0]; 1112 | X 1113 | X while ((c = getopt(argc, argv, "R")) != EOF) 1114 | X switch (c) { 1115 | X case 'R': /* raw processing */ 1116 | X rflag++; 1117 | X break; 1118 | X 1119 | X default: 1120 | X oops("usage: %s", usage); 1121 | X break; 1122 | X } 1123 | X 1124 | X if ((argc -= optind) < 2) 1125 | X oops("usage: %s", usage); 1126 | X 1127 | X if ((act = parse(argv[optind])) == NULL) 1128 | X badk(argv[optind]); 1129 | X optind++; 1130 | X doit(act, argv[optind]); 1131 | X return 0; 1132 | X} 1133 | X 1134 | Xstatic void 1135 | Xdoit(act, file) 1136 | Xregister cmd *act; 1137 | Xchar *file; 1138 | X{ 1139 | X datum key; 1140 | X datum val; 1141 | X register DBM *db; 1142 | X register char *op; 1143 | X register int n; 1144 | X char *line; 1145 | X#ifdef TIME 1146 | X long start; 1147 | X extern long time(); 1148 | X#endif 1149 | X 1150 | X if ((db = dbm_open(file, act->flags, 0644)) == NULL) 1151 | X oops("cannot open: %s", file); 1152 | X 1153 | X if ((line = (char *) malloc(LINEMAX)) == NULL) 1154 | X oops("%s: cannot get memory", "line alloc"); 1155 | X 1156 | X switch (act->scode) { 1157 | X 1158 | X case DLOOK: 1159 | X while (fgets(line, LINEMAX, stdin) != NULL) { 1160 | X n = strlen(line) - 1; 1161 | X line[n] = 0; 1162 | X key.dptr = line; 1163 | X key.dsize = n; 1164 | X val = dbm_fetch(db, key); 1165 | X if (val.dptr != NULL) { 1166 | X prdatum(stdout, val); 1167 | X putchar('\n'); 1168 | X continue; 1169 | X } 1170 | X prdatum(stderr, key); 1171 | X fprintf(stderr, ": not found.\n"); 1172 | X } 1173 | X break; 1174 | X case DINSERT: 1175 | X break; 1176 | X case DDELETE: 1177 | X while (fgets(line, LINEMAX, stdin) != NULL) { 1178 | X n = strlen(line) - 1; 1179 | X line[n] = 0; 1180 | X key.dptr = line; 1181 | X key.dsize = n; 1182 | X if (dbm_delete(db, key) == -1) { 1183 | X prdatum(stderr, key); 1184 | X 
fprintf(stderr, ": not found.\n"); 1185 | X } 1186 | X } 1187 | X break; 1188 | X case DCAT: 1189 | X for (key = dbm_firstkey(db); key.dptr != 0; 1190 | X key = dbm_nextkey(db)) { 1191 | X prdatum(stdout, key); 1192 | X putchar('\t'); 1193 | X prdatum(stdout, dbm_fetch(db, key)); 1194 | X putchar('\n'); 1195 | X } 1196 | X break; 1197 | X case DBUILD: 1198 | X#ifdef TIME 1199 | X start = time(0); 1200 | X#endif 1201 | X while (fgets(line, LINEMAX, stdin) != NULL) { 1202 | X n = strlen(line) - 1; 1203 | X line[n] = 0; 1204 | X key.dptr = line; 1205 | X if ((op = strchr(line, '\t')) != 0) { 1206 | X key.dsize = op - line; 1207 | X *op++ = 0; 1208 | X val.dptr = op; 1209 | X val.dsize = line + n - op; 1210 | X } 1211 | X else 1212 | X oops("bad input; %s", line); 1213 | X 1214 | X if (dbm_store(db, key, val, DBM_REPLACE) < 0) { 1215 | X prdatum(stderr, key); 1216 | X fprintf(stderr, ": "); 1217 | X oops("store: %s", "failed"); 1218 | X } 1219 | X } 1220 | X#ifdef TIME 1221 | X printf("done: %d seconds.\n", time(0) - start); 1222 | X#endif 1223 | X break; 1224 | X case DPRESS: 1225 | X break; 1226 | X case DCREAT: 1227 | X break; 1228 | X } 1229 | X 1230 | X dbm_close(db); 1231 | X} 1232 | X 1233 | Xstatic void 1234 | Xbadk(word) 1235 | Xchar *word; 1236 | X{ 1237 | X register int i; 1238 | X 1239 | X if (progname) 1240 | X fprintf(stderr, "%s: ", progname); 1241 | X fprintf(stderr, "bad keywd %s. use one of\n", word); 1242 | X for (i = 0; i < (int)CTABSIZ; i++) 1243 | X fprintf(stderr, "%-8s%c", cmds[i].sname, 1244 | X ((i + 1) % 6 == 0) ? 
'\n' : ' '); 1245 | X fprintf(stderr, "\n"); 1246 | X exit(1); 1247 | X /*NOTREACHED*/ 1248 | X} 1249 | X 1250 | Xstatic cmd * 1251 | Xparse(str) 1252 | Xregister char *str; 1253 | X{ 1254 | X register int i = CTABSIZ; 1255 | X register cmd *p; 1256 | X 1257 | X for (p = cmds; i--; p++) 1258 | X if (strcmp(p->sname, str) == 0) 1259 | X return p; 1260 | X return NULL; 1261 | X} 1262 | X 1263 | Xstatic void 1264 | Xprdatum(stream, d) 1265 | XFILE *stream; 1266 | Xdatum d; 1267 | X{ 1268 | X register int c; 1269 | X register char *p = d.dptr; 1270 | X register int n = d.dsize; 1271 | X 1272 | X while (n--) { 1273 | X c = *p++ & 0377; 1274 | X if (c & 0200) { 1275 | X fprintf(stream, "M-"); 1276 | X c &= 0177; 1277 | X } 1278 | X if (c == 0177 || c < ' ') 1279 | X fprintf(stream, "^%c", (c == 0177) ? '?' : c + '@'); 1280 | X else 1281 | X putc(c, stream); 1282 | X } 1283 | X} 1284 | X 1285 | X 1286 | @@@End of dbu.c 1287 | echo x - grind 1>&2 1288 | sed 's/^X//' >grind <<'@@@End of grind' 1289 | X#!/bin/sh 1290 | Xrm -f /tmp/*.dir /tmp/*.pag 1291 | Xawk -e '{ 1292 | X printf "%s\t", $0 1293 | X for (i = 0; i < 40; i++) 1294 | X printf "%s.", $0 1295 | X printf "\n" 1296 | X}' < /usr/dict/words | $1 build /tmp/$2 1297 | X 1298 | @@@End of grind 1299 | echo x - hash.c 1>&2 1300 | sed 's/^X//' >hash.c <<'@@@End of hash.c' 1301 | X/* 1302 | X * sdbm - ndbm work-alike hashed database library 1303 | X * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). 1304 | X * author: oz@nexus.yorku.ca 1305 | X * status: public domain. keep it that way. 1306 | X * 1307 | X * hashing routine 1308 | X */ 1309 | X 1310 | X#include "sdbm.h" 1311 | X/* 1312 | X * polynomial conversion ignoring overflows 1313 | X * [this seems to work remarkably well, in fact better 1314 | X * then the ndbm hash function. Replace at your own risk] 1315 | X * use: 65599 nice. 1316 | X * 65587 even better. 
/*
 * dbm_hash: hash the `len' bytes starting at `str'.
 *
 * polynomial conversion ignoring overflows: starting from 0,
 * every byte in turn is folded in as n = byte + 65599 * n.
 * with DUFF defined the loop is unrolled eight ways (duff's
 * device); otherwise a plain loop is used. both forms give the
 * same result, and both treat a len of zero or less as "no
 * bytes", returning 0 without touching str.
 */
long
dbm_hash(str, len)
register char *str;
register int len;
{
	register unsigned long n = 0;

#ifdef DUFF

#define HASHC	n = *str++ + 65599 * n

	if (len > 0) {
		/* number of passes through the unrolled body */
		register int loop = (len + 8 - 1) >> 3;

		/* jump into the body so the first pass handles len % 8 bytes */
		switch(len & (8 - 1)) {
		case 0:	do {
			HASHC;	case 7:	HASHC;
		case 6:	HASHC;	case 5:	HASHC;
		case 4:	HASHC;	case 3:	HASHC;
		case 2:	HASHC;	case 1:	HASHC;
			} while (--loop);
		}

	}
#else
	/* "> 0" so that a negative len cannot send the loop running off the key */
	while (len-- > 0)
		n = *str++ + 65599 * n;
#endif
	return n;
}
util.o 1391 | X# cc $(CFLAGS) -o x-dbu dbu.o util.o 1392 | Xlint: 1393 | X lint -abchx $(SRCS) 1394 | X 1395 | Xclean: 1396 | X rm -f *.o mon.out core 1397 | X 1398 | Xpurge: clean 1399 | X rm -f dbu libsdbm.a dbd dba dbe x-dbu *.dir *.pag 1400 | X 1401 | Xshar: 1402 | X shar $(MISC) makefile $(SRCS) $(HDRS) >SDBM.SHAR 1403 | X 1404 | Xreadme: 1405 | X nroff -ms readme.ms | col -b >README 1406 | @@@End of makefile 1407 | echo x - pair.c 1>&2 1408 | sed 's/^X//' >pair.c <<'@@@End of pair.c' 1409 | X/* 1410 | X * sdbm - ndbm work-alike hashed database library 1411 | X * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). 1412 | X * author: oz@nexus.yorku.ca 1413 | X * status: public domain. 1414 | X * 1415 | X * page-level routines 1416 | X */ 1417 | X 1418 | X#ifndef lint 1419 | Xstatic char rcsid[] = "$Id: pair.c,v 1.10 90/12/13 13:00:35 oz Exp $"; 1420 | X#endif 1421 | X 1422 | X#include "sdbm.h" 1423 | X#include "tune.h" 1424 | X#include "pair.h" 1425 | X 1426 | X#ifndef BSD42 1427 | X#include 1428 | X#endif 1429 | X 1430 | X#define exhash(item) dbm_hash((item).dptr, (item).dsize) 1431 | X 1432 | X/* 1433 | X * forward 1434 | X */ 1435 | Xstatic int seepair proto((char *, int, char *, int)); 1436 | X 1437 | X/* 1438 | X * page format: 1439 | X * +------------------------------+ 1440 | X * ino | n | keyoff | datoff | keyoff | 1441 | X * +------------+--------+--------+ 1442 | X * | datoff | - - - ----> | 1443 | X * +--------+---------------------+ 1444 | X * | F R E E A R E A | 1445 | X * +--------------+---------------+ 1446 | X * | <---- - - - | data | 1447 | X * +--------+-----+----+----------+ 1448 | X * | key | data | key | 1449 | X * +--------+----------+----------+ 1450 | X * 1451 | X * calculating the offsets for free area: if the number 1452 | X * of entries (ino[0]) is zero, the offset to the END of 1453 | X * the free area is the block size. Otherwise, it is the 1454 | X * nth (ino[ino[0]]) entry's offset. 
1455 | X */ 1456 | X 1457 | Xint 1458 | Xfitpair(pag, need) 1459 | Xchar *pag; 1460 | Xint need; 1461 | X{ 1462 | X register int n; 1463 | X register int off; 1464 | X register int free; 1465 | X register short *ino = (short *) pag; 1466 | X 1467 | X off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ; 1468 | X free = off - (n + 1) * sizeof(short); 1469 | X need += 2 * sizeof(short); 1470 | X 1471 | X debug(("free %d need %d\n", free, need)); 1472 | X 1473 | X return need <= free; 1474 | X} 1475 | X 1476 | Xvoid 1477 | Xputpair(pag, key, val) 1478 | Xchar *pag; 1479 | Xdatum key; 1480 | Xdatum val; 1481 | X{ 1482 | X register int n; 1483 | X register int off; 1484 | X register short *ino = (short *) pag; 1485 | X 1486 | X off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ; 1487 | X/* 1488 | X * enter the key first 1489 | X */ 1490 | X off -= key.dsize; 1491 | X (void) memcpy(pag + off, key.dptr, key.dsize); 1492 | X ino[n + 1] = off; 1493 | X/* 1494 | X * now the data 1495 | X */ 1496 | X off -= val.dsize; 1497 | X (void) memcpy(pag + off, val.dptr, val.dsize); 1498 | X ino[n + 2] = off; 1499 | X/* 1500 | X * adjust item count 1501 | X */ 1502 | X ino[0] += 2; 1503 | X} 1504 | X 1505 | Xdatum 1506 | Xgetpair(pag, key) 1507 | Xchar *pag; 1508 | Xdatum key; 1509 | X{ 1510 | X register int i; 1511 | X register int n; 1512 | X datum val; 1513 | X register short *ino = (short *) pag; 1514 | X 1515 | X if ((n = ino[0]) == 0) 1516 | X return nullitem; 1517 | X 1518 | X if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0) 1519 | X return nullitem; 1520 | X 1521 | X val.dptr = pag + ino[i + 1]; 1522 | X val.dsize = ino[i] - ino[i + 1]; 1523 | X return val; 1524 | X} 1525 | X 1526 | X#ifdef SEEDUPS 1527 | Xint 1528 | Xduppair(pag, key) 1529 | Xchar *pag; 1530 | Xdatum key; 1531 | X{ 1532 | X register short *ino = (short *) pag; 1533 | X return ino[0] > 0 && seepair(pag, ino[0], key.dptr, key.dsize) > 0; 1534 | X} 1535 | X#endif 1536 | X 1537 | Xdatum 1538 | Xgetnkey(pag, num) 1539 | Xchar 
*pag; 1540 | Xint num; 1541 | X{ 1542 | X datum key; 1543 | X register int off; 1544 | X register short *ino = (short *) pag; 1545 | X 1546 | X num = num * 2 - 1; 1547 | X if (ino[0] == 0 || num > ino[0]) 1548 | X return nullitem; 1549 | X 1550 | X off = (num > 1) ? ino[num - 1] : PBLKSIZ; 1551 | X 1552 | X key.dptr = pag + ino[num]; 1553 | X key.dsize = off - ino[num]; 1554 | X 1555 | X return key; 1556 | X} 1557 | X 1558 | Xint 1559 | Xdelpair(pag, key) 1560 | Xchar *pag; 1561 | Xdatum key; 1562 | X{ 1563 | X register int n; 1564 | X register int i; 1565 | X register short *ino = (short *) pag; 1566 | X 1567 | X if ((n = ino[0]) == 0) 1568 | X return 0; 1569 | X 1570 | X if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0) 1571 | X return 0; 1572 | X/* 1573 | X * found the key. if it is the last entry 1574 | X * [i.e. i == n - 1] we just adjust the entry count. 1575 | X * hard case: move all data down onto the deleted pair, 1576 | X * shift offsets onto deleted offsets, and adjust them. 1577 | X * [note: 0 < i < n] 1578 | X */ 1579 | X if (i < n - 1) { 1580 | X register int m; 1581 | X register char *dst = pag + (i == 1 ? 
PBLKSIZ : ino[i - 1]); 1582 | X register char *src = pag + ino[i + 1]; 1583 | X register int zoo = dst - src; 1584 | X 1585 | X debug(("free-up %d ", zoo)); 1586 | X/* 1587 | X * shift data/keys down 1588 | X */ 1589 | X m = ino[i + 1] - ino[n]; 1590 | X#ifdef DUFF 1591 | X#define MOVB *--dst = *--src 1592 | X 1593 | X if (m > 0) { 1594 | X register int loop = (m + 8 - 1) >> 3; 1595 | X 1596 | X switch (m & (8 - 1)) { 1597 | X case 0: do { 1598 | X MOVB; case 7: MOVB; 1599 | X case 6: MOVB; case 5: MOVB; 1600 | X case 4: MOVB; case 3: MOVB; 1601 | X case 2: MOVB; case 1: MOVB; 1602 | X } while (--loop); 1603 | X } 1604 | X } 1605 | X#else 1606 | X#ifdef MEMMOVE 1607 | X memmove(dst - m, src - m, m); 1608 | X#else 1609 | X while (m--) 1610 | X *--dst = *--src; 1611 | X#endif 1612 | X#endif 1613 | X/* 1614 | X * adjust offset index up 1615 | X */ 1616 | X while (i < n - 1) { 1617 | X ino[i] = ino[i + 2] + zoo; 1618 | X i++; 1619 | X } 1620 | X } 1621 | X ino[0] -= 2; 1622 | X return 1; 1623 | X} 1624 | X 1625 | X/* 1626 | X * search for the key in the page. 1627 | X * return offset index in the range 0 < i < n. 1628 | X * return 0 if not found. 
1629 | X */ 1630 | Xstatic int 1631 | Xseepair(pag, n, key, siz) 1632 | Xchar *pag; 1633 | Xregister int n; 1634 | Xregister char *key; 1635 | Xregister int siz; 1636 | X{ 1637 | X register int i; 1638 | X register int off = PBLKSIZ; 1639 | X register short *ino = (short *) pag; 1640 | X 1641 | X for (i = 1; i < n; i += 2) { 1642 | X if (siz == off - ino[i] && 1643 | X memcmp(key, pag + ino[i], siz) == 0) 1644 | X return i; 1645 | X off = ino[i + 1]; 1646 | X } 1647 | X return 0; 1648 | X} 1649 | X 1650 | Xvoid 1651 | Xsplpage(pag, new, sbit) 1652 | Xchar *pag; 1653 | Xchar *new; 1654 | Xlong sbit; 1655 | X{ 1656 | X datum key; 1657 | X datum val; 1658 | X 1659 | X register int n; 1660 | X register int off = PBLKSIZ; 1661 | X char cur[PBLKSIZ]; 1662 | X register short *ino = (short *) cur; 1663 | X 1664 | X (void) memcpy(cur, pag, PBLKSIZ); 1665 | X (void) memset(pag, 0, PBLKSIZ); 1666 | X (void) memset(new, 0, PBLKSIZ); 1667 | X 1668 | X n = ino[0]; 1669 | X for (ino++; n > 0; ino += 2) { 1670 | X key.dptr = cur + ino[0]; 1671 | X key.dsize = off - ino[0]; 1672 | X val.dptr = cur + ino[1]; 1673 | X val.dsize = ino[0] - ino[1]; 1674 | X/* 1675 | X * select the page pointer (by looking at sbit) and insert 1676 | X */ 1677 | X (void) putpair((exhash(key) & sbit) ? new : pag, key, val); 1678 | X 1679 | X off = ino[1]; 1680 | X n -= 2; 1681 | X } 1682 | X 1683 | X debug(("%d split %d/%d\n", ((short *) cur)[0] / 2, 1684 | X ((short *) new)[0] / 2, 1685 | X ((short *) pag)[0] / 2)); 1686 | X} 1687 | X 1688 | X/* 1689 | X * check page sanity: 1690 | X * number of entries should be something 1691 | X * reasonable, and all offsets in the index should be in order. 1692 | X * this could be made more rigorous. 
1693 | X */ 1694 | Xint 1695 | Xchkpage(pag) 1696 | Xchar *pag; 1697 | X{ 1698 | X register int n; 1699 | X register int off; 1700 | X register short *ino = (short *) pag; 1701 | X 1702 | X if ((n = ino[0]) < 0 || n > PBLKSIZ / sizeof(short)) 1703 | X return 0; 1704 | X 1705 | X if (n > 0) { 1706 | X off = PBLKSIZ; 1707 | X for (ino++; n > 0; ino += 2) { 1708 | X if (ino[0] > off || ino[1] > off || 1709 | X ino[1] > ino[0]) 1710 | X return 0; 1711 | X off = ino[1]; 1712 | X n -= 2; 1713 | X } 1714 | X } 1715 | X return 1; 1716 | X} 1717 | @@@End of pair.c 1718 | echo x - pair.h 1>&2 1719 | sed 's/^X//' >pair.h <<'@@@End of pair.h' 1720 | Xextern int fitpair proto((char *, int)); 1721 | Xextern void putpair proto((char *, datum, datum)); 1722 | Xextern datum getpair proto((char *, datum)); 1723 | Xextern int delpair proto((char *, datum)); 1724 | Xextern int chkpage proto((char *)); 1725 | Xextern datum getnkey proto((char *, int)); 1726 | Xextern void splpage proto((char *, char *, long)); 1727 | X#ifdef SEEDUPS 1728 | Xextern int duppair proto((char *, datum)); 1729 | X#endif 1730 | @@@End of pair.h 1731 | echo x - readme.ms 1>&2 1732 | sed 's/^X//' >readme.ms <<'@@@End of readme.ms' 1733 | X.\" tbl | readme.ms | [tn]roff -ms | ... 1734 | X.\" note the "C" (courier) and "CB" fonts: you will probably have to 1735 | X.\" change these. 1736 | X.\" $Id: readme.ms,v 1.1 90/12/13 13:09:15 oz Exp Locker: oz $ 1737 | X 1738 | X.de P1 1739 | X.br 1740 | X.nr dT 4 1741 | X.nf 1742 | X.ft C 1743 | X.sp .5 1744 | X.nr t \\n(dT*\\w'x'u 1745 | X.ta 1u*\\ntu 2u*\\ntu 3u*\\ntu 4u*\\ntu 5u*\\ntu 6u*\\ntu 7u*\\ntu 8u*\\ntu 9u*\\ntu 10u*\\ntu 11u*\\ntu 12u*\\ntu 13u*\\ntu 14u*\\ntu 1746 | X.. 1747 | X.de P2 1748 | X.br 1749 | X.ft 1 1750 | X.br 1751 | X.sp .5 1752 | X.br 1753 | X.fi 1754 | X.. 1755 | X.\" CW uses the typewriter/courier font. 1756 | X.de CW 1757 | X\fC\\$1\\fP\\$2 1758 | X.. 
1759 | X 1760 | X.\" Footnote numbering [by Henry Spencer] 1761 | X.\" \*f for a footnote number.. 1762 | X.\" .FS 1763 | X.\" \*F 1764 | X.\" .FE 1765 | X.\" 1766 | X.ds f \\u\\s-2\\n+f\\s+2\\d 1767 | X.nr f 0 1 1768 | X.ds F \\n+F. 1769 | X.nr F 0 1 1770 | X 1771 | X.ND 1772 | X.LP 1773 | X.TL 1774 | X\fIsdbm\fP \(em Substitute DBM 1775 | X.br 1776 | Xor 1777 | X.br 1778 | XBerkeley \fIndbm\fP for Every UN*X\** Made Simple 1779 | X.AU 1780 | XOzan (oz) Yigit 1781 | X.AI 1782 | XThe Guild of PD Software Toolmakers 1783 | XToronto - Canada 1784 | X.sp 1785 | Xoz@nexus.yorku.ca 1786 | X.LP 1787 | X.FS 1788 | XUN*X is not a trademark of any (dis)organization. 1789 | X.FE 1790 | X.sp 2 1791 | X\fIImplementation is the sincerest form of flattery. \(em L. Peter Deutsch\fP 1792 | X.SH 1793 | XA The Clone of the \fIndbm\fP library 1794 | X.PP 1795 | XThe sources accompanying this notice \(em \fIsdbm\fP \(em constitute 1796 | Xthe first public release (Dec. 1990) of a complete clone of 1797 | Xthe Berkeley UN*X \fIndbm\fP library. The \fIsdbm\fP library is meant to 1798 | Xclone the proven functionality of \fIndbm\fP as closely as possible, 1799 | Xincluding a few improvements. It is practical, easy to understand, and 1800 | Xcompatible. 1801 | XThe \fIsdbm\fP library is not derived from any licensed, proprietary or 1802 | Xcopyrighted software. 1803 | X.PP 1804 | XThe \fIsdbm\fP implementation is based on a 1978 algorithm 1805 | X[Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''. 1806 | XIn the course of searching for a substitute for \fIndbm\fP, I 1807 | Xprototyped three different external-hashing algorithms [Lar78, Fag79, Lit80] 1808 | Xand ultimately chose Larson's algorithm as a basis of the \fIsdbm\fP 1809 | Ximplementation. The Bell Labs 1810 | X\fIdbm\fP (and therefore \fIndbm\fP) is based on an algorithm invented by 1811 | XKen Thompson, [Tho90, Tor87] and predates Larson's work. 
1812 | X.PP 1813 | XThe \fIsdbm\fR programming interface is totally compatible 1814 | Xwith \fIndbm\fP and includes a slight improvement in database initialization. 1815 | XIt is also expected to be binary-compatible under most UN*X versions that 1816 | Xsupport the \fIndbm\fP library. 1817 | X.PP 1818 | XThe \fIsdbm\fP implementation shares the shortcomings of the \fIndbm\fP 1819 | Xlibrary, as a side effect of various simplifications to the original Larson 1820 | Xalgorithm. It does produce \fIholes\fP in the page file as it writes 1821 | Xpages past the end of file. (Larson's paper includes a clever solution to 1822 | Xthis problem that is a result of using the hash value directly as a block 1823 | Xaddress.) On the other hand, extensive tests seem to indicate that \fIsdbm\fP 1824 | Xcreates fewer holes in general, and the resulting pagefiles are 1825 | Xsmaller. The \fIsdbm\fP implementation is also faster than \fIndbm\fP 1826 | Xin database creation. 1827 | XUnlike the \fIndbm\fP, the \fIsdbm\fP 1828 | X.CW store 1829 | Xoperation will not ``wander away'' trying to split its 1830 | Xdata pages to insert a datum that \fIcannot\fP (due to elaborate worst-case 1831 | Xsituations) be inserted. (It will fail after a pre-defined number of attempts.) 1832 | X.SH 1833 | XImportant Compatibility Warning 1834 | X.PP 1835 | XThe \fIsdbm\fP and \fIndbm\fP 1836 | Xlibraries \fIcannot\fP share databases: one cannot read the (dir/pag) 1837 | Xdatabase created by the other. This is due to the differences 1838 | Xbetween the \fIndbm\fP and \fIsdbm\fP algorithms\**, 1839 | X.FS 1840 | XTorek's discussion [Tor87] 1841 | Xindicates that \fIdbm/ndbm\fP implementations use the hash 1842 | Xvalue to traverse the radix trie differently than \fIsdbm\fP 1843 | Xand as a result, the page indexes are generated in \fIdifferent\fP order. 1844 | XFor more information, send e-mail to the author. 1845 | X.FE 1846 | Xand the hash functions 1847 | Xused.
1848 | XIt is easy to convert between the \fIdbm/ndbm\fP databases and \fIsdbm\fP 1849 | Xby ignoring the index completely: see 1850 | X.CW dbd , 1851 | X.CW dbu 1852 | Xetc. 1853 | X.R 1854 | X.LP 1855 | X.SH 1856 | XNotice of Intellectual Property 1857 | X.LP 1858 | X\fIThe entire\fP sdbm \fIlibrary package, as authored by me,\fP Ozan S. Yigit, 1859 | X\fIis hereby placed in the public domain.\fP As such, the author is not 1860 | Xresponsible for the consequences of use of this software, no matter how 1861 | Xawful, even if they arise from defects in it. There is no expressed or 1862 | Ximplied warranty for the \fIsdbm\fP library. 1863 | X.PP 1864 | XSince the \fIsdbm\fP 1865 | Xlibrary package is in the public domain, this \fIoriginal\fP 1866 | Xrelease or any additional public-domain releases of the modified original 1867 | Xcannot possibly (by definition) be withheld from you. Also by definition, 1868 | XYou (singular) have all the rights to this code (including the right to 1869 | Xsell without permission, the right to hoard\** 1870 | X.FS 1871 | XYou cannot really hoard something that is available to the public at 1872 | Xlarge, but try if it makes you feel any better. 1873 | X.FE 1874 | Xand the right to do other icky things as 1875 | Xyou see fit) but those rights are also granted to everyone else. 1876 | X.PP 1877 | XPlease note that all previous distributions of this software contained 1878 | Xa copyright (which is now dropped) to protect its 1879 | Xorigins and its current public domain status against any possible claims 1880 | Xand/or challenges. 1881 | X.SH 1882 | XAcknowledgments 1883 | X.PP 1884 | XMany people have been very helpful and supportive. 
A partial list would 1885 | Xnecessarily include Rayan Zacherissen (who contributed the man page, 1886 | Xand also hacked a MMAP version of \fIsdbm\fP), 1887 | XArnold Robbins, Chris Lewis, 1888 | XBill Davidsen, Henry Spencer, Geoff Collyer, Rich Salz (who got me started 1889 | Xin the first place), Johannes Ruschein 1890 | X(who did the minix port) and David Tilbrook. I thank you all. 1891 | X.SH 1892 | XDistribution Manifest and Notes 1893 | X.LP 1894 | XThis distribution of \fIsdbm\fP includes (at least) the following: 1895 | X.P1 1896 | X CHANGES change log 1897 | X README this file. 1898 | X biblio a small bibliography on external hashing 1899 | X dba.c a crude (n/s)dbm page file analyzer 1900 | X dbd.c a crude (n/s)dbm page file dumper (for conversion) 1901 | X dbe.1 man page for dbe.c 1902 | X dbe.c Janick's database editor 1903 | X dbm.c a dbm library emulation wrapper for ndbm/sdbm 1904 | X dbm.h header file for the above 1905 | X dbu.c a crude db management utility 1906 | X hash.c hashing function 1907 | X makefile guess. 1908 | X pair.c page-level routines (posted earlier) 1909 | X pair.h header file for the above 1910 | X readme.ms troff source for the README file 1911 | X sdbm.3 man page 1912 | X sdbm.c the real thing 1913 | X sdbm.h header file for the above 1914 | X tune.h place for tuning & portability thingies 1915 | X util.c miscellaneous 1916 | X.P2 1917 | X.PP 1918 | X.CW dbu 1919 | Xis a simple database manipulation program\** that tries to look 1920 | X.FS 1921 | XThe 1922 | X.CW dbd , 1923 | X.CW dba , 1924 | X.CW dbu 1925 | Xutilities are quick hacks and are not fit for production use. They were 1926 | Xdeveloped late one night, just to test out \fIsdbm\fP, and convert some 1927 | Xdatabases. 1928 | X.FE 1929 | Xlike Bell Labs' 1930 | X.CW cbt 1931 | Xutility. It is currently incomplete in functionality. 
1932 | XI use 1933 | X.CW dbu 1934 | Xto test out the routines: it takes (from stdin) tab separated 1935 | Xkey/value pairs for commands like 1936 | X.CW build 1937 | Xor 1938 | X.CW insert 1939 | Xor takes keys for 1940 | Xcommands like 1941 | X.CW delete 1942 | Xor 1943 | X.CW look . 1944 | X.P1 1945 | X dbu dbmfile 1946 | X.P2 1947 | X.PP 1948 | X.CW dba 1949 | Xis a crude analyzer of \fIdbm/sdbm/ndbm\fP 1950 | Xpage files. It scans the entire 1951 | Xpage file, reporting page level statistics, and totals at the end. 1952 | X.PP 1953 | X.CW dbd 1954 | Xis a crude dump program for \fIdbm/ndbm/sdbm\fP 1955 | Xdatabases. It ignores the 1956 | Xbitmap, and dumps the data pages in sequence. It can be used to create 1957 | Xinput for the 1958 | X.CW dbu 1959 | Xutility. 1960 | XNote that 1961 | X.CW dbd 1962 | Xwill skip any NULLs in the key and data 1963 | Xfields, thus is unsuitable to convert some peculiar databases that 1964 | Xinsist in including the terminating null. 1965 | X.PP 1966 | XI have also included a copy of the 1967 | X.CW dbe 1968 | X(\fIndbm\fP DataBase Editor) by Janick Bergeron [janick@bnr.ca] for 1969 | Xyour pleasure. You may find it more useful than the little 1970 | X.CW dbu 1971 | Xutility. 1972 | X.PP 1973 | X.CW dbm.[ch] 1974 | Xis a \fIdbm\fP library emulation on top of \fIndbm\fP 1975 | X(and hence suitable for \fIsdbm\fP). Written by Robert Elz. 1976 | X.PP 1977 | XThe \fIsdbm\fP 1978 | Xlibrary has been around in beta test for quite a long time, and from whatever 1979 | Xlittle feedback I received (maybe no news is good news), I believe it has been 1980 | Xfunctioning without any significant problems. I would, of course, appreciate 1981 | Xall fixes and/or improvements. Portability enhancements would especially be 1982 | Xuseful. 1983 | X.SH 1984 | XImplementation Issues 1985 | X.PP 1986 | XHash functions: 1987 | XThe algorithm behind \fIsdbm\fP implementation needs a good bit-scrambling 1988 | Xhash function to be effective. 
I ran into a set of constants for a simple 1989 | Xhash function that seem to help \fIsdbm\fP perform better than \fIndbm\fP 1990 | Xfor various inputs: 1991 | X.P1 1992 | X /* 1993 | X * polynomial conversion ignoring overflows 1994 | X * 65599 nice. 65587 even better. 1995 | X */ 1996 | X long 1997 | X dbm_hash(char *str, int len) { 1998 | X register unsigned long n = 0; 1999 | X 2000 | X while (len--) 2001 | X n = n * 65599 + *str++; 2002 | X return n; 2003 | X } 2004 | X.P2 2005 | X.PP 2006 | XThere may be better hash functions for the purposes of dynamic hashing. 2007 | XTry your favorite, and check the pagefile. If it contains too many pages 2008 | Xwith too many holes, (in relation to this one for example) or if 2009 | X\fIsdbm\fP 2010 | Xsimply stops working (fails after 2011 | X.CW SPLTMAX 2012 | Xattempts to split) when you feed your 2013 | XNEWS 2014 | X.CW history 2015 | Xfile to it, you probably do not have a good hashing function. 2016 | XIf you do better (for different types of input), I would like to know 2017 | Xabout the function you use. 2018 | X.PP 2019 | XBlock sizes: It seems (from various tests on a few machines) that a page 2020 | Xfile block size 2021 | X.CW PBLKSIZ 2022 | Xof 1024 is by far the best for performance, but 2023 | Xthis also happens to limit the size of a key/value pair. Depending on your 2024 | Xneeds, you may wish to increase the page size, and also adjust 2025 | X.CW PAIRMAX 2026 | X(the maximum size of a key/value pair allowed: should always be at least 2027 | Xthree words smaller than 2028 | X.CW PBLKSIZ .) 2029 | Xaccordingly. The system-wide version of the library 2030 | Xshould probably be 2031 | Xconfigured with 1024 (distribution default), as this appears to be sufficient 2032 | Xfor most common uses of \fIsdbm\fP. 2033 | X.SH 2034 | XPortability 2035 | X.PP 2036 | XThis package has been tested in many different UN*Xes even including minix, 2037 | Xand appears to be reasonably portable. 
This does not mean it will port 2038 | Xeasily to non-UN*X systems. 2039 | X.SH 2040 | XNotes and Miscellaneous 2041 | X.PP 2042 | XThe \fIsdbm\fP is not a very complicated package, at least not after you 2043 | Xfamiliarize yourself with the literature on external hashing. There are 2044 | Xother interesting algorithms in existence that ensure (approximately) 2045 | Xsingle-read access to a data value associated with any key. These are 2046 | Xdirectory-less schemes such as \fIlinear hashing\fP [Lit80] (+ Larson 2047 | Xvariations), \fIspiral storage\fP [Mar79] or directory schemes such as 2048 | X\fIextendible hashing\fP [Fag79] by Fagin et al. I do hope these sources 2049 | Xprovide a reasonable playground for experimentation with other algorithms. 2050 | XSee the June 1988 issue of ACM Computing Surveys [Enb88] for an 2051 | Xexcellent overview of the field. 2052 | X.PG 2053 | X.SH 2054 | XReferences 2055 | X.LP 2056 | X.IP [Lar78] 4m 2057 | XP.-A. Larson, 2058 | X``Dynamic Hashing'', \fIBIT\fP, vol. 18, pp. 184-201, 1978. 2059 | X.IP [Tho90] 4m 2060 | XKen Thompson, \fIprivate communication\fP, Nov. 1990 2061 | X.IP [Lit80] 4m 2062 | XW. Litwin, 2063 | X``Linear Hashing: A new tool for file and table addressing'', 2064 | X\fIProceedings of the 6th Conference on Very Large Databases (Montreal)\fP, 2065 | Xpp. 212-223, Very Large Database Foundation, Saratoga, Calif., 1980. 2066 | X.IP [Fag79] 4m 2067 | XR. Fagin, J. Nievergelt, N. Pippenger, and H. R. Strong, 2068 | X``Extendible Hashing - A Fast Access Method for Dynamic Files'', 2069 | X\fIACM Trans. Database Syst.\fP, vol. 4, no.3, pp. 315-344, Sept. 1979. 2070 | X.IP [Wal84] 4m 2071 | XRich Wales, 2072 | X``Discussion of "dbm" data base system'', \fIUSENET newsgroup unix.wizards\fP, 2073 | XJan. 1984. 2074 | X.IP [Tor87] 4m 2075 | XChris Torek, 2076 | X``Re: dbm.a and ndbm.a archives'', \fIUSENET newsgroup comp.unix\fP, 2077 | X1987. 2078 | X.IP [Mar79] 4m 2079 | XG. N. 
Martin, 2080 | X``Spiral Storage: Incrementally Augmentable Hash Addressed Storage'', 2081 | X\fITechnical Report #27\fP, University of Warwick, Coventry, U.K., 1979. 2082 | X.IP [Enb88] 4m 2083 | XR. J. Enbody and H. C. Du, 2084 | X``Dynamic Hashing Schemes'', \fIACM Computing Surveys\fP, 2085 | Xvol. 20, no. 2, pp. 85-113, June 1988. 2086 | @@@End of readme.ms 2087 | echo x - readme.txt 1>&2 2088 | sed 's/^X//' >readme.txt <<'@@@End of readme.txt' 2089 | X 2090 | X 2091 | X 2092 | X 2093 | X 2094 | X 2095 | X sdbm - Substitute DBM 2096 | X or 2097 | X Berkeley ndbm for Every UN*X[1] Made Simple 2098 | X 2099 | X Ozan (oz) Yigit 2100 | X 2101 | X The Guild of PD Software Toolmakers 2102 | X Toronto - Canada 2103 | X 2104 | X oz@nexus.yorku.ca 2105 | X 2106 | X 2107 | X 2108 | XImplementation is the sincerest form of flattery. - L. Peter 2109 | XDeutsch 2110 | X 2111 | XA The Clone of the ndbm library 2112 | X 2113 | X The sources accompanying this notice - sdbm - consti- 2114 | Xtute the first public release (Dec. 1990) of a complete 2115 | Xclone of the Berkeley UN*X ndbm library. The sdbm library is 2116 | Xmeant to clone the proven functionality of ndbm as closely 2117 | Xas possible, including a few improvements. It is practical, 2118 | Xeasy to understand, and compatible. The sdbm library is not 2119 | Xderived from any licensed, proprietary or copyrighted 2120 | Xsoftware. 2121 | X 2122 | X The sdbm implementation is based on a 1978 algorithm 2123 | X[Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''. 2124 | XIn the course of searching for a substitute for ndbm, I pro- 2125 | Xtotyped three different external-hashing algorithms [Lar78, 2126 | XFag79, Lit80] and ultimately chose Larson's algorithm as a 2127 | Xbasis of the sdbm implementation. The Bell Labs dbm (and 2128 | Xtherefore ndbm) is based on an algorithm invented by Ken 2129 | XThompson, [Tho90, Tor87] and predates Larson's work. 
2130 | X 2131 | X The sdbm programming interface is totally compatible 2132 | Xwith ndbm and includes a slight improvement in database ini- 2133 | Xtialization. It is also expected to be binary-compatible 2134 | Xunder most UN*X versions that support the ndbm library. 2135 | X 2136 | X The sdbm implementation shares the shortcomings of the 2137 | Xndbm library, as a side effect of various simplifications to 2138 | Xthe original Larson algorithm. It does produce holes in the 2139 | Xpage file as it writes pages past the end of file. (Larson's 2140 | Xpaper include a clever solution to this problem that is a 2141 | Xresult of using the hash value directly as a block address.) 2142 | XOn the other hand, extensive tests seem to indicate that 2143 | Xsdbm creates fewer holes in general, and the resulting page- 2144 | Xfiles are smaller. The sdbm implementation is also faster 2145 | Xthan ndbm in database creation. Unlike the ndbm, the sdbm 2146 | X_________________________ 2147 | X 2148 | X [1] UN*X is not a trademark of any (dis)organization. 2149 | X 2150 | X 2151 | X 2152 | X 2153 | X 2154 | X 2155 | X 2156 | X 2157 | X 2158 | X - 2 - 2159 | X 2160 | X 2161 | Xstore operation will not ``wander away'' trying to split its 2162 | Xdata pages to insert a datum that cannot (due to elaborate 2163 | Xworst-case situations) be inserted. (It will fail after a 2164 | Xpre-defined number of attempts.) 2165 | X 2166 | XImportant Compatibility Warning 2167 | X 2168 | X The sdbm and ndbm libraries cannot share databases: one 2169 | Xcannot read the (dir/pag) database created by the other. 2170 | XThis is due to the differences between the ndbm and sdbm 2171 | Xalgorithms[2], and the hash functions used. It is easy to 2172 | Xconvert between the dbm/ndbm databases and sdbm by ignoring 2173 | Xthe index completely: see dbd, dbu etc. 2174 | X 2175 | X 2176 | XNotice of Intellectual Property 2177 | X 2178 | XThe entire sdbm library package, as authored by me, Ozan S. 
2179 | XYigit, is hereby placed in the public domain. As such, the 2180 | Xauthor is not responsible for the consequences of use of 2181 | Xthis software, no matter how awful, even if they arise from 2182 | Xdefects in it. There is no expressed or implied warranty for 2183 | Xthe sdbm library. 2184 | X 2185 | X Since the sdbm library package is in the public domain, 2186 | Xthis original release or any additional public-domain 2187 | Xreleases of the modified original cannot possibly (by defin- 2188 | Xition) be withheld from you. Also by definition, You (singu- 2189 | Xlar) have all the rights to this code (including the right 2190 | Xto sell without permission, the right to hoard[3] and the 2191 | Xright to do other icky things as you see fit) but those 2192 | Xrights are also granted to everyone else. 2193 | X 2194 | X Please note that all previous distributions of this 2195 | Xsoftware contained a copyright (which is now dropped) to 2196 | Xprotect its origins and its current public domain status 2197 | Xagainst any possible claims and/or challenges. 2198 | X 2199 | XAcknowledgments 2200 | X 2201 | X Many people have been very helpful and supportive. A 2202 | Xpartial list would necessarily include Rayan Zacherissen 2203 | X(who contributed the man page, and also hacked a MMAP 2204 | X_________________________ 2205 | X 2206 | X [2] Torek's discussion [Tor87] indicates that 2207 | Xdbm/ndbm implementations use the hash value to traverse 2208 | Xthe radix trie differently than sdbm and as a result, 2209 | Xthe page indexes are generated in different order. For 2210 | Xmore information, send e-mail to the author. 2211 | X [3] You cannot really hoard something that is avail- 2212 | Xable to the public at large, but try if it makes you 2213 | Xfeel any better. 
2214 | X 2215 | X 2216 | X 2217 | X 2218 | X 2219 | X 2220 | X 2221 | X 2222 | X 2223 | X 2224 | X - 3 - 2225 | X 2226 | X 2227 | Xversion of sdbm), Arnold Robbins, Chris Lewis, Bill David- 2228 | Xsen, Henry Spencer, Geoff Collyer, Rich Salz (who got me 2229 | Xstarted in the first place), Johannes Ruschein (who did the 2230 | Xminix port) and David Tilbrook. I thank you all. 2231 | X 2232 | XDistribution Manifest and Notes 2233 | X 2234 | XThis distribution of sdbm includes (at least) the following: 2235 | X 2236 | X CHANGES change log 2237 | X README this file. 2238 | X biblio a small bibliography on external hashing 2239 | X dba.c a crude (n/s)dbm page file analyzer 2240 | X dbd.c a crude (n/s)dbm page file dumper (for conversion) 2241 | X dbe.1 man page for dbe.c 2242 | X dbe.c Janick's database editor 2243 | X dbm.c a dbm library emulation wrapper for ndbm/sdbm 2244 | X dbm.h header file for the above 2245 | X dbu.c a crude db management utility 2246 | X hash.c hashing function 2247 | X makefile guess. 2248 | X pair.c page-level routines (posted earlier) 2249 | X pair.h header file for the above 2250 | X readme.ms troff source for the README file 2251 | X sdbm.3 man page 2252 | X sdbm.c the real thing 2253 | X sdbm.h header file for the above 2254 | X tune.h place for tuning & portability thingies 2255 | X util.c miscellaneous 2256 | X 2257 | X dbu is a simple database manipulation program[4] that 2258 | Xtries to look like Bell Labs' cbt utility. It is currently 2259 | Xincomplete in functionality. I use dbu to test out the rou- 2260 | Xtines: it takes (from stdin) tab separated key/value pairs 2261 | Xfor commands like build or insert or takes keys for commands 2262 | Xlike delete or look. 2263 | X 2264 | X dbu dbmfile 2265 | X 2266 | X dba is a crude analyzer of dbm/sdbm/ndbm page files. It 2267 | Xscans the entire page file, reporting page level statistics, 2268 | Xand totals at the end. 
2269 | X 2270 | X dbd is a crude dump program for dbm/ndbm/sdbm data- 2271 | Xbases. It ignores the bitmap, and dumps the data pages in 2272 | Xsequence. It can be used to create input for the dbu util- 2273 | Xity. Note that dbd will skip any NULLs in the key and data 2274 | Xfields, thus is unsuitable to convert some peculiar 2275 | X_________________________ 2276 | X 2277 | X [4] The dbd, dba, dbu utilities are quick hacks and 2278 | Xare not fit for production use. They were developed 2279 | Xlate one night, just to test out sdbm, and convert some 2280 | Xdatabases. 2281 | X 2282 | X 2283 | X 2284 | X 2285 | X 2286 | X 2287 | X 2288 | X 2289 | X 2290 | X - 4 - 2291 | X 2292 | X 2293 | Xdatabases that insist in including the terminating null. 2294 | X 2295 | X I have also included a copy of the dbe (ndbm DataBase 2296 | XEditor) by Janick Bergeron [janick@bnr.ca] for your pleas- 2297 | Xure. You may find it more useful than the little dbu util- 2298 | Xity. 2299 | X 2300 | X dbm.[ch] is a dbm library emulation on top of ndbm (and 2301 | Xhence suitable for sdbm). Written by Robert Elz. 2302 | X 2303 | X The sdbm library has been around in beta test for quite 2304 | Xa long time, and from whatever little feedback I received 2305 | X(maybe no news is good news), I believe it has been func- 2306 | Xtioning without any significant problems. I would, of 2307 | Xcourse, appreciate all fixes and/or improvements. Portabil- 2308 | Xity enhancements would especially be useful. 2309 | X 2310 | XImplementation Issues 2311 | X 2312 | X Hash functions: The algorithm behind sdbm implementa- 2313 | Xtion needs a good bit-scrambling hash function to be effec- 2314 | Xtive. I ran into a set of constants for a simple hash func- 2315 | Xtion that seem to help sdbm perform better than ndbm for 2316 | Xvarious inputs: 2317 | X 2318 | X /* 2319 | X * polynomial conversion ignoring overflows 2320 | X * 65599 nice. 65587 even better. 
2321 | X */ 2322 | X long 2323 | X dbm_hash(char *str, int len) { 2324 | X register unsigned long n = 0; 2325 | X 2326 | X while (len--) 2327 | X n = n * 65599 + *str++; 2328 | X return n; 2329 | X } 2330 | X 2331 | X There may be better hash functions for the purposes of 2332 | Xdynamic hashing. Try your favorite, and check the pagefile. 2333 | XIf it contains too many pages with too many holes, (in rela- 2334 | Xtion to this one for example) or if sdbm simply stops work- 2335 | Xing (fails after SPLTMAX attempts to split) when you feed 2336 | Xyour NEWS history file to it, you probably do not have a 2337 | Xgood hashing function. If you do better (for different 2338 | Xtypes of input), I would like to know about the function you 2339 | Xuse. 2340 | X 2341 | X Block sizes: It seems (from various tests on a few 2342 | Xmachines) that a page file block size PBLKSIZ of 1024 is by 2343 | Xfar the best for performance, but this also happens to limit 2344 | Xthe size of a key/value pair. Depending on your needs, you 2345 | Xmay wish to increase the page size, and also adjust PAIRMAX 2346 | X(the maximum size of a key/value pair allowed: should always 2347 | X 2348 | X 2349 | X 2350 | X 2351 | X 2352 | X 2353 | X 2354 | X 2355 | X 2356 | X - 5 - 2357 | X 2358 | X 2359 | Xbe at least three words smaller than PBLKSIZ.) accordingly. 2360 | XThe system-wide version of the library should probably be 2361 | Xconfigured with 1024 (distribution default), as this appears 2362 | Xto be sufficient for most common uses of sdbm. 2363 | X 2364 | XPortability 2365 | X 2366 | X This package has been tested in many different UN*Xes 2367 | Xeven including minix, and appears to be reasonably portable. 2368 | XThis does not mean it will port easily to non-UN*X systems. 2369 | X 2370 | XNotes and Miscellaneous 2371 | X 2372 | X The sdbm is not a very complicated package, at least 2373 | Xnot after you familiarize yourself with the literature on 2374 | Xexternal hashing. 
There are other interesting algorithms in 2375 | Xexistence that ensure (approximately) single-read access to 2376 | Xa data value associated with any key. These are directory- 2377 | Xless schemes such as linear hashing [Lit80] (+ Larson varia- 2378 | Xtions), spiral storage [Mar79] or directory schemes such as 2379 | Xextensible hashing [Fag79] by Fagin et al. I do hope these 2380 | Xsources provide a reasonable playground for experimentation 2381 | Xwith other algorithms. See the June 1988 issue of ACM Com- 2382 | Xputing Surveys [Enb88] for an excellent overview of the 2383 | Xfield. 2384 | X 2385 | XReferences 2386 | X 2387 | X 2388 | X[Lar78] 2389 | X P.-A. Larson, ``Dynamic Hashing'', BIT, vol. 18, pp. 2390 | X 184-201, 1978. 2391 | X 2392 | X[Tho90] 2393 | X Ken Thompson, private communication, Nov. 1990 2394 | X 2395 | X[Lit80] 2396 | X W. Litwin, `` Linear Hashing: A new tool for file and 2397 | X table addressing'', Proceedings of the 6th Conference on 2398 | X Very Large Dabatases (Montreal), pp. 212-223, Very 2399 | X Large Database Foundation, Saratoga, Calif., 1980. 2400 | X 2401 | X[Fag79] 2402 | X R. Fagin, J. Nievergelt, N. Pippinger, and H. R. 2403 | X Strong, ``Extendible Hashing - A Fast Access Method for 2404 | X Dynamic Files'', ACM Trans. Database Syst., vol. 4, 2405 | X no.3, pp. 315-344, Sept. 1979. 2406 | X 2407 | X[Wal84] 2408 | X Rich Wales, ``Discussion of "dbm" data base system'', 2409 | X USENET newsgroup unix.wizards, Jan. 1984. 2410 | X 2411 | X[Tor87] 2412 | X Chris Torek, ``Re: dbm.a and ndbm.a archives'', 2413 | X 2414 | X 2415 | X 2416 | X 2417 | X 2418 | X 2419 | X 2420 | X 2421 | X 2422 | X - 6 - 2423 | X 2424 | X 2425 | X USENET newsgroup comp.unix, 1987. 2426 | X 2427 | X[Mar79] 2428 | X G. N. Martin, ``Spiral Storage: Incrementally Augment- 2429 | X able Hash Addressed Storage'', Technical Report #27, 2430 | X University of Varwick, Coventry, U.K., 1979. 2431 | X 2432 | X[Enb88] 2433 | X R. J. Enbody and H. C. 
Du, ``Dynamic Hashing 2434 | X Schemes'', ACM Computing Surveys, vol. 20, no. 2, pp. 2435 | X 85-113, June 1988. 2436 | X 2437 | X 2438 | X 2439 | X 2440 | X 2441 | X 2442 | X 2443 | X 2444 | X 2445 | X 2446 | X 2447 | X 2448 | X 2449 | X 2450 | X 2451 | X 2452 | X 2453 | X 2454 | X 2455 | X 2456 | X 2457 | X 2458 | X 2459 | X 2460 | X 2461 | X 2462 | X 2463 | X 2464 | X 2465 | X 2466 | X 2467 | X 2468 | X 2469 | X 2470 | X 2471 | X 2472 | X 2473 | X 2474 | X 2475 | X 2476 | X 2477 | X 2478 | X 2479 | X 2480 | X 2481 | X 2482 | X 2483 | X 2484 | X 2485 | @@@End of readme.txt 2486 | echo x - sdbm.3 1>&2 2487 | sed 's/^X//' >sdbm.3 <<'@@@End of sdbm.3' 2488 | X.\" $Id: sdbm.3,v 1.2 90/12/13 13:00:57 oz Exp $ 2489 | X.TH SDBM 3 "1 March 1990" 2490 | X.SH NAME 2491 | Xsdbm, dbm_open, dbm_prep, dbm_close, dbm_fetch, dbm_store, dbm_delete, dbm_firstkey, dbm_nextkey, dbm_hash, dbm_rdonly, dbm_error, dbm_clearerr, dbm_dirfno, dbm_pagfno \- data base subroutines 2492 | X.SH SYNOPSIS 2493 | X.nf 2494 | X.ft B 2495 | X#include <sdbm.h> 2496 | X.sp 2497 | Xtypedef struct { 2498 | X char *dptr; 2499 | X int dsize; 2500 | X} datum; 2501 | X.sp 2502 | Xdatum nullitem = { NULL, 0 }; 2503 | X.sp 2504 | X\s-1DBM\s0 *dbm_open(char *file, int flags, int mode) 2505 | X.sp 2506 | X\s-1DBM\s0 *dbm_prep(char *dirname, char *pagname, int flags, int mode) 2507 | X.sp 2508 | Xvoid dbm_close(\s-1DBM\s0 *db) 2509 | X.sp 2510 | Xdatum dbm_fetch(\s-1DBM\s0 *db, datum key) 2511 | X.sp 2512 | Xint dbm_store(\s-1DBM\s0 *db, datum key, datum val, int flags) 2513 | X.sp 2514 | Xint dbm_delete(\s-1DBM\s0 *db, datum key) 2515 | X.sp 2516 | Xdatum dbm_firstkey(\s-1DBM\s0 *db) 2517 | X.sp 2518 | Xdatum dbm_nextkey(\s-1DBM\s0 *db) 2519 | X.sp 2520 | Xlong dbm_hash(char *string, int len) 2521 | X.sp 2522 | Xint dbm_rdonly(\s-1DBM\s0 *db) 2523 | Xint dbm_error(\s-1DBM\s0 *db) 2524 | Xdbm_clearerr(\s-1DBM\s0 *db) 2525 | Xint dbm_dirfno(\s-1DBM\s0 *db) 2526 | Xint dbm_pagfno(\s-1DBM\s0 *db) 2527 | X.ft R 2528 | X.fi 2529 | 
X.SH DESCRIPTION 2530 | X.IX "database library" sdbm "" "\fLsdbm\fR" 2531 | X.IX dbm_open "" "\fLdbm_open\fR \(em open \fLsdbm\fR database" 2532 | X.IX dbm_prep "" "\fLdbm_prep\fR \(em prepare \fLsdbm\fR database" 2533 | X.IX dbm_close "" "\fLdbm_close\fR \(em close \fLsdbm\fR routine" 2534 | X.IX dbm_fetch "" "\fLdbm_fetch\fR \(em fetch \fLsdbm\fR database data" 2535 | X.IX dbm_store "" "\fLdbm_store\fR \(em add data to \fLsdbm\fR database" 2536 | X.IX dbm_delete "" "\fLdbm_delete\fR \(em remove data from \fLsdbm\fR database" 2537 | X.IX dbm_firstkey "" "\fLdbm_firstkey\fR \(em access \fLsdbm\fR database" 2538 | X.IX dbm_nextkey "" "\fLdbm_nextkey\fR \(em access \fLsdbm\fR database" 2539 | X.IX dbm_hash "" "\fLdbm_hash\fR \(em string hash for \fLsdbm\fR database" 2540 | X.IX dbm_rdonly "" "\fLdbm_rdonly\fR \(em return \fLsdbm\fR database read-only mode" 2541 | X.IX dbm_error "" "\fLdbm_error\fR \(em return \fLsdbm\fR database error condition" 2542 | X.IX dbm_clearerr "" "\fLdbm_clearerr\fR \(em clear \fLsdbm\fR database error condition" 2543 | X.IX dbm_dirfno "" "\fLdbm_dirfno\fR \(em return \fLsdbm\fR database bitmap file descriptor" 2544 | X.IX dbm_pagfno "" "\fLdbm_pagfno\fR \(em return \fLsdbm\fR database data file descriptor" 2545 | X.IX "database functions \(em \fLsdbm\fR" dbm_open "" \fLdbm_open\fP 2546 | X.IX "database functions \(em \fLsdbm\fR" dbm_prep "" \fLdbm_prep\fP 2547 | X.IX "database functions \(em \fLsdbm\fR" dbm_close "" \fLdbm_close\fP 2548 | X.IX "database functions \(em \fLsdbm\fR" dbm_fetch "" \fLdbm_fetch\fP 2549 | X.IX "database functions \(em \fLsdbm\fR" dbm_store "" \fLdbm_store\fP 2550 | X.IX "database functions \(em \fLsdbm\fR" dbm_delete "" \fLdbm_delete\fP 2551 | X.IX "database functions \(em \fLsdbm\fR" dbm_firstkey "" \fLdbm_firstkey\fP 2552 | X.IX "database functions \(em \fLsdbm\fR" dbm_nextkey "" \fLdbm_nextkey\fP 2553 | X.IX "database functions \(em \fLsdbm\fR" dbm_rdonly "" \fLdbm_rdonly\fP 2554 | X.IX "database functions 
\(em \fLsdbm\fR" dbm_error "" \fLdbm_error\fP 2555 | X.IX "database functions \(em \fLsdbm\fR" dbm_clearerr "" \fLdbm_clearerr\fP 2556 | X.IX "database functions \(em \fLsdbm\fR" dbm_dirfno "" \fLdbm_dirfno\fP 2557 | X.IX "database functions \(em \fLsdbm\fR" dbm_pagfno "" \fLdbm_pagfno\fP 2558 | X.LP 2559 | XThis package allows an application to maintain a mapping of <key,value> pairs 2560 | Xin disk files. This is not to be considered a real database system, but is 2561 | Xstill useful in many simple applications built around fast retrieval of a data 2562 | Xvalue from a key. This implementation uses an external hashing scheme, 2563 | Xcalled Dynamic Hashing, as described by Per-Aake Larson in BIT 18 (1978) pp. 2564 | X184-201. Retrieval of any item usually requires a single disk access. 2565 | XThe application interface is compatible with the 2566 | X.IR ndbm (3) 2567 | Xlibrary. 2568 | X.LP 2569 | XAn 2570 | X.B sdbm 2571 | Xdatabase is kept in two files usually given the extensions 2572 | X.B \.dir 2573 | Xand 2574 | X.BR \.pag . 2575 | XThe 2576 | X.B \.dir 2577 | Xfile contains a bitmap representing a forest of binary hash trees, the leaves 2578 | Xof which indicate data pages in the 2579 | X.B \.pag 2580 | Xfile. 2581 | X.LP 2582 | XThe application interface uses the 2583 | X.B datum 2584 | Xstructure to describe both 2585 | X.I keys 2586 | Xand 2587 | X.IR value s. 2588 | XA 2589 | X.B datum 2590 | Xspecifies a byte sequence of 2591 | X.I dsize 2592 | Xsize pointed to by 2593 | X.IR dptr . 2594 | XIf you use 2595 | X.SM ASCII 2596 | Xstrings as 2597 | X.IR key s 2598 | Xor 2599 | X.IR value s, 2600 | Xthen you must decide whether or not to include the terminating 2601 | X.SM NUL 2602 | Xbyte which sometimes defines strings. Including it will require larger 2603 | Xdatabase files, but it will be possible to get sensible output from a 2604 | X.IR strings (1) 2605 | Xcommand applied to the data file. 
2606 | X.LP 2607 | XIn order to allow a process using this package to manipulate multiple 2608 | Xdatabases, the applications interface always requires a 2609 | X.IR handle , 2610 | Xa 2611 | X.BR "DBM *" , 2612 | Xto identify the database to be manipulated. Such a handle can be obtained 2613 | Xfrom the only routines that do not require it, namely 2614 | X.BR dbm_open (\|) 2615 | Xor 2616 | X.BR dbm_prep (\|). 2617 | XEither of these will open or create the two necessary files. The 2618 | Xdifference is that the latter allows explicitly naming the bitmap and data 2619 | Xfiles whereas 2620 | X.BR dbm_open (\|) 2621 | Xwill take a base file name and call 2622 | X.BR dbm_prep (\|) 2623 | Xwith the default extensions. 2624 | XThe 2625 | X.I flags 2626 | Xand 2627 | X.I mode 2628 | Xparameters are the same as for 2629 | X.BR open (2). 2630 | X.LP 2631 | XTo free the resources occupied while a database handle is active, call 2632 | X.BR dbm_close (\|). 2633 | X.LP 2634 | XGiven a handle, one can retrieve data associated with a key by using the 2635 | X.BR dbm_fetch (\|) 2636 | Xroutine, and associate data with a key by using the 2637 | X.BR dbm_store (\|) 2638 | Xroutine. 2639 | X.LP 2640 | XThe values of the 2641 | X.I flags 2642 | Xparameter for 2643 | X.BR dbm_store (\|) 2644 | Xcan be either 2645 | X.BR \s-1DBM_INSERT\s0 , 2646 | Xwhich will not change an existing entry with the same key, or 2647 | X.BR \s-1DBM_REPLACE\s0 , 2648 | Xwhich will replace an existing entry with the same key. 2649 | XKeys are unique within the database. 2650 | X.LP 2651 | XTo delete a key and its associated value use the 2652 | X.BR dbm_delete (\|) 2653 | Xroutine. 2654 | X.LP 2655 | XTo retrieve every key in the database, use a loop like: 2656 | X.sp 2657 | X.nf 2658 | X.ft B 2659 | Xfor (key = dbm_firstkey(db); key.dptr != NULL; key = dbm_nextkey(db)) 2660 | X ; 2661 | X.ft R 2662 | X.fi 2663 | X.LP 2664 | XThe order of retrieval is unspecified. 
2665 | X.LP 2666 | XIf you determine that the performance of the database is inadequate or 2667 | Xyou notice clustering or other effects that may be due to the hashing 2668 | Xalgorithm used by this package, you can override it by supplying your 2669 | Xown 2670 | X.BR dbm_hash (\|) 2671 | Xroutine. Doing so will make the database unintelligible to any other 2672 | Xapplications that do not use your specialized hash function. 2673 | X.sp 2674 | X.LP 2675 | XThe following macros are defined in the header file: 2676 | X.IP 2677 | X.BR dbm_rdonly (\|) 2678 | Xreturns true if the database has been opened read\-only. 2679 | X.IP 2680 | X.BR dbm_error (\|) 2681 | Xreturns true if an I/O error has occurred. 2682 | X.IP 2683 | X.BR dbm_clearerr (\|) 2684 | Xallows you to clear the error flag if you think you know what the error 2685 | Xwas and insist on ignoring it. 2686 | X.IP 2687 | X.BR dbm_dirfno (\|) 2688 | Xreturns the file descriptor associated with the bitmap file. 2689 | X.IP 2690 | X.BR dbm_pagfno (\|) 2691 | Xreturns the file descriptor associated with the data file. 2692 | X.SH SEE ALSO 2693 | X.IR open (2). 2694 | X.SH DIAGNOSTICS 2695 | XFunctions that return a 2696 | X.B "DBM *" 2697 | Xhandle will use 2698 | X.SM NULL 2699 | Xto indicate an error. 2700 | XFunctions that return an 2701 | X.B int 2702 | Xwill use \-1 to indicate an error. The normal return value of such functions is 0. 2703 | XFunctions that return a 2704 | X.B datum 2705 | Xwill return 2706 | X.B nullitem 2707 | Xto indicate an error. 2708 | X.LP 2709 | XAs a special case of 2710 | X.BR dbm_store (\|), 2711 | Xif it is called with the 2712 | X.B \s-1DBM_INSERT\s0 2713 | Xflag and the key already exists in the database, the return value will be 1. 2714 | X.LP 2715 | XIn general, if a function parameter is invalid, 2716 | X.B errno 2717 | Xwill be set to 2718 | X.BR \s-1EINVAL\s0 . 
2719 | XIf a write operation is requested on a read-only database, 2720 | X.B errno 2721 | Xwill be set to 2722 | X.BR \s-1EPERM\s0 . 2723 | XIf a memory allocation (using 2724 | X.IR malloc (3)) 2725 | Xfailed, 2726 | X.B errno 2727 | Xwill be set to 2728 | X.BR \s-1ENOMEM\s0 . 2729 | XFor I/O operation failures 2730 | X.B errno 2731 | Xwill contain the value set by the relevant failed system call, either 2732 | X.IR read (2), 2733 | X.IR write (2), 2734 | Xor 2735 | X.IR lseek (2). 2736 | X.SH AUTHOR 2737 | X.IP "Ozan S. Yigit" (oz@nexus.yorku.ca) 2738 | X.SH BUGS 2739 | XThe sum of key and value data sizes must not exceed 2740 | X.B \s-1PAIRMAX\s0 2741 | X(1008 bytes). 2742 | X.LP 2743 | XThe sum of the key and value data sizes where several keys hash to the 2744 | Xsame value must fit within one data page. 2745 | X.LP 2746 | XThe 2747 | X.B \.pag 2748 | Xfile will contain holes, so its apparent size is larger than its contents. 2749 | XWhen copied through the filesystem the holes will be filled. 2750 | X.LP 2751 | XThe contents of 2752 | X.B datum 2753 | Xvalues returned are in volatile storage. If you want to retain the values 2754 | Xpointed to, you must copy them immediately before another call to this package. 2755 | X.LP 2756 | XThe only safe way for multiple processes to (read and) update a database at 2757 | Xthe same time, is to implement a private locking scheme outside this package 2758 | Xand open and close the database between lock acquisitions. It is safe for 2759 | Xmultiple processes to concurrently access a database read-only. 2760 | X.SH APPLICATIONS PORTABILITY 2761 | XFor complete source code compatibility with the Berkeley Unix 2762 | X.IR ndbm (3) 2763 | Xlibrary, the 2764 | X.B sdbm.h 2765 | Xheader file should be installed in 2766 | X.BR /usr/include/ndbm.h . 
2767 | X.LP 2768 | XThe 2769 | X.B nullitem 2770 | Xdata item, and the 2771 | X.BR dbm_prep (\|), 2772 | X.BR dbm_hash (\|), 2773 | X.BR dbm_rdonly (\|), 2774 | X.BR dbm_dirfno (\|), 2775 | Xand 2776 | X.BR dbm_pagfno (\|) 2777 | Xfunctions are unique to this package. 2778 | @@@End of sdbm.3 2779 | echo x - sdbm.c 1>&2 2780 | sed 's/^X//' >sdbm.c <<'@@@End of sdbm.c' 2781 | X/* 2782 | X * sdbm - ndbm work-alike hashed database library 2783 | X * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). 2784 | X * author: oz@nexus.yorku.ca 2785 | X * status: public domain. 2786 | X * 2787 | X * core routines 2788 | X */ 2789 | X 2790 | X#ifndef lint 2791 | Xstatic char rcsid[] = "$Id: sdbm.c,v 1.16 90/12/13 13:01:31 oz Exp $"; 2792 | X#endif 2793 | X 2794 | X#include "sdbm.h" 2795 | X#include "tune.h" 2796 | X#include "pair.h" 2797 | X 2798 | X#include 2799 | X#include 2800 | X#ifdef BSD42 2801 | X#include 2802 | X#else 2803 | X#include 2804 | X#include 2805 | X#endif 2806 | X#include 2807 | X#include 2808 | X 2809 | X#ifdef __STDC__ 2810 | X#include 2811 | X#endif 2812 | X 2813 | X#ifndef NULL 2814 | X#define NULL 0 2815 | X#endif 2816 | X 2817 | X/* 2818 | X * externals 2819 | X */ 2820 | X#ifndef sun 2821 | Xextern int errno; 2822 | X#endif 2823 | X 2824 | Xextern char *malloc proto((unsigned int)); 2825 | Xextern void free proto((void *)); 2826 | Xextern long lseek(); 2827 | X 2828 | X/* 2829 | X * forward 2830 | X */ 2831 | Xstatic int getdbit proto((DBM *, long)); 2832 | Xstatic int setdbit proto((DBM *, long)); 2833 | Xstatic int getpage proto((DBM *, long)); 2834 | Xstatic datum getnext proto((DBM *)); 2835 | Xstatic int makroom proto((DBM *, long, int)); 2836 | X 2837 | X/* 2838 | X * useful macros 2839 | X */ 2840 | X#define bad(x) ((x).dptr == NULL || (x).dsize <= 0) 2841 | X#define exhash(item) dbm_hash((item).dptr, (item).dsize) 2842 | X#define ioerr(db) ((db)->flags |= DBM_IOERR) 2843 | X 2844 | X#define OFF_PAG(off) (long) (off) * PBLKSIZ 
2845 | X#define OFF_DIR(off) (long) (off) * DBLKSIZ 2846 | X 2847 | Xstatic long masks[] = { 2848 | X 000000000000, 000000000001, 000000000003, 000000000007, 2849 | X 000000000017, 000000000037, 000000000077, 000000000177, 2850 | X 000000000377, 000000000777, 000000001777, 000000003777, 2851 | X 000000007777, 000000017777, 000000037777, 000000077777, 2852 | X 000000177777, 000000377777, 000000777777, 000001777777, 2853 | X 000003777777, 000007777777, 000017777777, 000037777777, 2854 | X 000077777777, 000177777777, 000377777777, 000777777777, 2855 | X 001777777777, 003777777777, 007777777777, 017777777777 2856 | X}; 2857 | X 2858 | Xdatum nullitem = {NULL, 0}; 2859 | X 2860 | XDBM * 2861 | Xdbm_open(file, flags, mode) 2862 | Xregister char *file; 2863 | Xregister int flags; 2864 | Xregister int mode; 2865 | X{ 2866 | X register DBM *db; 2867 | X register char *dirname; 2868 | X register char *pagname; 2869 | X register int n; 2870 | X 2871 | X if (file == NULL || !*file) 2872 | X return errno = EINVAL, (DBM *) NULL; 2873 | X/* 2874 | X * need space for two seperate filenames 2875 | X */ 2876 | X n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2; 2877 | X 2878 | X if ((dirname = malloc((unsigned) n)) == NULL) 2879 | X return errno = ENOMEM, (DBM *) NULL; 2880 | X/* 2881 | X * build the file names 2882 | X */ 2883 | X dirname = strcat(strcpy(dirname, file), DIRFEXT); 2884 | X pagname = strcpy(dirname + strlen(dirname) + 1, file); 2885 | X pagname = strcat(pagname, PAGFEXT); 2886 | X 2887 | X db = dbm_prep(dirname, pagname, flags, mode); 2888 | X free((char *) dirname); 2889 | X return db; 2890 | X} 2891 | X 2892 | XDBM * 2893 | Xdbm_prep(dirname, pagname, flags, mode) 2894 | Xchar *dirname; 2895 | Xchar *pagname; 2896 | Xint flags; 2897 | Xint mode; 2898 | X{ 2899 | X register DBM *db; 2900 | X struct stat dstat; 2901 | X 2902 | X if ((db = (DBM *) malloc(sizeof(DBM))) == NULL) 2903 | X return errno = ENOMEM, (DBM *) NULL; 2904 | X 2905 | X db->flags = 0; 
2906 | X db->hmask = 0; 2907 | X db->blkptr = 0; 2908 | X db->keyptr = 0; 2909 | X/* 2910 | X * adjust user flags so that WRONLY becomes RDWR, 2911 | X * as required by this package. Also set our internal 2912 | X * flag for RDONLY if needed. 2913 | X */ 2914 | X if (flags & O_WRONLY) 2915 | X flags = (flags & ~O_WRONLY) | O_RDWR; 2916 | X 2917 | X else if ((flags & 03) == O_RDONLY) 2918 | X db->flags = DBM_RDONLY; 2919 | X/* 2920 | X * open the files in sequence, and stat the dirfile. 2921 | X * If we fail anywhere, undo everything, return NULL. 2922 | X */ 2923 | X if ((db->pagf = open(pagname, flags, mode)) > -1) { 2924 | X if ((db->dirf = open(dirname, flags, mode)) > -1) { 2925 | X/* 2926 | X * need the dirfile size to establish max bit number. 2927 | X */ 2928 | X if (fstat(db->dirf, &dstat) == 0) { 2929 | X/* 2930 | X * zero size: either a fresh database, or one with a single, 2931 | X * unsplit data page: dirpage is all zeros. 2932 | X */ 2933 | X db->dirbno = (!dstat.st_size) ? 
0 : -1; 2934 | X db->pagbno = -1; 2935 | X db->maxbno = dstat.st_size * BYTESIZ; 2936 | X 2937 | X (void) memset(db->pagbuf, 0, PBLKSIZ); 2938 | X (void) memset(db->dirbuf, 0, DBLKSIZ); 2939 | X /* 2940 | X * success 2941 | X */ 2942 | X return db; 2943 | X } 2944 | X (void) close(db->dirf); 2945 | X } 2946 | X (void) close(db->pagf); 2947 | X } 2948 | X free((char *) db); 2949 | X return (DBM *) NULL; 2950 | X} 2951 | X 2952 | Xvoid 2953 | Xdbm_close(db) 2954 | Xregister DBM *db; 2955 | X{ 2956 | X if (db == NULL) 2957 | X errno = EINVAL; 2958 | X else { 2959 | X (void) close(db->dirf); 2960 | X (void) close(db->pagf); 2961 | X free((char *) db); 2962 | X } 2963 | X} 2964 | X 2965 | Xdatum 2966 | Xdbm_fetch(db, key) 2967 | Xregister DBM *db; 2968 | Xdatum key; 2969 | X{ 2970 | X if (db == NULL || bad(key)) 2971 | X return errno = EINVAL, nullitem; 2972 | X 2973 | X if (getpage(db, exhash(key))) 2974 | X return getpair(db->pagbuf, key); 2975 | X 2976 | X return ioerr(db), nullitem; 2977 | X} 2978 | X 2979 | Xint 2980 | Xdbm_delete(db, key) 2981 | Xregister DBM *db; 2982 | Xdatum key; 2983 | X{ 2984 | X if (db == NULL || bad(key)) 2985 | X return errno = EINVAL, -1; 2986 | X if (dbm_rdonly(db)) 2987 | X return errno = EPERM, -1; 2988 | X 2989 | X if (getpage(db, exhash(key))) { 2990 | X if (!delpair(db->pagbuf, key)) 2991 | X return -1; 2992 | X/* 2993 | X * update the page file 2994 | X */ 2995 | X if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 2996 | X || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 2997 | X return ioerr(db), -1; 2998 | X 2999 | X return 0; 3000 | X } 3001 | X 3002 | X return ioerr(db), -1; 3003 | X} 3004 | X 3005 | Xint 3006 | Xdbm_store(db, key, val, flags) 3007 | Xregister DBM *db; 3008 | Xdatum key; 3009 | Xdatum val; 3010 | Xint flags; 3011 | X{ 3012 | X int need; 3013 | X register long hash; 3014 | X 3015 | X if (db == NULL || bad(key)) 3016 | X return errno = EINVAL, -1; 3017 | X if (dbm_rdonly(db)) 3018 | X return errno = EPERM, -1; 
3019 | X 3020 | X need = key.dsize + val.dsize; 3021 | X/* 3022 | X * is the pair too big (or too small) for this database ?? 3023 | X */ 3024 | X if (need < 0 || need > PAIRMAX) 3025 | X return errno = EINVAL, -1; 3026 | X 3027 | X if (getpage(db, (hash = exhash(key)))) { 3028 | X/* 3029 | X * if we need to replace, delete the key/data pair 3030 | X * first. If it is not there, ignore. 3031 | X */ 3032 | X if (flags == DBM_REPLACE) 3033 | X (void) delpair(db->pagbuf, key); 3034 | X#ifdef SEEDUPS 3035 | X else if (duppair(db->pagbuf, key)) 3036 | X return 1; 3037 | X#endif 3038 | X/* 3039 | X * if we do not have enough room, we have to split. 3040 | X */ 3041 | X if (!fitpair(db->pagbuf, need)) 3042 | X if (!makroom(db, hash, need)) 3043 | X return ioerr(db), -1; 3044 | X/* 3045 | X * we have enough room or split is successful. insert the key, 3046 | X * and update the page file. 3047 | X */ 3048 | X (void) putpair(db->pagbuf, key, val); 3049 | X 3050 | X if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 3051 | X || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 3052 | X return ioerr(db), -1; 3053 | X /* 3054 | X * success 3055 | X */ 3056 | X return 0; 3057 | X } 3058 | X 3059 | X return ioerr(db), -1; 3060 | X} 3061 | X 3062 | X/* 3063 | X * makroom - make room by splitting the overfull page 3064 | X * this routine will attempt to make room for SPLTMAX times before 3065 | X * giving up. 
3066 | X */ 3067 | Xstatic int 3068 | Xmakroom(db, hash, need) 3069 | Xregister DBM *db; 3070 | Xlong hash; 3071 | Xint need; 3072 | X{ 3073 | X long newp; 3074 | X char twin[PBLKSIZ]; 3075 | X char *pag = db->pagbuf; 3076 | X char *new = twin; 3077 | X register int smax = SPLTMAX; 3078 | X 3079 | X do { 3080 | X/* 3081 | X * split the current page 3082 | X */ 3083 | X (void) splpage(pag, new, db->hmask + 1); 3084 | X/* 3085 | X * address of the new page 3086 | X */ 3087 | X newp = (hash & db->hmask) | (db->hmask + 1); 3088 | X 3089 | X/* 3090 | X * write delay, read avoidence/cache shuffle: 3091 | X * select the page for incoming pair: if key is to go to the new page, 3092 | X * write out the previous one, and copy the new one over, thus making 3093 | X * it the current page. If not, simply write the new page, and we are 3094 | X * still looking at the page of interest. current page is not updated 3095 | X * here, as dbm_store will do so, after it inserts the incoming pair. 3096 | X */ 3097 | X if (hash & (db->hmask + 1)) { 3098 | X if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 3099 | X || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 3100 | X return 0; 3101 | X db->pagbno = newp; 3102 | X (void) memcpy(pag, new, PBLKSIZ); 3103 | X } 3104 | X else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0 3105 | X || write(db->pagf, new, PBLKSIZ) < 0) 3106 | X return 0; 3107 | X 3108 | X if (!setdbit(db, db->curbit)) 3109 | X return 0; 3110 | X/* 3111 | X * see if we have enough room now 3112 | X */ 3113 | X if (fitpair(pag, need)) 3114 | X return 1; 3115 | X/* 3116 | X * try again... update curbit and hmask as getpage would have 3117 | X * done. because of our update of the current page, we do not 3118 | X * need to read in anything. BUT we have to write the current 3119 | X * [deferred] page out, as the window of failure is too great. 3120 | X */ 3121 | X db->curbit = 2 * db->curbit + 3122 | X ((hash & (db->hmask + 1)) ? 
2 : 1); 3123 | X db->hmask |= db->hmask + 1; 3124 | X 3125 | X if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 3126 | X || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 3127 | X return 0; 3128 | X 3129 | X } while (--smax); 3130 | X/* 3131 | X * if we are here, this is real bad news. After SPLTMAX splits, 3132 | X * we still cannot fit the key. say goodnight. 3133 | X */ 3134 | X#ifdef BADMESS 3135 | X (void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44); 3136 | X#endif 3137 | X return 0; 3138 | X 3139 | X} 3140 | X 3141 | X/* 3142 | X * the following two routines will break if 3143 | X * deletions aren't taken into account. (ndbm bug) 3144 | X */ 3145 | Xdatum 3146 | Xdbm_firstkey(db) 3147 | Xregister DBM *db; 3148 | X{ 3149 | X if (db == NULL) 3150 | X return errno = EINVAL, nullitem; 3151 | X/* 3152 | X * start at page 0 3153 | X */ 3154 | X if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0 3155 | X || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) 3156 | X return ioerr(db), nullitem; 3157 | X db->pagbno = 0; 3158 | X db->blkptr = 0; 3159 | X db->keyptr = 0; 3160 | X 3161 | X return getnext(db); 3162 | X} 3163 | X 3164 | Xdatum 3165 | Xdbm_nextkey(db) 3166 | Xregister DBM *db; 3167 | X{ 3168 | X if (db == NULL) 3169 | X return errno = EINVAL, nullitem; 3170 | X return getnext(db); 3171 | X} 3172 | X 3173 | X/* 3174 | X * all important binary trie traversal 3175 | X */ 3176 | Xstatic int 3177 | Xgetpage(db, hash) 3178 | Xregister DBM *db; 3179 | Xregister long hash; 3180 | X{ 3181 | X register int hbit; 3182 | X register long dbit; 3183 | X register long pagb; 3184 | X 3185 | X dbit = 0; 3186 | X hbit = 0; 3187 | X while (dbit < db->maxbno && getdbit(db, dbit)) 3188 | X dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1); 3189 | X 3190 | X debug(("dbit: %d...", dbit)); 3191 | X 3192 | X db->curbit = dbit; 3193 | X db->hmask = masks[hbit]; 3194 | X 3195 | X pagb = hash & db->hmask; 3196 | X/* 3197 | X * see if the block we need is already in memory. 
3198 | X * note: this lookaside cache has about 10% hit rate. 3199 | X */ 3200 | X if (pagb != db->pagbno) { 3201 | X/* 3202 | X * note: here, we assume a "hole" is read as 0s. 3203 | X * if not, must zero pagbuf first. 3204 | X */ 3205 | X if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0 3206 | X || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) 3207 | X return 0; 3208 | X if (!chkpage(db->pagbuf)) 3209 | X return 0; 3210 | X db->pagbno = pagb; 3211 | X 3212 | X debug(("pag read: %d\n", pagb)); 3213 | X } 3214 | X return 1; 3215 | X} 3216 | X 3217 | Xstatic int 3218 | Xgetdbit(db, dbit) 3219 | Xregister DBM *db; 3220 | Xregister long dbit; 3221 | X{ 3222 | X register long c; 3223 | X register long dirb; 3224 | X 3225 | X c = dbit / BYTESIZ; 3226 | X dirb = c / DBLKSIZ; 3227 | X 3228 | X if (dirb != db->dirbno) { 3229 | X if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 3230 | X || read(db->dirf, db->dirbuf, DBLKSIZ) < 0) 3231 | X return 0; 3232 | X db->dirbno = dirb; 3233 | X 3234 | X debug(("dir read: %d\n", dirb)); 3235 | X } 3236 | X 3237 | X return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ); 3238 | X} 3239 | X 3240 | Xstatic int 3241 | Xsetdbit(db, dbit) 3242 | Xregister DBM *db; 3243 | Xregister long dbit; 3244 | X{ 3245 | X register long c; 3246 | X register long dirb; 3247 | X 3248 | X c = dbit / BYTESIZ; 3249 | X dirb = c / DBLKSIZ; 3250 | X 3251 | X if (dirb != db->dirbno) { 3252 | X if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 3253 | X || read(db->dirf, db->dirbuf, DBLKSIZ) < 0) 3254 | X return 0; 3255 | X db->dirbno = dirb; 3256 | X 3257 | X debug(("dir read: %d\n", dirb)); 3258 | X } 3259 | X 3260 | X db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ); 3261 | X 3262 | X if (dbit >= db->maxbno) 3263 | X db->maxbno += DBLKSIZ * BYTESIZ; 3264 | X 3265 | X if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 3266 | X || write(db->dirf, db->dirbuf, DBLKSIZ) < 0) 3267 | X return 0; 3268 | X 3269 | X return 1; 3270 | X} 3271 | X 3272 | X/* 3273 | X * getnext - 
get the next key in the page, and if done with 3274 | X * the page, try the next page in sequence 3275 | X */ 3276 | Xstatic datum 3277 | Xgetnext(db) 3278 | Xregister DBM *db; 3279 | X{ 3280 | X datum key; 3281 | X 3282 | X for (;;) { 3283 | X db->keyptr++; 3284 | X key = getnkey(db->pagbuf, db->keyptr); 3285 | X if (key.dptr != NULL) 3286 | X return key; 3287 | X/* 3288 | X * we either run out, or there is nothing on this page.. 3289 | X * try the next one... If we lost our position on the 3290 | X * file, we will have to seek. 3291 | X */ 3292 | X db->keyptr = 0; 3293 | X if (db->pagbno != db->blkptr++) 3294 | X if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0) 3295 | X break; 3296 | X db->pagbno = db->blkptr; 3297 | X if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0) 3298 | X break; 3299 | X if (!chkpage(db->pagbuf)) 3300 | X break; 3301 | X } 3302 | X 3303 | X return ioerr(db), nullitem; 3304 | X} 3305 | @@@End of sdbm.c 3306 | echo x - sdbm.h 1>&2 3307 | sed 's/^X//' >sdbm.h <<'@@@End of sdbm.h' 3308 | X/* 3309 | X * sdbm - ndbm work-alike hashed database library 3310 | X * based on Per-Ake Larson's Dynamic Hashing algorithms. BIT 18 (1978). 3311 | X * author: oz@nexus.yorku.ca 3312 | X * status: public domain. 
3313 | X */ 3314 | X#define DBLKSIZ 4096 3315 | X#define PBLKSIZ 1024 3316 | X#define PAIRMAX 1008 /* arbitrary on PBLKSIZ-N */ 3317 | X#define SPLTMAX 10 /* maximum allowed splits */ 3318 | X /* for a single insertion */ 3319 | X#define DIRFEXT ".dir" 3320 | X#define PAGFEXT ".pag" 3321 | X 3322 | Xtypedef struct { 3323 | X int dirf; /* directory file descriptor */ 3324 | X int pagf; /* page file descriptor */ 3325 | X int flags; /* status/error flags, see below */ 3326 | X long maxbno; /* size of dirfile in bits */ 3327 | X long curbit; /* current bit number */ 3328 | X long hmask; /* current hash mask */ 3329 | X long blkptr; /* current block for nextkey */ 3330 | X int keyptr; /* current key for nextkey */ 3331 | X long blkno; /* current page to read/write */ 3332 | X long pagbno; /* current page in pagbuf */ 3333 | X char pagbuf[PBLKSIZ]; /* page file block buffer */ 3334 | X long dirbno; /* current block in dirbuf */ 3335 | X char dirbuf[DBLKSIZ]; /* directory file block buffer */ 3336 | X} DBM; 3337 | X 3338 | X#define DBM_RDONLY 0x1 /* data base open read-only */ 3339 | X#define DBM_IOERR 0x2 /* data base I/O error */ 3340 | X 3341 | X/* 3342 | X * utility macros 3343 | X */ 3344 | X#define dbm_rdonly(db) ((db)->flags & DBM_RDONLY) 3345 | X#define dbm_error(db) ((db)->flags & DBM_IOERR) 3346 | X 3347 | X#define dbm_clearerr(db) ((db)->flags &= ~DBM_IOERR) /* ouch */ 3348 | X 3349 | X#define dbm_dirfno(db) ((db)->dirf) 3350 | X#define dbm_pagfno(db) ((db)->pagf) 3351 | X 3352 | Xtypedef struct { 3353 | X char *dptr; 3354 | X int dsize; 3355 | X} datum; 3356 | X 3357 | Xextern datum nullitem; 3358 | X 3359 | X#ifdef __STDC__ 3360 | X#define proto(p) p 3361 | X#else 3362 | X#define proto(p) () 3363 | X#endif 3364 | X 3365 | X/* 3366 | X * flags to dbm_store 3367 | X */ 3368 | X#define DBM_INSERT 0 3369 | X#define DBM_REPLACE 1 3370 | X 3371 | X/* 3372 | X * ndbm interface 3373 | X */ 3374 | Xextern DBM *dbm_open proto((char *, int, int)); 3375 | Xextern void 
dbm_close proto((DBM *)); 3376 | Xextern datum dbm_fetch proto((DBM *, datum)); 3377 | Xextern int dbm_delete proto((DBM *, datum)); 3378 | Xextern int dbm_store proto((DBM *, datum, datum, int)); 3379 | Xextern datum dbm_firstkey proto((DBM *)); 3380 | Xextern datum dbm_nextkey proto((DBM *)); 3381 | X 3382 | X/* 3383 | X * other 3384 | X */ 3385 | Xextern DBM *dbm_prep proto((char *, char *, int, int)); 3386 | Xextern long dbm_hash proto((char *, int)); 3387 | @@@End of sdbm.h 3388 | echo x - tune.h 1>&2 3389 | sed 's/^X//' >tune.h <<'@@@End of tune.h' 3390 | X/* 3391 | X * sdbm - ndbm work-alike hashed database library 3392 | X * tuning and portability constructs [not nearly enough] 3393 | X * author: oz@nexus.yorku.ca 3394 | X */ 3395 | X 3396 | X#define BYTESIZ 8 3397 | X 3398 | X#ifdef SVID 3399 | X#include 3400 | X#endif 3401 | X 3402 | X#ifdef BSD42 3403 | X#define SEEK_SET L_SET 3404 | X#define memset(s,c,n) bzero(s, n) /* only when c is zero */ 3405 | X#define memcpy(s1,s2,n) bcopy(s2, s1, n) 3406 | X#define memcmp(s1,s2,n) bcmp(s1,s2,n) 3407 | X#endif 3408 | X 3409 | X/* 3410 | X * important tuning parms (hah) 3411 | X */ 3412 | X 3413 | X#define SEEDUPS /* always detect duplicates */ 3414 | X#define BADMESS /* generate a message for worst case: 3415 | X cannot make room after SPLTMAX splits */ 3416 | X/* 3417 | X * misc 3418 | X */ 3419 | X#ifdef DEBUG 3420 | X#define debug(x) printf x 3421 | X#else 3422 | X#define debug(x) 3423 | X#endif 3424 | @@@End of tune.h 3425 | echo x - util.c 1>&2 3426 | sed 's/^X//' >util.c <<'@@@End of util.c' 3427 | X#include 3428 | X#ifdef SDBM 3429 | X#include "sdbm.h" 3430 | X#else 3431 | X#include "ndbm.h" 3432 | X#endif 3433 | X 3434 | Xvoid 3435 | Xoops(s1, s2) 3436 | Xregister char *s1; 3437 | Xregister char *s2; 3438 | X{ 3439 | X extern int errno, sys_nerr; 3440 | X extern char *sys_errlist[]; 3441 | X extern char *progname; 3442 | X 3443 | X if (progname) 3444 | X fprintf(stderr, "%s: ", progname); 3445 | X 
fprintf(stderr, s1, s2); 3446 | X if (errno > 0 && errno < sys_nerr) 3447 | X fprintf(stderr, " (%s)", sys_errlist[errno]); 3448 | X fprintf(stderr, "\n"); 3449 | X exit(1); 3450 | X} 3451 | X 3452 | Xint 3453 | Xokpage(pag) 3454 | Xchar *pag; 3455 | X{ 3456 | X register unsigned n; 3457 | X register off; 3458 | X register short *ino = (short *) pag; 3459 | X 3460 | X if ((n = ino[0]) > PBLKSIZ / sizeof(short)) 3461 | X return 0; 3462 | X 3463 | X if (!n) 3464 | X return 1; 3465 | X 3466 | X off = PBLKSIZ; 3467 | X for (ino++; n; ino += 2) { 3468 | X if (ino[0] > off || ino[1] > off || 3469 | X ino[1] > ino[0]) 3470 | X return 0; 3471 | X off = ino[1]; 3472 | X n -= 2; 3473 | X } 3474 | X 3475 | X return 1; 3476 | X} 3477 | @@@End of util.c 3478 | -------------------------------------------------------------------------------- /sdbm.c: -------------------------------------------------------------------------------- 1 | /* 2 | * sdbm - ndbm work-alike hashed database library 3 | * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978). 4 | * author: oz@nexus.yorku.ca 5 | * status: public domain. 
6 | * 7 | * core routines 8 | */ 9 | 10 | #ifndef lint 11 | static char rcsid[] = "$Id: sdbm.c,v 1.16 90/12/13 13:01:31 oz Exp $"; 12 | #endif 13 | 14 | #include "sdbm.h" 15 | #include "tune.h" 16 | #include "pair.h" 17 | 18 | #include 19 | #include 20 | #ifdef BSD42 21 | #include 22 | #else 23 | #include 24 | #include 25 | #endif 26 | #include 27 | #include 28 | 29 | #ifdef __STDC__ 30 | #include 31 | #endif 32 | 33 | #ifndef NULL 34 | #define NULL 0 35 | #endif 36 | 37 | /* 38 | * externals 39 | */ 40 | #ifndef sun 41 | extern int errno; 42 | #endif 43 | 44 | extern char *malloc proto((unsigned int)); 45 | extern void free proto((void *)); 46 | extern long lseek(); 47 | 48 | /* 49 | * forward 50 | */ 51 | static int getdbit proto((DBM *, long)); 52 | static int setdbit proto((DBM *, long)); 53 | static int getpage proto((DBM *, long)); 54 | static datum getnext proto((DBM *)); 55 | static int makroom proto((DBM *, long, int)); 56 | 57 | /* 58 | * useful macros 59 | */ 60 | #define bad(x) ((x).dptr == NULL || (x).dsize <= 0) 61 | #define exhash(item) dbm_hash((item).dptr, (item).dsize) 62 | #define ioerr(db) ((db)->flags |= DBM_IOERR) 63 | 64 | #define OFF_PAG(off) (long) (off) * PBLKSIZ 65 | #define OFF_DIR(off) (long) (off) * DBLKSIZ 66 | 67 | static long masks[] = { 68 | 000000000000, 000000000001, 000000000003, 000000000007, 69 | 000000000017, 000000000037, 000000000077, 000000000177, 70 | 000000000377, 000000000777, 000000001777, 000000003777, 71 | 000000007777, 000000017777, 000000037777, 000000077777, 72 | 000000177777, 000000377777, 000000777777, 000001777777, 73 | 000003777777, 000007777777, 000017777777, 000037777777, 74 | 000077777777, 000177777777, 000377777777, 000777777777, 75 | 001777777777, 003777777777, 007777777777, 017777777777 76 | }; 77 | 78 | datum nullitem = {NULL, 0}; 79 | 80 | DBM * 81 | dbm_open(file, flags, mode) 82 | register char *file; 83 | register int flags; 84 | register int mode; 85 | { 86 | register DBM *db; 87 | register char 
*dirname; 88 | register char *pagname; 89 | register int n; 90 | 91 | if (file == NULL || !*file) 92 | return errno = EINVAL, (DBM *) NULL; 93 | /* 94 | * need space for two seperate filenames 95 | */ 96 | n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2; 97 | 98 | if ((dirname = malloc((unsigned) n)) == NULL) 99 | return errno = ENOMEM, (DBM *) NULL; 100 | /* 101 | * build the file names 102 | */ 103 | dirname = strcat(strcpy(dirname, file), DIRFEXT); 104 | pagname = strcpy(dirname + strlen(dirname) + 1, file); 105 | pagname = strcat(pagname, PAGFEXT); 106 | 107 | db = dbm_prep(dirname, pagname, flags, mode); 108 | free((char *) dirname); 109 | return db; 110 | } 111 | 112 | DBM * 113 | dbm_prep(dirname, pagname, flags, mode) 114 | char *dirname; 115 | char *pagname; 116 | int flags; 117 | int mode; 118 | { 119 | register DBM *db; 120 | struct stat dstat; 121 | 122 | if ((db = (DBM *) malloc(sizeof(DBM))) == NULL) 123 | return errno = ENOMEM, (DBM *) NULL; 124 | 125 | db->flags = 0; 126 | db->hmask = 0; 127 | db->blkptr = 0; 128 | db->keyptr = 0; 129 | /* 130 | * adjust user flags so that WRONLY becomes RDWR, 131 | * as required by this package. Also set our internal 132 | * flag for RDONLY if needed. 133 | */ 134 | if (flags & O_WRONLY) 135 | flags = (flags & ~O_WRONLY) | O_RDWR; 136 | 137 | else if ((flags & 03) == O_RDONLY) 138 | db->flags = DBM_RDONLY; 139 | /* 140 | * open the files in sequence, and stat the dirfile. 141 | * If we fail anywhere, undo everything, return NULL. 142 | */ 143 | if ((db->pagf = open(pagname, flags, mode)) > -1) { 144 | if ((db->dirf = open(dirname, flags, mode)) > -1) { 145 | /* 146 | * need the dirfile size to establish max bit number. 147 | */ 148 | if (fstat(db->dirf, &dstat) == 0) { 149 | /* 150 | * zero size: either a fresh database, or one with a single, 151 | * unsplit data page: dirpage is all zeros. 152 | */ 153 | db->dirbno = (!dstat.st_size) ? 
0 : -1; 154 | db->pagbno = -1; 155 | db->maxbno = dstat.st_size * BYTESIZ; 156 | 157 | (void) memset(db->pagbuf, 0, PBLKSIZ); 158 | (void) memset(db->dirbuf, 0, DBLKSIZ); 159 | /* 160 | * success 161 | */ 162 | return db; 163 | } 164 | (void) close(db->dirf); 165 | } 166 | (void) close(db->pagf); 167 | } 168 | free((char *) db); 169 | return (DBM *) NULL; 170 | } 171 | 172 | void 173 | dbm_close(db) 174 | register DBM *db; 175 | { 176 | if (db == NULL) 177 | errno = EINVAL; 178 | else { 179 | (void) close(db->dirf); 180 | (void) close(db->pagf); 181 | free((char *) db); 182 | } 183 | } 184 | 185 | datum 186 | dbm_fetch(db, key) 187 | register DBM *db; 188 | datum key; 189 | { 190 | if (db == NULL || bad(key)) 191 | return errno = EINVAL, nullitem; 192 | 193 | if (getpage(db, exhash(key))) 194 | return getpair(db->pagbuf, key); 195 | 196 | return ioerr(db), nullitem; 197 | } 198 | 199 | int 200 | dbm_delete(db, key) 201 | register DBM *db; 202 | datum key; 203 | { 204 | if (db == NULL || bad(key)) 205 | return errno = EINVAL, -1; 206 | if (dbm_rdonly(db)) 207 | return errno = EPERM, -1; 208 | 209 | if (getpage(db, exhash(key))) { 210 | if (!delpair(db->pagbuf, key)) 211 | return -1; 212 | /* 213 | * update the page file 214 | */ 215 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 216 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 217 | return ioerr(db), -1; 218 | 219 | return 0; 220 | } 221 | 222 | return ioerr(db), -1; 223 | } 224 | 225 | int 226 | dbm_store(db, key, val, flags) 227 | register DBM *db; 228 | datum key; 229 | datum val; 230 | int flags; 231 | { 232 | int need; 233 | register long hash; 234 | 235 | if (db == NULL || bad(key)) 236 | return errno = EINVAL, -1; 237 | if (dbm_rdonly(db)) 238 | return errno = EPERM, -1; 239 | 240 | need = key.dsize + val.dsize; 241 | /* 242 | * is the pair too big (or too small) for this database ?? 
243 | */ 244 | if (need < 0 || need > PAIRMAX) 245 | return errno = EINVAL, -1; 246 | 247 | if (getpage(db, (hash = exhash(key)))) { 248 | /* 249 | * if we need to replace, delete the key/data pair 250 | * first. If it is not there, ignore. 251 | */ 252 | if (flags == DBM_REPLACE) 253 | (void) delpair(db->pagbuf, key); 254 | #ifdef SEEDUPS 255 | else if (duppair(db->pagbuf, key)) 256 | return 1; 257 | #endif 258 | /* 259 | * if we do not have enough room, we have to split. 260 | */ 261 | if (!fitpair(db->pagbuf, need)) 262 | if (!makroom(db, hash, need)) 263 | return ioerr(db), -1; 264 | /* 265 | * we have enough room or split is successful. insert the key, 266 | * and update the page file. 267 | */ 268 | (void) putpair(db->pagbuf, key, val); 269 | 270 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 271 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 272 | return ioerr(db), -1; 273 | /* 274 | * success 275 | */ 276 | return 0; 277 | } 278 | 279 | return ioerr(db), -1; 280 | } 281 | 282 | /* 283 | * makroom - make room by splitting the overfull page 284 | * this routine will attempt to make room for SPLTMAX times before 285 | * giving up. 286 | */ 287 | static int 288 | makroom(db, hash, need) 289 | register DBM *db; 290 | long hash; 291 | int need; 292 | { 293 | long newp; 294 | char twin[PBLKSIZ]; 295 | char *pag = db->pagbuf; 296 | char *new = twin; 297 | register int smax = SPLTMAX; 298 | 299 | do { 300 | /* 301 | * split the current page 302 | */ 303 | (void) splpage(pag, new, db->hmask + 1); 304 | /* 305 | * address of the new page 306 | */ 307 | newp = (hash & db->hmask) | (db->hmask + 1); 308 | 309 | /* 310 | * write delay, read avoidence/cache shuffle: 311 | * select the page for incoming pair: if key is to go to the new page, 312 | * write out the previous one, and copy the new one over, thus making 313 | * it the current page. If not, simply write the new page, and we are 314 | * still looking at the page of interest. 
current page is not updated 315 | * here, as dbm_store will do so, after it inserts the incoming pair. 316 | */ 317 | if (hash & (db->hmask + 1)) { 318 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 319 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 320 | return 0; 321 | db->pagbno = newp; 322 | (void) memcpy(pag, new, PBLKSIZ); 323 | } 324 | else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0 325 | || write(db->pagf, new, PBLKSIZ) < 0) 326 | return 0; 327 | 328 | if (!setdbit(db, db->curbit)) 329 | return 0; 330 | /* 331 | * see if we have enough room now 332 | */ 333 | if (fitpair(pag, need)) 334 | return 1; 335 | /* 336 | * try again... update curbit and hmask as getpage would have 337 | * done. because of our update of the current page, we do not 338 | * need to read in anything. BUT we have to write the current 339 | * [deferred] page out, as the window of failure is too great. 340 | */ 341 | db->curbit = 2 * db->curbit + 342 | ((hash & (db->hmask + 1)) ? 2 : 1); 343 | db->hmask |= db->hmask + 1; 344 | 345 | if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0 346 | || write(db->pagf, db->pagbuf, PBLKSIZ) < 0) 347 | return 0; 348 | 349 | } while (--smax); 350 | /* 351 | * if we are here, this is real bad news. After SPLTMAX splits, 352 | * we still cannot fit the key. say goodnight. 353 | */ 354 | #ifdef BADMESS 355 | (void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44); 356 | #endif 357 | return 0; 358 | 359 | } 360 | 361 | /* 362 | * the following two routines will break if 363 | * deletions aren't taken into account. 
(ndbm bug) 364 | */ 365 | datum 366 | dbm_firstkey(db) 367 | register DBM *db; 368 | { 369 | if (db == NULL) 370 | return errno = EINVAL, nullitem; 371 | /* 372 | * start at page 0 373 | */ 374 | if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0 375 | || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) 376 | return ioerr(db), nullitem; 377 | db->pagbno = 0; 378 | db->blkptr = 0; 379 | db->keyptr = 0; 380 | 381 | return getnext(db); 382 | } 383 | 384 | datum 385 | dbm_nextkey(db) 386 | register DBM *db; 387 | { 388 | if (db == NULL) 389 | return errno = EINVAL, nullitem; 390 | return getnext(db); 391 | } 392 | 393 | /* 394 | * all important binary trie traversal 395 | */ 396 | static int 397 | getpage(db, hash) 398 | register DBM *db; 399 | register long hash; 400 | { 401 | register int hbit; 402 | register long dbit; 403 | register long pagb; 404 | 405 | dbit = 0; 406 | hbit = 0; 407 | while (dbit < db->maxbno && getdbit(db, dbit)) 408 | dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1); 409 | 410 | debug(("dbit: %d...", dbit)); 411 | 412 | db->curbit = dbit; 413 | db->hmask = masks[hbit]; 414 | 415 | pagb = hash & db->hmask; 416 | /* 417 | * see if the block we need is already in memory. 418 | * note: this lookaside cache has about 10% hit rate. 419 | */ 420 | if (pagb != db->pagbno) { 421 | /* 422 | * note: here, we assume a "hole" is read as 0s. 423 | * if not, must zero pagbuf first. 
424 | */ 425 | if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0 426 | || read(db->pagf, db->pagbuf, PBLKSIZ) < 0) 427 | return 0; 428 | if (!chkpage(db->pagbuf)) 429 | return 0; 430 | db->pagbno = pagb; 431 | 432 | debug(("pag read: %d\n", pagb)); 433 | } 434 | return 1; 435 | } 436 | 437 | static int 438 | getdbit(db, dbit) 439 | register DBM *db; 440 | register long dbit; 441 | { 442 | register long c; 443 | register long dirb; 444 | 445 | c = dbit / BYTESIZ; 446 | dirb = c / DBLKSIZ; 447 | 448 | if (dirb != db->dirbno) { 449 | if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 450 | || read(db->dirf, db->dirbuf, DBLKSIZ) < 0) 451 | return 0; 452 | db->dirbno = dirb; 453 | 454 | debug(("dir read: %d\n", dirb)); 455 | } 456 | 457 | return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ); 458 | } 459 | 460 | static int 461 | setdbit(db, dbit) 462 | register DBM *db; 463 | register long dbit; 464 | { 465 | register long c; 466 | register long dirb; 467 | 468 | c = dbit / BYTESIZ; 469 | dirb = c / DBLKSIZ; 470 | 471 | if (dirb != db->dirbno) { 472 | if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 473 | || read(db->dirf, db->dirbuf, DBLKSIZ) < 0) 474 | return 0; 475 | db->dirbno = dirb; 476 | 477 | debug(("dir read: %d\n", dirb)); 478 | } 479 | 480 | db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ); 481 | 482 | if (dbit >= db->maxbno) 483 | db->maxbno += DBLKSIZ * BYTESIZ; 484 | 485 | if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0 486 | || write(db->dirf, db->dirbuf, DBLKSIZ) < 0) 487 | return 0; 488 | 489 | return 1; 490 | } 491 | 492 | /* 493 | * getnext - get the next key in the page, and if done with 494 | * the page, try the next page in sequence 495 | */ 496 | static datum 497 | getnext(db) 498 | register DBM *db; 499 | { 500 | datum key; 501 | 502 | for (;;) { 503 | db->keyptr++; 504 | key = getnkey(db->pagbuf, db->keyptr); 505 | if (key.dptr != NULL) 506 | return key; 507 | /* 508 | * we either run out, or there is nothing on this page.. 
509 | * try the next one... If we lost our position on the 510 | * file, we will have to seek. 511 | */ 512 | db->keyptr = 0; 513 | if (db->pagbno != db->blkptr++) 514 | if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0) 515 | break; 516 | db->pagbno = db->blkptr; 517 | if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0) 518 | break; 519 | if (!chkpage(db->pagbuf)) 520 | break; 521 | } 522 | 523 | return ioerr(db), nullitem; 524 | } 525 | -------------------------------------------------------------------------------- /sdbm.h: -------------------------------------------------------------------------------- 1 | /* 2 | * sdbm - ndbm work-alike hashed database library 3 | * based on Per-Ake Larson's Dynamic Hashing algorithms. BIT 18 (1978). 4 | * author: oz@nexus.yorku.ca 5 | * status: public domain. 6 | */ 7 | #define DBLKSIZ 4096 8 | #define PBLKSIZ 1024 9 | #define PAIRMAX 1008 /* arbitrary on PBLKSIZ-N */ 10 | #define SPLTMAX 10 /* maximum allowed splits */ 11 | /* for a single insertion */ 12 | #define DIRFEXT ".dir" 13 | #define PAGFEXT ".pag" 14 | 15 | typedef struct { 16 | int dirf; /* directory file descriptor */ 17 | int pagf; /* page file descriptor */ 18 | int flags; /* status/error flags, see below */ 19 | long maxbno; /* size of dirfile in bits */ 20 | long curbit; /* current bit number */ 21 | long hmask; /* current hash mask */ 22 | long blkptr; /* current block for nextkey */ 23 | int keyptr; /* current key for nextkey */ 24 | long blkno; /* current page to read/write */ 25 | long pagbno; /* current page in pagbuf */ 26 | char pagbuf[PBLKSIZ]; /* page file block buffer */ 27 | long dirbno; /* current block in dirbuf */ 28 | char dirbuf[DBLKSIZ]; /* directory file block buffer */ 29 | } DBM; 30 | 31 | #define DBM_RDONLY 0x1 /* data base open read-only */ 32 | #define DBM_IOERR 0x2 /* data base I/O error */ 33 | 34 | /* 35 | * utility macros 36 | */ 37 | #define dbm_rdonly(db) ((db)->flags & DBM_RDONLY) 38 | #define dbm_error(db) ((db)->flags & 
DBM_IOERR) 39 | 40 | #define dbm_clearerr(db) ((db)->flags &= ~DBM_IOERR) /* ouch */ 41 | 42 | #define dbm_dirfno(db) ((db)->dirf) 43 | #define dbm_pagfno(db) ((db)->pagf) 44 | 45 | typedef struct { 46 | char *dptr; 47 | int dsize; 48 | } datum; 49 | 50 | extern datum nullitem; 51 | 52 | #ifdef __STDC__ 53 | #define proto(p) p 54 | #else 55 | #define proto(p) () 56 | #endif 57 | 58 | /* 59 | * flags to dbm_store 60 | */ 61 | #define DBM_INSERT 0 62 | #define DBM_REPLACE 1 63 | 64 | /* 65 | * ndbm interface 66 | */ 67 | extern DBM *dbm_open proto((char *, int, int)); 68 | extern void dbm_close proto((DBM *)); 69 | extern datum dbm_fetch proto((DBM *, datum)); 70 | extern int dbm_delete proto((DBM *, datum)); 71 | extern int dbm_store proto((DBM *, datum, datum, int)); 72 | extern datum dbm_firstkey proto((DBM *)); 73 | extern datum dbm_nextkey proto((DBM *)); 74 | 75 | /* 76 | * other 77 | */ 78 | extern DBM *dbm_prep proto((char *, char *, int, int)); 79 | extern long dbm_hash proto((char *, int)); 80 | -------------------------------------------------------------------------------- /tune.h: -------------------------------------------------------------------------------- 1 | /* 2 | * sdbm - ndbm work-alike hashed database library 3 | * tuning and portability constructs [not nearly enough] 4 | * author: oz@nexus.yorku.ca 5 | */ 6 | 7 | #define BYTESIZ 8 8 | 9 | #ifdef SVID 10 | #include 11 | #endif 12 | 13 | #ifdef BSD42 14 | #define SEEK_SET L_SET 15 | #define memset(s,c,n) bzero(s, n) /* only when c is zero */ 16 | #define memcpy(s1,s2,n) bcopy(s2, s1, n) 17 | #define memcmp(s1,s2,n) bcmp(s1,s2,n) 18 | #endif 19 | 20 | /* 21 | * important tuning parms (hah) 22 | */ 23 | 24 | #define SEEDUPS /* always detect duplicates */ 25 | #define BADMESS /* generate a message for worst case: 26 | cannot make room after SPLTMAX splits */ 27 | /* 28 | * misc 29 | */ 30 | #ifdef DEBUG 31 | #define debug(x) printf x 32 | #else 33 | #define debug(x) 34 | #endif 35 | 
-------------------------------------------------------------------------------- /util.c: -------------------------------------------------------------------------------- 1 | #include 2 | #ifdef SDBM 3 | #include "sdbm.h" 4 | #else 5 | #include "ndbm.h" 6 | #endif 7 | 8 | void 9 | oops(s1, s2) 10 | register char *s1; 11 | register char *s2; 12 | { 13 | extern int errno, sys_nerr; 14 | extern char *sys_errlist[]; 15 | extern char *progname; 16 | 17 | if (progname) 18 | fprintf(stderr, "%s: ", progname); 19 | fprintf(stderr, s1, s2); 20 | if (errno > 0 && errno < sys_nerr) 21 | fprintf(stderr, " (%s)", sys_errlist[errno]); 22 | fprintf(stderr, "\n"); 23 | exit(1); 24 | } 25 | 26 | int 27 | okpage(pag) 28 | char *pag; 29 | { 30 | register unsigned n; 31 | register off; 32 | register short *ino = (short *) pag; 33 | 34 | if ((n = ino[0]) > PBLKSIZ / sizeof(short)) 35 | return 0; 36 | 37 | if (!n) 38 | return 1; 39 | 40 | off = PBLKSIZ; 41 | for (ino++; n; ino += 2) { 42 | if (ino[0] > off || ino[1] > off || 43 | ino[1] > ino[0]) 44 | return 0; 45 | off = ino[1]; 46 | n -= 2; 47 | } 48 | 49 | return 1; 50 | } 51 | --------------------------------------------------------------------------------