├── CHANGES
├── COMPARE
├── biblio
├── dba.c
├── dbd.c
├── dbe.1
├── dbe.c
├── dbm.c
├── dbm.h
├── dbu.c
├── grind
├── hash.c
├── makefile
├── pair.c
├── pair.h
├── readme.ms
├── readme.txt
├── sdbm.3
├── sdbm.bun
├── sdbm.c
├── sdbm.h
├── tune.h
└── util.c


/CHANGES:
--------------------------------------------------------------------------------
 1 | June 1997:
 2 | 
 3 | o fixed a long-hidden memmove bug in delpair that causes database
 4 |   corruption in MEMMOVE versions of sdbm. [sdbm defaults to duff's
 5 |   device to move data, so memmove version is almost never used.]
 6 | 
 7 | Changes from the earlier BETA releases.
 8 | 
 9 | o dbm_prep does everything now, so dbm_open is just a simple
10 |   wrapper that builds the default filenames. dbm_prep no longer
11 |   requires a (DBM *) db parameter: it allocates one itself. It
12 |   returns (DBM *) db or (DBM *) NULL.
13 | 
14 | o makroom is now reliable. In the common-case optimization of the page
15 |   split, the page into which the incoming key/value pair is to be inserted
16 |   is write-deferred (if the split is successful), thereby saving a costly
17 |   write.  BUT, if the split does not make enough room (unsuccessful), the
18 |   deferred page is written out, as the failure-window is now dependent on
19 |   the number of split attempts.
20 | 
21 | o if -DDUFF is defined, hash function will also use the DUFF construct.
22 |   This may look like a micro-performance tweak (maybe it is), but in fact,
23 |   the hash function is the third most-heavily used function, after read
24 |   and write.
25 | 


--------------------------------------------------------------------------------
/COMPARE:
--------------------------------------------------------------------------------
 1 | 
 2 | Script started on Thu Sep 28 15:41:06 1989
 3 | % uname -a
 4 | titan titan 4_0 UMIPS mips
 5 | % make all x-dbm
 6 |         cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dbm.c
 7 |         cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c sdbm.c
 8 |         cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c pair.c
 9 |         cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c hash.c
10 |         ar cr libsdbm.a sdbm.o pair.o hash.o
11 |         ranlib libsdbm.a
12 |         cc  -o dbm dbm.o libsdbm.a
13 |         cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dba.c
14 |         cc  -o dba dba.o
15 |         cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dbd.c
16 |         cc  -o dbd dbd.o
17 |         cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -o x-dbm dbm.o
18 | % 
19 | % 
20 | % wc history
21 |   65110 218344 3204883 history
22 | % 
23 | % /bin/time dbm build foo <history
24 | 
25 | real     5:56.9
26 | user       13.3
27 | sys        26.3
28 | % ls -s
29 | total 14251
30 |    5 README           2 dbd.c            1 hash.c           1 pair.h
31 |    0 SCRIPT           5 dbd.o            1 hash.o           5 pair.o
32 |    1 WISHLIST        62 dbm           3130 history          1 port.h
33 |   46 dba              5 dbm.c           11 howtodbm.txt    11 sdbm.c
34 |    3 dba.c            8 dbm.o           14 libsdbm.a        2 sdbm.h
35 |    6 dba.o            4 foo.dir          1 makefile         8 sdbm.o
36 |   46 dbd           10810 foo.pag         6 pair.c          60 x-dbm
37 | % ls -l foo.*
38 | -rw-r--r--  1 oz           4096 Sep 28 15:48 foo.dir
39 | -rw-r--r--  1 oz       11069440 Sep 28 15:48 foo.pag
40 | % 
41 | % /bin/time x-dbm build bar <history
42 | 
43 | real     5:59.4
44 | user       24.7
45 | sys        29.1
46 | % 
47 | % ls -s
48 | total 27612
49 |    5 README          46 dbd              1 hash.c           5 pair.o
50 |    1 SCRIPT           2 dbd.c            1 hash.o           1 port.h
51 |    1 WISHLIST         5 dbd.o         3130 history         11 sdbm.c
52 |    4 bar.dir         62 dbm             11 howtodbm.txt     2 sdbm.h
53 | 13356 bar.pag         5 dbm.c           14 libsdbm.a        8 sdbm.o
54 |   46 dba              8 dbm.o            1 makefile        60 x-dbm
55 |    3 dba.c            4 foo.dir          6 pair.c
56 |    6 dba.o         10810 foo.pag         1 pair.h
57 | % 
58 | % ls -l bar.*
59 | -rw-r--r--  1 oz           4096 Sep 28 15:54 bar.dir
60 | -rw-r--r--  1 oz       13676544 Sep 28 15:54 bar.pag
61 | % 
62 | % dba foo | tail
63 | #10801: ok. no entries.
64 | #10802: ok. no entries.
65 | #10803: ok. no entries.
66 | #10804: ok. no entries.
67 | #10805: ok. no entries.
68 | #10806: ok. no entries.
69 | #10807: ok. no entries.
70 | #10808: ok. no entries.
71 | #10809: ok.  11 entries 67% used free 337.
72 | 10810 pages (6036 holes):  65073 entries
73 | % 
74 | % dba bar | tail
75 | #13347: ok. no entries.
76 | #13348: ok. no entries.
77 | #13349: ok. no entries.
78 | #13350: ok. no entries.
79 | #13351: ok. no entries.
80 | #13352: ok. no entries.
81 | #13353: ok. no entries.
82 | #13354: ok. no entries.
83 | #13355: ok.   7 entries 33% used free 676.
84 | 13356 pages (8643 holes):  65073 entries
85 | %
86 | % exit
87 | script done on Thu Sep 28 16:08:45 1989
88 | 
89 | 


--------------------------------------------------------------------------------
/biblio:
--------------------------------------------------------------------------------
 1 | %A R. J. Enbody
 2 | %A H. C. Du
 3 | %T Dynamic Hashing Schemes
 4 | %J ACM Computing Surveys
 5 | %V 20
 6 | %N 2
 7 | %D June 1988
 8 | %P 85-113
 9 | %K surveys
10 | 
11 | %A P.-A. Larson
12 | %T Dynamic Hashing
13 | %J BIT
14 | %V 18
15 | %P 184-201
16 | %D 1978
17 | %K dynamic
18 | 
19 | %A W. Litwin
20 | %T Linear Hashing: A new tool for file and table addressing
21 | %J Proceedings of the 6th Conference on Very Large Databases (Montreal)
22 | %I Very Large Database Foundation
23 | %C Saratoga, Calif.
24 | %P 212-223
25 | %D 1980
26 | %K linear
27 | 
28 | %A R. Fagin
29 | %A J. Nievergelt
30 | %A N. Pippenger
31 | %A H. R. Strong
32 | %T Extendible Hashing - A Fast Access Method for Dynamic Files
33 | %J ACM Trans. Database Syst.
34 | %V 4
35 | %N 3
36 | %D Sept. 1979
37 | %P 315-344
38 | %K extend
39 | 
40 | %A G. N. Martin
41 | %T Spiral Storage: Incrementally Augmentable Hash Addressed Storage
42 | %J Technical Report #27
43 | %I University of Warwick
44 | %C Coventry, U.K.
45 | %D 1979
46 | %K spiral
47 | 
48 | %A Chris Torek
49 | %T Re: dbm.a and ndbm.a archives
50 | %B USENET newsgroup comp.unix
51 | %D 1987
52 | %K torek
53 | 
54 | %A Rich Wales
55 | %T Discussion of "dbm" data base system
56 | %B USENET newsgroup unix.wizards
57 | %D Jan. 1984
58 | %K rich
59 | 
60 | 
61 | 
62 | 
63 | 
64 | 
65 | 


--------------------------------------------------------------------------------
/dba.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * dba	dbm analysis/recovery
 3 |  */
 4 | 
 5 | #include <stdio.h>
 6 | #include <sys/file.h>
 7 | #include "sdbm.h"
 8 | 
 /* Program name, set from argv[0] in main(). */
char *progname;
/* Error reporter defined elsewhere in the project. */
extern void oops();

/*
 * dba entry point.
 *
 * Takes the database base name as argv[1], appends ".pag", opens that
 * file read-only and hands the descriptor to sdump().
 *
 * Returns 0 after sdump() has run.  The `return 1` statements after the
 * oops() calls only matter if oops() returns; presumably it terminates
 * the program -- TODO confirm against its definition.
 */
int
main(argc, argv)
int argc;
char **argv;
{
	int n;
	char *p;
	char *name;
	int pagf;

	progname = argv[0];

	if (argc < 2 || (p = argv[1]) == NULL) {
		oops("usage: %s dbname", progname);
		return 1;
	}

	/* room for the base name, ".pag" and the terminating NUL */
	n = strlen(p);
	name = (char *) malloc(n + 5);
	if (name == NULL) {
		oops("%s: cannot get memory", "name alloc");
		return 1;
	}
	strcpy(name, p);
	strcpy(name + n, ".pag");

	if ((pagf = open(name, O_RDONLY)) < 0) {
		oops("cannot open %s.", name);
		return 1;
	}

	sdump(pagf);

	return 0;
}
38 | 
39 | sdump(pagf)
40 | int pagf;
41 | {
42 | 	register b;
43 | 	register n = 0;
44 | 	register t = 0;
45 | 	register o = 0;
46 | 	register e;
47 | 	char pag[PBLKSIZ];
48 | 
49 | 	while ((b = read(pagf, pag, PBLKSIZ)) > 0) {
50 | 		printf("#%d: ", n);
51 | 		if (!okpage(pag))
52 | 			printf("bad\n");
53 | 		else {
54 | 			printf("ok. ");
55 | 			if (!(e = pagestat(pag)))
56 | 			    o++;
57 | 			else
58 | 			    t += e;
59 | 		}
60 | 		n++;
61 | 	}
62 | 
63 | 	if (b == 0)
64 | 		printf("%d pages (%d holes):  %d entries\n", n, o, t);
65 | 	else
66 | 		oops("read failed: block %d", n);
67 | }
68 | 
69 | pagestat(pag)
70 | char *pag;
71 | {
72 | 	register n;
73 | 	register free;
74 | 	register short *ino = (short *) pag;
75 | 
76 | 	if (!(n = ino[0]))
77 | 		printf("no entries.\n");
78 | 	else {
79 | 		free = ino[n] - (n + 1) * sizeof(short);
80 | 		printf("%3d entries %2d%% used free %d.\n",
81 | 		       n / 2, ((PBLKSIZ - free) * 100) / PBLKSIZ, free);
82 | 	}
83 | 	return n / 2;
84 | }
85 | 


--------------------------------------------------------------------------------
/dbd.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * dbd - dump a dbm data file
  3 |  */
  4 | 
  5 | #include <stdio.h>
  6 | #include <sys/file.h>
  7 | #include "sdbm.h"
  8 | 
  /* Program name, set from argv[0] in main(). */
char *progname;
/* Error reporter defined elsewhere in the project. */
extern void oops();


/* True when slot 0 of the page (viewed as shorts) is zero, i.e. no entries. */
#define empty(page)	(((short *) (page))[0] == 0)

/*
 * dbd entry point.
 *
 * Takes the database base name as argv[1], appends ".pag", opens that
 * file read-only and hands the descriptor to sdump().
 *
 * Returns 0 after sdump() has run.  The `return 1` statements after the
 * oops() calls only matter if oops() returns; presumably it terminates
 * the program -- TODO confirm against its definition.
 */
int
main(argc, argv)
int argc;
char **argv;
{
	int n;
	char *p;
	char *name;
	int pagf;

	progname = argv[0];

	if (argc < 2 || (p = argv[1]) == NULL) {
		oops("usage: %s dbname", progname);
		return 1;
	}

	/* room for the base name, ".pag" and the terminating NUL */
	n = strlen(p);
	name = (char *) malloc(n + 5);
	if (name == NULL) {
		oops("%s: cannot get memory", "name alloc");
		return 1;
	}
	strcpy(name, p);
	strcpy(name + n, ".pag");

	if ((pagf = open(name, O_RDONLY)) < 0) {
		oops("cannot open %s.", name);
		return 1;
	}

	sdump(pagf);
	return 0;
}
 40 | 
 41 | sdump(pagf)
 42 | int pagf;
 43 | {
 44 | 	register r;
 45 | 	register n = 0;
 46 | 	register o = 0;
 47 | 	char pag[PBLKSIZ];
 48 | 
 49 | 	while ((r = read(pagf, pag, PBLKSIZ)) > 0) {
 50 | 		if (!okpage(pag))
 51 | 			fprintf(stderr, "%d: bad page.\n", n);
 52 | 		else if (empty(pag))
 53 | 			o++;
 54 | 		else
 55 | 			dispage(pag);
 56 | 		n++;
 57 | 	}
 58 | 
 59 | 	if (r == 0)
 60 | 		fprintf(stderr, "%d pages (%d holes).\n", n, o);
 61 | 	else
 62 | 		oops("read failed: block %d", n);
 63 | }
 64 | 
 65 | 
 66 | #ifdef OLD
 67 | dispage(pag)
 68 | char *pag;
 69 | {
 70 | 	register i, n;
 71 | 	register off;
 72 | 	register short *ino = (short *) pag;
 73 | 
 74 | 	off = PBLKSIZ;
 75 | 	for (i = 1; i < ino[0]; i += 2) {
 76 | 		printf("\t[%d]: ", ino[i]);
 77 | 		for (n = ino[i]; n < off; n++)
 78 | 			putchar(pag[n]);
 79 | 		putchar(' ');
 80 | 		off = ino[i];
 81 | 		printf("[%d]: ", ino[i + 1]);
 82 | 		for (n = ino[i + 1]; n < off; n++)
 83 | 			putchar(pag[n]);
 84 | 		off = ino[i + 1];
 85 | 		putchar('\n');
 86 | 	}
 87 | }
 88 | #else
 89 | dispage(pag)
 90 | char *pag;
 91 | {
 92 | 	register i, n;
 93 | 	register off;
 94 | 	register short *ino = (short *) pag;
 95 | 
 96 | 	off = PBLKSIZ;
 97 | 	for (i = 1; i < ino[0]; i += 2) {
 98 | 		for (n = ino[i]; n < off; n++)
 99 | 			if (pag[n] != 0)
100 | 				putchar(pag[n]);
101 | 		putchar('\t');
102 | 		off = ino[i];
103 | 		for (n = ino[i + 1]; n < off; n++)
104 | 			if (pag[n] != 0)
105 | 				putchar(pag[n]);
106 | 		putchar('\n');
107 | 		off = ino[i + 1];
108 | 	}
109 | }
110 | #endif
111 | 


--------------------------------------------------------------------------------
/dbe.1:
--------------------------------------------------------------------------------
 1 | .TH dbe 1 "ndbm(3) EDITOR"
 2 | .SH NAME
 3 | dbe \- Edit a ndbm(3) database
 4 | .SH USAGE
 5 | dbe <database> [-m r|w|rw] [-crtvx] -a|-d|-f|-F|-s [<key> [<content>]]
 6 | .SH DESCRIPTION
 7 | \fIdbe\fP operates on ndbm(3) databases.
 8 | It can be used to create them, look at them or change them.
 9 | When specifying the value of a key or the content of its associated entry,
10 | \\nnn, \\0, \\n, \\t, \\f and \\r are interpreted as usual.
11 | When displaying key/content pairs, non-printable characters are displayed
12 | using the \\nnn notation.
13 | .SH OPTIONS
14 | .IP -a
15 | List all entries in the database.
16 | .IP -c
17 | Create the database if it does not exist.
18 | .IP -d
19 | Delete the entry associated with the specified key.
20 | .IP -f
21 | Fetch and display the entry associated with the specified key.
22 | .IP -F
23 | Fetch and display all the entries whose keys match the specified
24 | regular-expression
25 | .IP "-m r|w|rw"
26 | Open the database in read-only, write-only or read-write mode
27 | .IP -r
28 | Replace the entry associated with the specified key if it already exists.
29 | See option -s.
30 | .IP -s
31 | Store an entry under a specific key.
32 | An error occurs if the key already exists and the option -r was not specified.
33 | .IP -t
34 | Re-initialize the database before executing the command.
35 | .IP -v
36 | Verbose mode.
37 | Confirm stores and deletions.
38 | .IP -x
39 | If option -x is used with option -c, then if the database already exists,
40 | an error occurs.
41 | This can be used to implement a simple exclusive access locking mechanism.
42 | .SH SEE ALSO
43 | ndbm(3)
44 | .SH AUTHOR
45 | janick@bnr.ca
46 | 
47 | 


--------------------------------------------------------------------------------
/dbe.c:
--------------------------------------------------------------------------------
  #include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifndef VMS
#include <sys/file.h>
#include <ndbm.h>
#else
#include "file.h"
#include "ndbm.h"
#endif
 10 | 
 11 | /***************************************************************************\
 12 | **                                                                         **
 13 | **   Function name: getopt()                                               **
 14 | **   Author:        Henry Spencer, UofT                                    **
 15 | **   Coding date:   84/04/28                                               **
 16 | **                                                                         **
 17 | **   Description:                                                          **
 18 | **                                                                         **
 19 | **   Parses argv[] for arguments.                                          **
 20 | **   Works with Whitesmith's C compiler.                                   **
 21 | **                                                                         **
 22 | **   Inputs   - The number of arguments                                    **
 23 | **            - The base address of the array of arguments                 **
 24 | **            - A string listing the valid options (':' indicates an       **
 25 | **              argument to the preceding option is required, a ';'        **
 26 | **              indicates an argument to the preceding option is optional) **
 27 | **                                                                         **
 28 | **   Outputs  - Returns the next option character,                         **
 29 | **              '?' for non '-' arguments                                  **
 30 | **              or ':' when there is no more arguments.                    **
 31 | **                                                                         **
 32 | **   Side Effects + The argument to an option is pointed to by 'optarg'    **
 33 | **                                                                         **
 34 | *****************************************************************************
 35 | **                                                                         **
 36 | **   REVISION HISTORY:                                                     **
 37 | **                                                                         **
 38 | **     DATE           NAME                        DESCRIPTION              **
 39 | **   YY/MM/DD  ------------------   ------------------------------------   **
 40 | **   88/10/20  Janick Bergeron      Returns '?' on unamed arguments        **
 41 | **                                  returns '!' on unknown options         **
 42 | **                                  and 'EOF' only when exhausted.         **
 43 | **   88/11/18  Janick Bergeron      Return ':' when no more arguments      **
 44 | **   89/08/11  Janick Bergeron      Optional optarg when ';' in optstring  **
 45 | **                                                                         **
 46 | \***************************************************************************/
 47 | 
 /* Argument attached to the value most recently returned by getopt(). */
char *optarg;

#ifdef VMS
#define index  strchr
#endif

/*
 * getopt - hand back the next option letter or operand from argv[].
 *
 * Return values:
 *   ':'   no arguments left
 *   '?'   the word is not an option (plain word, "-" or "--");
 *         optarg points at the word
 *   '!'   unknown option letter, or a required argument is missing
 *         (a message is written to stderr)
 *   other the option letter; for letters followed by ':' or ';' in
 *         optstring, optarg is the argument (NULL when an optional
 *         argument is absent)
 *
 * The scanning position is kept in function-local statics, so the
 * sequence of calls walks through argv exactly once.
 */
char
getopt(argc, argv, optstring)
int argc;
char **argv;
char *optstring;
{
	extern char *index();
	static int optind = 0;		/* next argv slot to examine */
	static char *scan = NULL;	/* unread letters of the current word */
	register char *spec;
	register int opt;

	optarg = NULL;

	/* Fetch a new word once the current cluster is used up. */
	if (scan == NULL || *scan == '\0') {
		char *word;

		if (optind == 0)
			optind = 1;		/* step over the program name */
		if (optind >= argc)
			return ':';

		word = argv[optind++];
		optarg = word;
		if (word[0] != '-' || word[1] == '\0'
		    || (word[1] == '-' && word[2] == '\0'))
			return '?';
		scan = word + 1;
	}

	opt = *scan++;
	spec = index(optstring, opt);
	if (spec == NULL || opt == ':' || opt == ';') {
		(void) fprintf(stderr, "%s: unknown option %c\n", argv[0], opt);
		scan = NULL;
		return '!';
	}

	switch (spec[1]) {
	case ':':			/* argument required */
		if (*scan != '\0') {
			/* rest of the word is the argument */
			optarg = scan;
			scan = NULL;
			break;
		}
		if (optind >= argc) {
			(void) fprintf(stderr, "%s: %c requires an argument\n",
				       argv[0], opt);
			return '!';
		}
		optarg = argv[optind++];
		break;

	case ';':			/* argument optional */
		if (*scan != '\0') {
			optarg = scan;
			scan = NULL;
		}
		else if (optind < argc && *argv[optind] != '-')
			optarg = argv[optind++];
		else
			optarg = NULL;
		break;

	default:
		break;
	}
	return opt;
}
131 | 
132 | 
133 | void
134 | print_datum(db)
135 | datum db;
136 | {
137 | 	int i;
138 | 
139 | 	putchar('"');
140 | 	for (i = 0; i < db.dsize; i++) {
141 | 		if (isprint(db.dptr[i]))
142 | 			putchar(db.dptr[i]);
143 | 		else {
144 | 			putchar('\\');
145 | 			putchar('0' + ((db.dptr[i] >> 6) & 0x07));
146 | 			putchar('0' + ((db.dptr[i] >> 3) & 0x07));
147 | 			putchar('0' + (db.dptr[i] & 0x07));
148 | 		}
149 | 	}
150 | 	putchar('"');
151 | }
152 | 
153 | 
154 | datum
155 | read_datum(s)
156 | char *s;
157 | {
158 | 	datum db;
159 | 	char *p;
160 | 	int i;
161 | 
162 | 	db.dsize = 0;
163 | 	db.dptr = (char *) malloc(strlen(s) * sizeof(char));
164 | 	for (p = db.dptr; *s != '\0'; p++, db.dsize++, s++) {
165 | 		if (*s == '\\') {
166 | 			if (*++s == 'n')
167 | 				*p = '\n';
168 | 			else if (*s == 'r')
169 | 				*p = '\r';
170 | 			else if (*s == 'f')
171 | 				*p = '\f';
172 | 			else if (*s == 't')
173 | 				*p = '\t';
174 | 			else if (isdigit(*s) && isdigit(*(s + 1)) && isdigit(*(s + 2))) {
175 | 				i = (*s++ - '0') << 6;
176 | 				i |= (*s++ - '0') << 3;
177 | 				i |= *s - '0';
178 | 				*p = i;
179 | 			}
180 | 			else if (*s == '0')
181 | 				*p = '\0';
182 | 			else
183 | 				*p = *s;
184 | 		}
185 | 		else
186 | 			*p = *s;
187 | 	}
188 | 
189 | 	return db;
190 | }
191 | 
192 | 
193 | char *
194 | key2s(db)
195 | datum db;
196 | {
197 | 	char *buf;
198 | 	char *p1, *p2;
199 | 
200 | 	buf = (char *) malloc((db.dsize + 1) * sizeof(char));
201 | 	for (p1 = buf, p2 = db.dptr; *p2 != '\0'; *p1++ = *p2++);
202 | 	*p1 = '\0';
203 | 	return buf;
204 | }
205 | 
206 | 
207 | main(argc, argv)
208 | int argc;
209 | char **argv;
210 | {
211 | 	typedef enum {
212 | 		YOW, FETCH, STORE, DELETE, SCAN, REGEXP
213 | 	} commands;
214 | 	char opt;
215 | 	int flags;
216 | 	int giveusage = 0;
217 | 	int verbose = 0;
218 | 	commands what = YOW;
219 | 	char *comarg[3];
220 | 	int st_flag = DBM_INSERT;
221 | 	int argn;
222 | 	DBM *db;
223 | 	datum key;
224 | 	datum content;
225 | 
226 | 	flags = O_RDWR;
227 | 	argn = 0;
228 | 
229 | 	while ((opt = getopt(argc, argv, "acdfFm:rstvx")) != ':') {
230 | 		switch (opt) {
231 | 		case 'a':
232 | 			what = SCAN;
233 | 			break;
234 | 		case 'c':
235 | 			flags |= O_CREAT;
236 | 			break;
237 | 		case 'd':
238 | 			what = DELETE;
239 | 			break;
240 | 		case 'f':
241 | 			what = FETCH;
242 | 			break;
243 | 		case 'F':
244 | 			what = REGEXP;
245 | 			break;
246 | 		case 'm':
247 | 			flags &= ~(000007);
248 | 			if (strcmp(optarg, "r") == 0)
249 | 				flags |= O_RDONLY;
250 | 			else if (strcmp(optarg, "w") == 0)
251 | 				flags |= O_WRONLY;
252 | 			else if (strcmp(optarg, "rw") == 0)
253 | 				flags |= O_RDWR;
254 | 			else {
255 | 				fprintf(stderr, "Invalid mode: \"%s\"\n", optarg);
256 | 				giveusage = 1;
257 | 			}
258 | 			break;
259 | 		case 'r':
260 | 			st_flag = DBM_REPLACE;
261 | 			break;
262 | 		case 's':
263 | 			what = STORE;
264 | 			break;
265 | 		case 't':
266 | 			flags |= O_TRUNC;
267 | 			break;
268 | 		case 'v':
269 | 			verbose = 1;
270 | 			break;
271 | 		case 'x':
272 | 			flags |= O_EXCL;
273 | 			break;
274 | 		case '!':
275 | 			giveusage = 1;
276 | 			break;
277 | 		case '?':
278 | 			if (argn < 3)
279 | 				comarg[argn++] = optarg;
280 | 			else {
281 | 				fprintf(stderr, "Too many arguments.\n");
282 | 				giveusage = 1;
283 | 			}
284 | 			break;
285 | 		}
286 | 	}
287 | 
288 | 	if (giveusage | what == YOW | argn < 1) {
289 | 		fprintf(stderr, "Usage: %s databse [-m r|w|rw] [-crtx] -a|-d|-f|-F|-s [key [content]]\n", argv[0]);
290 | 		exit(-1);
291 | 	}
292 | 
293 | 	if ((db = dbm_open(comarg[0], flags, 0777)) == NULL) {
294 | 		fprintf(stderr, "Error opening database \"%s\"\n", comarg[0]);
295 | 		exit(-1);
296 | 	}
297 | 
298 | 	if (argn > 1)
299 | 		key = read_datum(comarg[1]);
300 | 	if (argn > 2)
301 | 		content = read_datum(comarg[2]);
302 | 
303 | 	switch (what) {
304 | 
305 | 	case SCAN:
306 | 		key = dbm_firstkey(db);
307 | 		if (dbm_error(db)) {
308 | 			fprintf(stderr, "Error when fetching first key\n");
309 | 			goto db_exit;
310 | 		}
311 | 		while (key.dptr != NULL) {
312 | 			content = dbm_fetch(db, key);
313 | 			if (dbm_error(db)) {
314 | 				fprintf(stderr, "Error when fetching ");
315 | 				print_datum(key);
316 | 				printf("\n");
317 | 				goto db_exit;
318 | 			}
319 | 			print_datum(key);
320 | 			printf(": ");
321 | 			print_datum(content);
322 | 			printf("\n");
323 | 			if (dbm_error(db)) {
324 | 				fprintf(stderr, "Error when fetching next key\n");
325 | 				goto db_exit;
326 | 			}
327 | 			key = dbm_nextkey(db);
328 | 		}
329 | 		break;
330 | 
331 | 	case REGEXP:
332 | 		if (argn < 2) {
333 | 			fprintf(stderr, "Missing regular expression.\n");
334 | 			goto db_exit;
335 | 		}
336 | 		if (re_comp(comarg[1])) {
337 | 			fprintf(stderr, "Invalid regular expression\n");
338 | 			goto db_exit;
339 | 		}
340 | 		key = dbm_firstkey(db);
341 | 		if (dbm_error(db)) {
342 | 			fprintf(stderr, "Error when fetching first key\n");
343 | 			goto db_exit;
344 | 		}
345 | 		while (key.dptr != NULL) {
346 | 			if (re_exec(key2s(key))) {
347 | 				content = dbm_fetch(db, key);
348 | 				if (dbm_error(db)) {
349 | 					fprintf(stderr, "Error when fetching ");
350 | 					print_datum(key);
351 | 					printf("\n");
352 | 					goto db_exit;
353 | 				}
354 | 				print_datum(key);
355 | 				printf(": ");
356 | 				print_datum(content);
357 | 				printf("\n");
358 | 				if (dbm_error(db)) {
359 | 					fprintf(stderr, "Error when fetching next key\n");
360 | 					goto db_exit;
361 | 				}
362 | 			}
363 | 			key = dbm_nextkey(db);
364 | 		}
365 | 		break;
366 | 
367 | 	case FETCH:
368 | 		if (argn < 2) {
369 | 			fprintf(stderr, "Missing fetch key.\n");
370 | 			goto db_exit;
371 | 		}
372 | 		content = dbm_fetch(db, key);
373 | 		if (dbm_error(db)) {
374 | 			fprintf(stderr, "Error when fetching ");
375 | 			print_datum(key);
376 | 			printf("\n");
377 | 			goto db_exit;
378 | 		}
379 | 		if (content.dptr == NULL) {
380 | 			fprintf(stderr, "Cannot find ");
381 | 			print_datum(key);
382 | 			printf("\n");
383 | 			goto db_exit;
384 | 		}
385 | 		print_datum(key);
386 | 		printf(": ");
387 | 		print_datum(content);
388 | 		printf("\n");
389 | 		break;
390 | 
391 | 	case DELETE:
392 | 		if (argn < 2) {
393 | 			fprintf(stderr, "Missing delete key.\n");
394 | 			goto db_exit;
395 | 		}
396 | 		if (dbm_delete(db, key) || dbm_error(db)) {
397 | 			fprintf(stderr, "Error when deleting ");
398 | 			print_datum(key);
399 | 			printf("\n");
400 | 			goto db_exit;
401 | 		}
402 | 		if (verbose) {
403 | 			print_datum(key);
404 | 			printf(": DELETED\n");
405 | 		}
406 | 		break;
407 | 
408 | 	case STORE:
409 | 		if (argn < 3) {
410 | 			fprintf(stderr, "Missing key and/or content.\n");
411 | 			goto db_exit;
412 | 		}
413 | 		if (dbm_store(db, key, content, st_flag) || dbm_error(db)) {
414 | 			fprintf(stderr, "Error when storing ");
415 | 			print_datum(key);
416 | 			printf("\n");
417 | 			goto db_exit;
418 | 		}
419 | 		if (verbose) {
420 | 			print_datum(key);
421 | 			printf(": ");
422 | 			print_datum(content);
423 | 			printf(" STORED\n");
424 | 		}
425 | 		break;
426 | 	}
427 | 
428 | db_exit:
429 | 	dbm_clearerr(db);
430 | 	dbm_close(db);
431 | 	if (dbm_error(db)) {
432 | 		fprintf(stderr, "Error closing database \"%s\"\n", comarg[0]);
433 | 		exit(-1);
434 | 	}
435 | }
436 | 


--------------------------------------------------------------------------------
/dbm.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 1985 The Regents of the University of California.
  3 |  * All rights reserved.
  4 |  *
  5 |  * Redistribution and use in source and binary forms are permitted
  6 |  * provided that the above copyright notice and this paragraph are
  7 |  * duplicated in all such forms and that any documentation,
  8 |  * advertising materials, and other materials related to such
  9 |  * distribution and use acknowledge that the software was developed
 10 |  * by the University of California, Berkeley.  The name of the
 11 |  * University may not be used to endorse or promote products derived
 12 |  * from this software without specific prior written permission.
 13 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 14 |  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 15 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 16 |  */
 17 | 
 18 | #ifndef lint
 19 | static char sccsid[] = "@(#)dbm.c    5.4 (Berkeley) 5/24/89";
 20 | #endif /* not lint */
 21 | 
 22 | #include    "dbm.h"
 23 | 
 24 | #define    NODB    ((DBM *)0)
 25 | 
 26 | static DBM *cur_db = NODB;
 27 | 
 28 | static char no_db[] = "dbm: no open database\n";
 29 | 
 30 | dbminit(file)
 31 |     char *file;
 32 | {
 33 |     if (cur_db != NODB)
 34 |         dbm_close(cur_db);
 35 | 
 36 |     cur_db = dbm_open(file, 2, 0);
 37 |     if (cur_db == NODB) {
 38 |         cur_db = dbm_open(file, 0, 0);
 39 |         if (cur_db == NODB)
 40 |             return (-1);
 41 |     }
 42 |     return (0);
 43 | }
 44 | 
 45 | long
 46 | forder(key)
 47 | datum key;
 48 | {
 49 |     if (cur_db == NODB) {
 50 |         printf(no_db);
 51 |         return (0L);
 52 |     }
 53 |     return (dbm_forder(cur_db, key));
 54 | }
 55 | 
 56 | datum
 57 | fetch(key)
 58 | datum key;
 59 | {
 60 |     datum item;
 61 | 
 62 |     if (cur_db == NODB) {
 63 |         printf(no_db);
 64 |         item.dptr = 0;
 65 |         return (item);
 66 |     }
 67 |     return (dbm_fetch(cur_db, key));
 68 | }
 69 | 
 70 | delete(key)
 71 | datum key;
 72 | {
 73 |     if (cur_db == NODB) {
 74 |         printf(no_db);
 75 |         return (-1);
 76 |     }
 77 |     if (dbm_rdonly(cur_db))
 78 |         return (-1);
 79 |     return (dbm_delete(cur_db, key));
 80 | }
 81 | 
 82 | store(key, dat)
 83 | datum key, dat;
 84 | {
 85 |     if (cur_db == NODB) {
 86 |         printf(no_db);
 87 |         return (-1);
 88 |     }
 89 |     if (dbm_rdonly(cur_db))
 90 |         return (-1);
 91 | 
 92 |     return (dbm_store(cur_db, key, dat, DBM_REPLACE));
 93 | }
 94 | 
 95 | datum
 96 | firstkey()
 97 | {
 98 |     datum item;
 99 | 
100 |     if (cur_db == NODB) {
101 |         printf(no_db);
102 |         item.dptr = 0;
103 |         return (item);
104 |     }
105 |     return (dbm_firstkey(cur_db));
106 | }
107 | 
108 | datum
109 | nextkey(key)
110 | datum key;
111 | {
112 |     datum item;
113 | 
114 |     if (cur_db == NODB) {
115 |         printf(no_db);
116 |         item.dptr = 0;
117 |         return (item);
118 |     }
119 |     return (dbm_nextkey(cur_db, key));
120 | }
121 | 


--------------------------------------------------------------------------------
/dbm.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * Copyright (c) 1983 The Regents of the University of California.
 3 |  * All rights reserved.
 4 |  *
 5 |  * Redistribution and use in source and binary forms are permitted
 6 |  * provided that the above copyright notice and this paragraph are
 7 |  * duplicated in all such forms and that any documentation,
 8 |  * advertising materials, and other materials related to such
 9 |  * distribution and use acknowledge that the software was developed
10 |  * by the University of California, Berkeley.  The name of the
11 |  * University may not be used to endorse or promote products derived
12 |  * from this software without specific prior written permission.
13 |  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
14 |  * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
15 |  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16 |  *
17 |  *    @(#)dbm.h    5.2 (Berkeley) 5/24/89
18 |  */
19 | 
20 | #ifndef NULL
21 | /*
22 |  * this is lunacy, we no longer use it (and never should have
23 |  * unconditionally defined it), but, this whole file is for
24 |  * backwards compatability - someone may rely on this.
25 |  */
26 | #define    NULL    ((char *) 0)
27 | #endif
28 | 
29 | #include <ndbm.h>
30 | 
31 | datum    fetch();
32 | datum    firstkey();
33 | datum    nextkey();
34 | 


--------------------------------------------------------------------------------
/dbu.c:
--------------------------------------------------------------------------------
  1 | #include <stdio.h>
  2 | #include <sys/file.h>
  3 | #ifdef SDBM
  4 | #include "sdbm.h"
  5 | #else
  6 | #include <ndbm.h>
  7 | #endif
  8 | #include <string.h>
  9 | 
 10 | #ifdef BSD42
 11 | #define strchr	index
 12 | #endif
 13 | 
 14 | extern int	getopt();
 15 | extern char	*strchr();
 16 | extern void	oops();
 17 | 
 18 | char *progname;
 19 | 
 20 | static int rflag;
 21 | static char *usage = "%s [-R] cat | look |... dbmname";
 22 | 
 /* Action codes stored in cmd.scode. */
#define DERROR	0
#define DLOOK	1
#define DINSERT	2
#define DDELETE 3
#define	DCAT	4
#define DBUILD	5
#define DPRESS	6
#define DCREAT	7

/* Size of the line buffer. */
#define LINEMAX	8192

/* One command-table row: command word, action code, open(2) flags. */
typedef struct {
	char *sname;
	int scode;
	int flags;
} cmd;

/* Command words and the action and open flags each one maps to. */
static cmd cmds[] = {
	{ "fetch",	DLOOK,		O_RDONLY },
	{ "get",	DLOOK,		O_RDONLY },
	{ "look",	DLOOK,		O_RDONLY },
	{ "add",	DINSERT,	O_RDWR },
	{ "insert",	DINSERT,	O_RDWR },
	{ "store",	DINSERT,	O_RDWR },
	{ "delete",	DDELETE,	O_RDWR },
	{ "remove",	DDELETE,	O_RDWR },
	{ "dump",	DCAT,		O_RDONLY },
	{ "list",	DCAT,		O_RDONLY },
	{ "cat",	DCAT,		O_RDONLY },
	{ "creat",	DCREAT,		O_RDWR | O_CREAT | O_TRUNC },
	{ "new",	DCREAT,		O_RDWR | O_CREAT | O_TRUNC },
	{ "build",	DBUILD,		O_RDWR | O_CREAT },
	{ "squash",	DPRESS,		O_RDWR },
	{ "compact",	DPRESS,		O_RDWR },
	{ "compress",	DPRESS,		O_RDWR }
};

/* Number of rows in cmds[]. */
#define CTABSIZ (sizeof (cmds) / sizeof (cmds[0]))
 62 | 
 63 | static cmd *parse();
 64 | static void badk(), doit(), prdatum();
 65 | 
 66 | int
 67 | main(argc, argv)
 68 | int	argc;
 69 | char *argv[];
 70 | {
 71 | 	int c;
 72 | 	register cmd *act;
 73 | 	extern int optind;
 74 | 	extern char *optarg;
 75 | 
 76 | 	progname = argv[0];
 77 | 
 78 | 	while ((c = getopt(argc, argv, "R")) != EOF)
 79 | 		switch (c) {
 80 | 		case 'R':	       /* raw processing  */
 81 | 			rflag++;
 82 | 			break;
 83 | 
 84 | 		default:
 85 | 			oops("usage: %s", usage);
 86 | 			break;
 87 | 		}
 88 | 
 89 | 	if ((argc -= optind) < 2)
 90 | 		oops("usage: %s", usage);
 91 | 
 92 | 	if ((act = parse(argv[optind])) == NULL)
 93 | 		badk(argv[optind]);
 94 | 	optind++;
 95 | 	doit(act, argv[optind]);
 96 | 	return 0;
 97 | }
 98 | 
 99 | static void
100 | doit(act, file)
101 | register cmd *act;
102 | char *file;
103 | {
104 | 	datum key;
105 | 	datum val;
106 | 	register DBM *db;
107 | 	register char *op;
108 | 	register int n;
109 | 	char *line;
110 | #ifdef TIME
111 | 	long start;
112 | 	extern long time();
113 | #endif
114 | 
115 | 	if ((db = dbm_open(file, act->flags, 0644)) == NULL)
116 | 		oops("cannot open: %s", file);
117 | 
118 | 	if ((line = (char *) malloc(LINEMAX)) == NULL)
119 | 		oops("%s: cannot get memory", "line alloc");
120 | 
121 | 	switch (act->scode) {
122 | 
123 | 	case DLOOK:
124 | 		while (fgets(line, LINEMAX, stdin) != NULL) {
125 | 			n = strlen(line) - 1;
126 | 			line[n] = 0;
127 | 			key.dptr = line;
128 | 			key.dsize = n;
129 | 			val = dbm_fetch(db, key);
130 | 			if (val.dptr != NULL) {
131 | 				prdatum(stdout, val);
132 | 				putchar('\n');
133 | 				continue;
134 | 			}
135 | 			prdatum(stderr, key);
136 | 			fprintf(stderr, ": not found.\n");
137 | 		}
138 | 		break;
139 | 	case DINSERT:
140 | 		break;
141 | 	case DDELETE:
142 | 		while (fgets(line, LINEMAX, stdin) != NULL) {
143 | 			n = strlen(line) - 1;
144 | 			line[n] = 0;
145 | 			key.dptr = line;
146 | 			key.dsize = n;
147 | 			if (dbm_delete(db, key) == -1) {
148 | 				prdatum(stderr, key);
149 | 				fprintf(stderr, ": not found.\n");
150 | 			}
151 | 		}
152 | 		break;
153 | 	case DCAT:
154 | 		for (key = dbm_firstkey(db); key.dptr != 0; 
155 | 		     key = dbm_nextkey(db)) {
156 | 			prdatum(stdout, key);
157 | 			putchar('\t');
158 | 			prdatum(stdout, dbm_fetch(db, key));
159 | 			putchar('\n');
160 | 		}
161 | 		break;
162 | 	case DBUILD:
163 | #ifdef TIME
164 | 		start = time(0);
165 | #endif
166 | 		while (fgets(line, LINEMAX, stdin) != NULL) {
167 | 			n = strlen(line) - 1;
168 | 			line[n] = 0;
169 | 			key.dptr = line;
170 | 			if ((op = strchr(line, '\t')) != 0) {
171 | 				key.dsize = op - line;
172 | 				*op++ = 0;
173 | 				val.dptr = op;
174 | 				val.dsize = line + n - op;
175 | 			}
176 | 			else
177 | 				oops("bad input; %s", line);
178 | 	
179 | 			if (dbm_store(db, key, val, DBM_REPLACE) < 0) {
180 | 				prdatum(stderr, key);
181 | 				fprintf(stderr, ": ");
182 | 				oops("store: %s", "failed");
183 | 			}
184 | 		}
185 | #ifdef TIME
186 | 		printf("done: %d seconds.\n", time(0) - start);
187 | #endif
188 | 		break;
189 | 	case DPRESS:
190 | 		break;
191 | 	case DCREAT:
192 | 		break;
193 | 	}
194 | 
195 | 	dbm_close(db);
196 | }
197 | 
198 | static void
199 | badk(word)
200 | char *word;
201 | {
202 | 	register int i;
203 | 
204 | 	if (progname)
205 | 		fprintf(stderr, "%s: ", progname);
206 | 	fprintf(stderr, "bad keywd %s. use one of\n", word);
207 | 	for (i = 0; i < (int)CTABSIZ; i++)
208 | 		fprintf(stderr, "%-8s%c", cmds[i].sname,
209 | 			((i + 1) % 6 == 0) ? '\n' : ' ');
210 | 	fprintf(stderr, "\n");
211 | 	exit(1);
212 | 	/*NOTREACHED*/
213 | }
214 | 
215 | static cmd *
216 | parse(str)
217 | register char *str;
218 | {
219 | 	register int i = CTABSIZ;
220 | 	register cmd *p;
221 | 	
222 | 	for (p = cmds; i--; p++)
223 | 		if (strcmp(p->sname, str) == 0)
224 | 			return p;
225 | 	return NULL;
226 | }
227 | 
228 | static void
229 | prdatum(stream, d)
230 | FILE *stream;
231 | datum d;
232 | {
233 | 	register int c;
234 | 	register char *p = d.dptr;
235 | 	register int n = d.dsize;
236 | 
237 | 	while (n--) {
238 | 		c = *p++ & 0377;
239 | 		if (c & 0200) {
240 | 			fprintf(stream, "M-");
241 | 			c &= 0177;
242 | 		}
243 | 		if (c == 0177 || c < ' ') 
244 | 			fprintf(stream, "^%c", (c == 0177) ? '?' : c + '@');
245 | 		else
246 | 			putc(c, stream);
247 | 	}
248 | }
249 | 
250 | 
251 | 


--------------------------------------------------------------------------------
/grind:
--------------------------------------------------------------------------------
 1 | #!/bin/sh
 2 | rm -f /tmp/*.dir /tmp/*.pag
 3 | awk -e '{
 4 |         printf "%s\t", $0
 5 |         for (i = 0; i < 40; i++)
 6 |                 printf "%s.", $0
 7 |         printf "\n"
 8 | }' < /usr/dict/words | $1 build /tmp/$2
 9 | 
10 | 


--------------------------------------------------------------------------------
/hash.c:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * sdbm - ndbm work-alike hashed database library
 3 |  * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
 4 |  * author: oz@nexus.yorku.ca
 5 |  * status: public domain. keep it that way.
 6 |  *
 7 |  * hashing routine
 8 |  */
 9 | 
10 | #include "sdbm.h"
/*
 * polynomial conversion ignoring overflows
 * [this seems to work remarkably well, in fact better
 * than the ndbm hash function. Replace at your own risk]
 * use: 65599	nice.
 *      65587   even better. 
 *
 * str: the bytes to hash; len: how many of them to use.
 * returns the accumulated value n = byte + 65599 * n over the len
 * bytes, starting from 0. a len of zero or less yields 0 in both the
 * DUFF and the plain build.
 *
 * note: the bytes are read through a plain char pointer, so where
 * char is signed a byte with the high bit set contributes a negative
 * value. this is left alone on purpose: changing it would change the
 * hash values the function produces.
 */
long
dbm_hash(str, len)
register char *str;
register int len;
{
	register unsigned long n = 0;

#ifdef DUFF

#define HASHC	n = *str++ + 65599 * n

	if (len > 0) {
		/* duff's device: 8 steps per trip, remainder handled by the jump */
		register int loop = (len + 8 - 1) >> 3;

		switch(len & (8 - 1)) {
		case 0:	do {
			HASHC;	case 7:	HASHC;
		case 6:	HASHC;	case 5:	HASHC;
		case 4:	HASHC;	case 3:	HASHC;
		case 2:	HASHC;	case 1:	HASHC;
			} while (--loop);
		}

	}
#else
	/* "> 0" keeps a negative len from running off the buffer */
	while (len-- > 0)
		n = *str++ + 65599 * n;
#endif
	return n;
}
48 | 


--------------------------------------------------------------------------------
/makefile:
--------------------------------------------------------------------------------
 1 | #
 2 | # makefile for public domain ndbm-clone: sdbm
 3 | # DUFF: use duff's device (loop unroll) in parts of the code
 4 | #
 5 | CFLAGS = -O -DSDBM -DDUFF -DBSD42
 6 | #LDFLAGS = -p
 7 | 
 8 | OBJS = sdbm.o pair.o hash.o
 9 | SRCS = sdbm.c pair.c hash.c dbu.c dba.c dbd.c util.c
10 | HDRS = tune.h sdbm.h pair.h
11 | MISC = README CHANGES COMPARE sdbm.3 dbe.c dbe.1 dbm.c dbm.h biblio \
12 |        readme.ms readme.ps
13 | 
14 | all: dbu dba dbd dbe
15 | 
16 | dbu: dbu.o sdbm util.o
17 | 	cc $(LDFLAGS) -o dbu dbu.o util.o libsdbm.a
18 | 
19 | dba: dba.o util.o
20 | 	cc $(LDFLAGS) -o dba dba.o util.o
21 | dbd: dbd.o util.o
22 | 	cc $(LDFLAGS) -o dbd dbd.o util.o
23 | dbe: dbe.o sdbm
24 | 	cc $(LDFLAGS) -o dbe dbe.o libsdbm.a
25 | 
26 | sdbm: $(OBJS)
27 | 	ar cr libsdbm.a $(OBJS)
28 | 	ranlib libsdbm.a
29 | ###	cp libsdbm.a /usr/lib/libsdbm.a
30 | 
31 | dba.o: sdbm.h
32 | dbu.o: sdbm.h
33 | util.o:sdbm.h
34 | 
35 | $(OBJS): sdbm.h tune.h pair.h
36 | 
37 | #
38 | # dbu using berkelezoid ndbm routines [if you have them] for testing
39 | #
40 | #x-dbu: dbu.o util.o
41 | #	cc $(CFLAGS) -o x-dbu dbu.o util.o
42 | lint:
43 | 	lint -abchx $(SRCS)
44 | 
45 | clean:
46 | 	rm -f *.o mon.out core
47 | 
48 | purge: 	clean
49 | 	rm -f dbu libsdbm.a dbd dba dbe x-dbu *.dir *.pag
50 | 
51 | shar:
52 | 	shar $(MISC) makefile $(SRCS) $(HDRS) >SDBM.SHAR
53 | 
54 | readme:
55 | 	nroff -ms readme.ms | col -b >README
56 | 


--------------------------------------------------------------------------------
/pair.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * sdbm - ndbm work-alike hashed database library
  3 |  * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
  4 |  * author: oz@nexus.yorku.ca
  5 |  * status: public domain.
  6 |  *
  7 |  * page-level routines
  8 |  */
  9 | 
 10 | #ifndef lint
 11 | static char rcsid[] = "$Id: pair.c,v 1.10 90/12/13 13:00:35 oz Exp $";
 12 | #endif
 13 | 
 14 | #include "sdbm.h"
 15 | #include "tune.h"
 16 | #include "pair.h"
 17 | 
 18 | #ifndef BSD42
 19 | #include <memory.h>
 20 | #endif
 21 | 
 22 | #define exhash(item)	dbm_hash((item).dptr, (item).dsize)
 23 | 
 24 | /* 
 25 |  * forward 
 26 |  */
 27 | static int seepair proto((char *, int, char *, int));
 28 | 
 29 | /*
 30 |  * page format:
 31 |  *	+------------------------------+
 32 |  * ino	| n | keyoff | datoff | keyoff |
 33 |  * 	+------------+--------+--------+
 34 |  *	| datoff | - - - ---->	       |
 35 |  *	+--------+---------------------+
 36 |  *	|	 F R E E A R E A       |
 37 |  *	+--------------+---------------+
 38 |  *	|  <---- - - - | data          |
 39 |  *	+--------+-----+----+----------+
 40 |  *	|  key   | data     | key      |
 41 |  *	+--------+----------+----------+
 42 |  *
 43 |  * calculating the offsets for free area:  if the number
 44 |  * of entries (ino[0]) is zero, the offset to the END of
 45 |  * the free area is the block size. Otherwise, it is the
 46 |  * nth (ino[ino[0]]) entry's offset.
 47 |  */
 48 | 
 49 | int
 50 | fitpair(pag, need)
 51 | char *pag;
 52 | int need;
 53 | {
 54 | 	register int n;
 55 | 	register int off;
 56 | 	register int free;
 57 | 	register short *ino = (short *) pag;
 58 | 
 59 | 	off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ;
 60 | 	free = off - (n + 1) * sizeof(short);
 61 | 	need += 2 * sizeof(short);
 62 | 
 63 | 	debug(("free %d need %d\n", free, need));
 64 | 
 65 | 	return need <= free;
 66 | }
 67 | 
 68 | void
 69 | putpair(pag, key, val)
 70 | char *pag;
 71 | datum key;
 72 | datum val;
 73 | {
 74 | 	register int n;
 75 | 	register int off;
 76 | 	register short *ino = (short *) pag;
 77 | 
 78 | 	off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ;
 79 | /*
 80 |  * enter the key first
 81 |  */
 82 | 	off -= key.dsize;
 83 | 	(void) memcpy(pag + off, key.dptr, key.dsize);
 84 | 	ino[n + 1] = off;
 85 | /*
 86 |  * now the data
 87 |  */
 88 | 	off -= val.dsize;
 89 | 	(void) memcpy(pag + off, val.dptr, val.dsize);
 90 | 	ino[n + 2] = off;
 91 | /*
 92 |  * adjust item count
 93 |  */
 94 | 	ino[0] += 2;
 95 | }
 96 | 
 97 | datum
 98 | getpair(pag, key)
 99 | char *pag;
100 | datum key;
101 | {
102 | 	register int i;
103 | 	register int n;
104 | 	datum val;
105 | 	register short *ino = (short *) pag;
106 | 
107 | 	if ((n = ino[0]) == 0)
108 | 		return nullitem;
109 | 
110 | 	if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
111 | 		return nullitem;
112 | 
113 | 	val.dptr = pag + ino[i + 1];
114 | 	val.dsize = ino[i] - ino[i + 1];
115 | 	return val;
116 | }
117 | 
#ifdef SEEDUPS
/*
 * duppair: return non-zero if key is already present in page pag,
 * 0 if it is not (or the page holds no entries).
 */
int
duppair(pag, key)
char *pag;
datum key;
{
	register short *ino = (short *) pag;

	if (ino[0] <= 0)
		return 0;
	return seepair(pag, ino[0], key.dptr, key.dsize) > 0;
}
#endif
128 | 
129 | datum
130 | getnkey(pag, num)
131 | char *pag;
132 | int num;
133 | {
134 | 	datum key;
135 | 	register int off;
136 | 	register short *ino = (short *) pag;
137 | 
138 | 	num = num * 2 - 1;
139 | 	if (ino[0] == 0 || num > ino[0])
140 | 		return nullitem;
141 | 
142 | 	off = (num > 1) ? ino[num - 1] : PBLKSIZ;
143 | 
144 | 	key.dptr = pag + ino[num];
145 | 	key.dsize = off - ino[num];
146 | 
147 | 	return key;
148 | }
149 | 
150 | int
151 | delpair(pag, key)
152 | char *pag;
153 | datum key;
154 | {
155 | 	register int n;
156 | 	register int i;
157 | 	register short *ino = (short *) pag;
158 | 
159 | 	if ((n = ino[0]) == 0)
160 | 		return 0;
161 | 
162 | 	if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
163 | 		return 0;
164 | /*
165 |  * found the key. if it is the last entry
166 |  * [i.e. i == n - 1] we just adjust the entry count.
167 |  * hard case: move all data down onto the deleted pair,
168 |  * shift offsets onto deleted offsets, and adjust them.
169 |  * [note: 0 < i < n]
170 |  */
171 | 	if (i < n - 1) {
172 | 		register int m;
173 | 		register char *dst = pag + (i == 1 ? PBLKSIZ : ino[i - 1]);
174 | 		register char *src = pag + ino[i + 1];
175 | 		register int   zoo = dst - src;
176 | 
177 | 		debug(("free-up %d ", zoo));
178 | /*
179 |  * shift data/keys down
180 |  */
181 | 		m = ino[i + 1] - ino[n];
182 | #ifdef DUFF
183 | #define MOVB 	*--dst = *--src
184 | 
185 | 		if (m > 0) {
186 | 			register int loop = (m + 8 - 1) >> 3;
187 | 
188 | 			switch (m & (8 - 1)) {
189 | 			case 0:	do {
190 | 				MOVB;	case 7:	MOVB;
191 | 			case 6:	MOVB;	case 5:	MOVB;
192 | 			case 4:	MOVB;	case 3:	MOVB;
193 | 			case 2:	MOVB;	case 1:	MOVB;
194 | 				} while (--loop);
195 | 			}
196 | 		}
197 | #else
198 | #ifdef MEMMOVE
199 | 		memmove(dst - m, src - m, m);
200 | #else
201 | 		while (m--)
202 | 			*--dst = *--src;
203 | #endif
204 | #endif
205 | /*
206 |  * adjust offset index up
207 |  */
208 | 		while (i < n - 1) {
209 | 			ino[i] = ino[i + 2] + zoo;
210 | 			i++;
211 | 		}
212 | 	}
213 | 	ino[0] -= 2;
214 | 	return 1;
215 | }
216 | 
217 | /*
218 |  * search for the key in the page.
219 |  * return offset index in the range 0 < i < n.
220 |  * return 0 if not found.
221 |  */
222 | static int
223 | seepair(pag, n, key, siz)
224 | char *pag;
225 | register int n;
226 | register char *key;
227 | register int siz;
228 | {
229 | 	register int i;
230 | 	register int off = PBLKSIZ;
231 | 	register short *ino = (short *) pag;
232 | 
233 | 	for (i = 1; i < n; i += 2) {
234 | 		if (siz == off - ino[i] &&
235 | 		    memcmp(key, pag + ino[i], siz) == 0)
236 | 			return i;
237 | 		off = ino[i + 1];
238 | 	}
239 | 	return 0;
240 | }
241 | 
242 | void
243 | splpage(pag, new, sbit)
244 | char *pag;
245 | char *new;
246 | long sbit;
247 | {
248 | 	datum key;
249 | 	datum val;
250 | 
251 | 	register int n;
252 | 	register int off = PBLKSIZ;
253 | 	char cur[PBLKSIZ];
254 | 	register short *ino = (short *) cur;
255 | 
256 | 	(void) memcpy(cur, pag, PBLKSIZ);
257 | 	(void) memset(pag, 0, PBLKSIZ);
258 | 	(void) memset(new, 0, PBLKSIZ);
259 | 
260 | 	n = ino[0];
261 | 	for (ino++; n > 0; ino += 2) {
262 | 		key.dptr = cur + ino[0]; 
263 | 		key.dsize = off - ino[0];
264 | 		val.dptr = cur + ino[1];
265 | 		val.dsize = ino[0] - ino[1];
266 | /*
267 |  * select the page pointer (by looking at sbit) and insert
268 |  */
269 | 		(void) putpair((exhash(key) & sbit) ? new : pag, key, val);
270 | 
271 | 		off = ino[1];
272 | 		n -= 2;
273 | 	}
274 | 
275 | 	debug(("%d split %d/%d\n", ((short *) cur)[0] / 2, 
276 | 	       ((short *) new)[0] / 2,
277 | 	       ((short *) pag)[0] / 2));
278 | }
279 | 
280 | /*
281 |  * check page sanity: 
282 |  * number of entries should be something
283 |  * reasonable, and all offsets in the index should be in order.
284 |  * this could be made more rigorous.
285 |  */
286 | int
287 | chkpage(pag)
288 | char *pag;
289 | {
290 | 	register int n;
291 | 	register int off;
292 | 	register short *ino = (short *) pag;
293 | 
294 | 	if ((n = ino[0]) < 0 || n > PBLKSIZ / sizeof(short))
295 | 		return 0;
296 | 
297 | 	if (n > 0) {
298 | 		off = PBLKSIZ;
299 | 		for (ino++; n > 0; ino += 2) {
300 | 			if (ino[0] > off || ino[1] > off ||
301 | 			    ino[1] > ino[0])
302 | 				return 0;
303 | 			off = ino[1];
304 | 			n -= 2;
305 | 		}
306 | 	}
307 | 	return 1;
308 | }
309 | 


--------------------------------------------------------------------------------
/pair.h:
--------------------------------------------------------------------------------
 1 | extern int fitpair proto((char *, int));
 2 | extern void  putpair proto((char *, datum, datum));
 3 | extern datum	getpair proto((char *, datum));
 4 | extern int  delpair proto((char *, datum));
 5 | extern int  chkpage proto((char *));
 6 | extern datum getnkey proto((char *, int));
 7 | extern void splpage proto((char *, char *, long));
 8 | #ifdef SEEDUPS
 9 | extern int duppair proto((char *, datum));
10 | #endif
11 | 


--------------------------------------------------------------------------------
/readme.ms:
--------------------------------------------------------------------------------
  1 | .\" tbl | readme.ms | [tn]roff -ms | ...
  2 | .\" note the "C" (courier) and "CB" fonts: you will probably have to
  3 | .\" change these.
  4 | .\" $Id: readme.ms,v 1.1 90/12/13 13:09:15 oz Exp Locker: oz $
  5 | 
  6 | .de P1
  7 | .br
  8 | .nr dT 4
  9 | .nf
 10 | .ft C
 11 | .sp .5
 12 | .nr t \\n(dT*\\w'x'u
 13 | .ta 1u*\\ntu 2u*\\ntu 3u*\\ntu 4u*\\ntu 5u*\\ntu 6u*\\ntu 7u*\\ntu 8u*\\ntu 9u*\\ntu 10u*\\ntu 11u*\\ntu 12u*\\ntu 13u*\\ntu 14u*\\ntu
 14 | ..
 15 | .de P2
 16 | .br
 17 | .ft 1
 18 | .br
 19 | .sp .5
 20 | .br
 21 | .fi
 22 | ..
 23 | .\" CW uses the typewriter/courier font.
 24 | .de CW
 25 | \fC\\$1\\fP\\$2
 26 | ..
 27 | 
 28 | .\" Footnote numbering [by Henry Spencer]
 29 | .\" <text>\*f for a footnote number..
 30 | .\" .FS
 31 | .\" \*F <footnote text>
 32 | .\" .FE
 33 | .\"
 34 | .ds f \\u\\s-2\\n+f\\s+2\\d
 35 | .nr f 0 1
 36 | .ds F \\n+F.
 37 | .nr F 0 1
 38 | 
 39 | .ND
 40 | .LP
 41 | .TL
 42 | \fIsdbm\fP \(em Substitute DBM
 43 | .br
 44 | or
 45 | .br
 46 | Berkeley \fIndbm\fP for Every UN*X\** Made Simple
 47 | .AU
 48 | Ozan (oz) Yigit
 49 | .AI
 50 | The Guild of PD Software Toolmakers
 51 | Toronto - Canada
 52 | .sp
 53 | oz@nexus.yorku.ca
 54 | .LP
 55 | .FS
 56 | UN*X is not a trademark of any (dis)organization.
 57 | .FE
 58 | .sp 2
 59 | \fIImplementation is the sincerest form of flattery. \(em L. Peter Deutsch\fP
 60 | .SH
 61 | A The Clone of the \fIndbm\fP library
 62 | .PP
 63 | The sources accompanying this notice \(em \fIsdbm\fP \(em constitute
 64 | the first public release (Dec. 1990) of a complete clone of
 65 | the Berkeley UN*X \fIndbm\fP library. The \fIsdbm\fP library is meant to
 66 | clone the proven functionality of \fIndbm\fP as closely as possible,
 67 | including a few improvements. It is practical, easy to understand, and
 68 | compatible.
 69 | The \fIsdbm\fP library is not derived from any licensed, proprietary or
 70 | copyrighted software.
 71 | .PP
 72 | The \fIsdbm\fP implementation is based on a 1978 algorithm
 73 | [Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''.
 74 | In the course of searching for a substitute for \fIndbm\fP, I
 75 | prototyped three different external-hashing algorithms [Lar78, Fag79, Lit80]
 76 | and ultimately chose Larson's algorithm as a basis of the \fIsdbm\fP
 77 | implementation. The Bell Labs
 78 | \fIdbm\fP (and therefore \fIndbm\fP) is based on an algorithm invented by
 79 | Ken Thompson, [Tho90, Tor87] and predates Larson's work.
 80 | .PP
 81 | The \fIsdbm\fR programming interface is totally compatible
 82 | with \fIndbm\fP and includes a slight improvement in database initialization.
 83 | It is also expected to be binary-compatible under most UN*X versions that
 84 | support the \fIndbm\fP library.
 85 | .PP
 86 | The \fIsdbm\fP implementation shares the shortcomings of the \fIndbm\fP
 87 | library, as a side effect of various simplifications to the original Larson
 88 | algorithm. It does produce \fIholes\fP in the page file as it writes
 89 | pages past the end of file. (Larson's paper includes a clever solution to
 90 | this problem that is a result of using the hash value directly as a block
 91 | address.) On the other hand, extensive tests seem to indicate that \fIsdbm\fP
 92 | creates fewer holes in general, and the resulting pagefiles are
 93 | smaller. The \fIsdbm\fP implementation is also faster than \fIndbm\fP
 94 | in database creation.
 95 | Unlike the \fIndbm\fP, the \fIsdbm\fP
 96 | .CW store
 97 | operation will not ``wander away'' trying to split its
 98 | data pages to insert a datum that \fIcannot\fP (due to elaborate worst-case
 99 | situations) be inserted. (It will fail after a pre-defined number of attempts.)
100 | .SH
101 | Important Compatibility Warning
102 | .PP
103 | The \fIsdbm\fP and \fIndbm\fP
104 | libraries \fIcannot\fP share databases: one cannot read the (dir/pag)
105 | database created by the other. This is due to the differences
106 | between the \fIndbm\fP and \fIsdbm\fP algorithms\**, 
107 | .FS
108 | Torek's discussion [Tor87]
109 | indicates that \fIdbm/ndbm\fP implementations use the hash
110 | value to traverse the radix trie differently than \fIsdbm\fP
111 | and as a result, the page indexes are generated in \fIdifferent\fP order.
112 | For more information, send e-mail to the author.
113 | .FE
114 | and the hash functions
115 | used.
116 | It is easy to convert between the \fIdbm/ndbm\fP databases and \fIsdbm\fP
117 | by ignoring the index completely: see
118 | .CW dbd ,
119 | .CW dbu
120 | etc.
121 | .R
122 | .LP
123 | .SH
124 | Notice of Intellectual Property
125 | .LP
126 | \fIThe entire\fP sdbm  \fIlibrary package, as authored by me,\fP Ozan S. Yigit,
127 | \fIis hereby placed in the public domain.\fP As such, the author is not
128 | responsible for the consequences of use of this software, no matter how
129 | awful, even if they arise from defects in it. There is no expressed or
130 | implied warranty for the \fIsdbm\fP library.
131 | .PP
132 | Since the \fIsdbm\fP
133 | library package is in the public domain, this \fIoriginal\fP
134 | release or any additional public-domain releases of the modified original
135 | cannot possibly (by definition) be withheld from you. Also by definition,
136 | You (singular) have all the rights to this code (including the right to
137 | sell without permission, the right to hoard\**
138 | .FS
139 | You cannot really hoard something that is available to the public at
140 | large, but try if it makes you feel any better.
141 | .FE
142 | and the right to do other icky things as
143 | you see fit) but those rights are also granted to everyone else.
144 | .PP
145 | Please note that all previous distributions of this software contained
146 | a copyright (which is now dropped) to protect its
147 | origins and its current public domain status against any possible claims
148 | and/or challenges.
149 | .SH
150 | Acknowledgments
151 | .PP
152 | Many people have been very helpful and supportive.  A partial list would
153 | necessarily include Rayan Zachariassen (who contributed the man page,
154 | and also hacked a MMAP version of \fIsdbm\fP),
155 | Arnold Robbins, Chris Lewis,
156 | Bill Davidsen, Henry Spencer, Geoff Collyer, Rich Salz (who got me started
157 | in the first place), Johannes Ruschein
158 | (who did the minix port) and David Tilbrook. I thank you all.
159 | .SH
160 | Distribution Manifest and Notes
161 | .LP
162 | This distribution of \fIsdbm\fP includes (at least) the following:
163 | .P1
164 | 	CHANGES		change log
165 | 	README		this file.
166 | 	biblio		a small bibliography on external hashing
167 | 	dba.c		a crude (n/s)dbm page file analyzer
168 | 	dbd.c		a crude (n/s)dbm page file dumper (for conversion)
169 | 	dbe.1		man page for dbe.c
170 | 	dbe.c		Janick's database editor
171 | 	dbm.c		a dbm library emulation wrapper for ndbm/sdbm
172 | 	dbm.h		header file for the above
173 | 	dbu.c		a crude db management utility
174 | 	hash.c		hashing function
175 | 	makefile	guess.
176 | 	pair.c		page-level routines (posted earlier)
177 | 	pair.h		header file for the above
178 | 	readme.ms	troff source for the README file
179 | 	sdbm.3		man page
180 | 	sdbm.c		the real thing
181 | 	sdbm.h		header file for the above
182 | 	tune.h		place for tuning & portability thingies
183 | 	util.c		miscellaneous
184 | .P2
185 | .PP
186 | .CW dbu
187 | is a simple database manipulation program\** that tries to look
188 | .FS
189 | The 
190 | .CW dbd ,
191 | .CW dba ,
192 | .CW dbu
193 | utilities are quick hacks and are not fit for production use. They were
194 | developed late one night, just to test out \fIsdbm\fP, and convert some
195 | databases.
196 | .FE
197 | like Bell Labs'
198 | .CW cbt
199 | utility. It is currently incomplete in functionality.
200 | I use
201 | .CW dbu
202 | to test out the routines: it takes (from stdin) tab separated
203 | key/value pairs for commands like
204 | .CW build
205 | or
206 | .CW insert
207 | or takes keys for
208 | commands like
209 | .CW delete
210 | or
211 | .CW look .
212 | .P1
213 | 	dbu <build|creat|look|insert|cat|delete> dbmfile
214 | .P2
215 | .PP
216 | .CW dba
217 | is a crude analyzer of \fIdbm/sdbm/ndbm\fP
218 | page files. It scans the entire
219 | page file, reporting page level statistics, and totals at the end.
220 | .PP
221 | .CW dbd
222 | is a crude dump program for \fIdbm/ndbm/sdbm\fP
223 | databases. It ignores the
224 | bitmap, and dumps the data pages in sequence. It can be used to create
225 | input for the
226 | .CW dbu 
227 | utility.
228 | Note that
229 | .CW dbd
230 | will skip any NULLs in the key and data
231 | fields, thus is unsuitable to convert some peculiar databases that
232 | insist on including the terminating null.
233 | .PP
234 | I have also included a copy of the
235 | .CW dbe
236 | (\fIndbm\fP DataBase Editor) by Janick Bergeron [janick@bnr.ca] for
237 | your pleasure. You may find it more useful than the little
238 | .CW dbu
239 | utility.
240 | .PP
241 | .CW dbm.[ch]
242 | is a \fIdbm\fP library emulation on top of \fIndbm\fP
243 | (and hence suitable for \fIsdbm\fP). Written by Robert Elz.
244 | .PP
245 | The \fIsdbm\fP
246 | library has been around in beta test for quite a long time, and from whatever
247 | little feedback I received (maybe no news is good news), I believe it has been
248 | functioning without any significant problems. I would, of course, appreciate
249 | all fixes and/or improvements. Portability enhancements would especially be
250 | useful.
251 | .SH
252 | Implementation Issues
253 | .PP
254 | Hash functions:
255 | The algorithm behind \fIsdbm\fP implementation needs a good bit-scrambling
256 | hash function to be effective. I ran into a set of constants for a simple
257 | hash function that seem to help \fIsdbm\fP perform better than \fIndbm\fP
258 | for various inputs:
259 | .P1
260 | 	/*
261 | 	 * polynomial conversion ignoring overflows
262 | 	 * 65599 nice. 65587 even better.
263 | 	 */
264 | 	long
265 | 	dbm_hash(char *str, int len) {
266 | 		register unsigned long n = 0;
267 | 	
268 | 		while (len--)
269 | 			n = n * 65599 + *str++;
270 | 		return n;
271 | 	}
272 | .P2
273 | .PP
274 | There may be better hash functions for the purposes of dynamic hashing.
275 | Try your favorite, and check the pagefile. If it contains too many pages
276 | with too many holes, (in relation to this one for example) or if
277 | \fIsdbm\fP
278 | simply stops working (fails after 
279 | .CW SPLTMAX
280 | attempts to split) when you feed your
281 | NEWS 
282 | .CW history
283 | file to it, you probably do not have a good hashing function.
284 | If you do better (for different types of input), I would like to know
285 | about the function you use.
286 | .PP
287 | Block sizes: It seems (from various tests on a few machines) that a page
288 | file block size
289 | .CW PBLKSIZ
290 | of 1024 is by far the best for performance, but
291 | this also happens to limit the size of a key/value pair. Depending on your
292 | needs, you may wish to increase the page size, and also adjust
293 | .CW PAIRMAX
294 | (the maximum size of a key/value pair allowed: should always be at least
295 | three words smaller than
296 | .CW PBLKSIZ .)
297 | accordingly. The system-wide version of the library
298 | should probably be
299 | configured with 1024 (distribution default), as this appears to be sufficient
300 | for most common uses of \fIsdbm\fP.
301 | .SH
302 | Portability
303 | .PP
304 | This package has been tested in many different UN*Xes even including minix,
305 | and appears to be reasonably portable. This does not mean it will port
306 | easily to non-UN*X systems.
307 | .SH
308 | Notes and Miscellaneous
309 | .PP
310 | The \fIsdbm\fP is not a very complicated package, at least not after you
311 | familiarize yourself with the literature on external hashing. There are
312 | other interesting algorithms in existence that ensure (approximately)
313 | single-read access to a data value associated with any key. These are
314 | directory-less schemes such as \fIlinear hashing\fP [Lit80] (+ Larson
315 | variations), \fIspiral storage\fP [Mar79] or directory schemes such as
316 | \fIextensible hashing\fP [Fag79] by Fagin et al. I do hope these sources
317 | provide a reasonable playground for experimentation with other algorithms.
318 | See the June 1988 issue of ACM Computing Surveys [Enb88] for an
319 | excellent overview of the field. 
320 | .PG
321 | .SH
322 | References
323 | .LP
324 | .IP [Lar78] 4m
325 | P.-A. Larson,
326 | ``Dynamic Hashing'', \fIBIT\fP, vol.  18,  pp. 184-201, 1978.
327 | .IP [Tho90] 4m
328 | Ken Thompson, \fIprivate communication\fP, Nov. 1990
329 | .IP [Lit80] 4m
330 | W. Litwin,
331 | ``Linear Hashing: A new tool  for  file  and table addressing'',
332 | \fIProceedings of the 6th Conference on Very Large  Databases  (Montreal)\fP,
333 | pp.  212-223,  Very Large Database Foundation, Saratoga, Calif., 1980.
334 | .IP [Fag79] 4m
335 | R. Fagin, J.  Nievergelt,  N.  Pippenger,  and  H.  R. Strong,
336 | ``Extendible Hashing - A Fast Access Method for Dynamic Files'',
337 | \fIACM Trans. Database Syst.\fP, vol. 4,  no.3, pp. 315-344, Sept. 1979.
338 | .IP [Wal84] 4m
339 | Rich Wales,
340 | ``Discussion of "dbm" data base system'', \fIUSENET newsgroup unix.wizards\fP,
341 | Jan. 1984.
342 | .IP [Tor87] 4m
343 | Chris Torek,
344 | ``Re:  dbm.a  and  ndbm.a  archives'', \fIUSENET newsgroup comp.unix\fP,
345 | 1987.
346 | .IP [Mar79] 4m
347 | G. N. Martin,
348 | ``Spiral Storage: Incrementally  Augmentable  Hash  Addressed  Storage'',
349 | \fITechnical Report #27\fP, University of Warwick, Coventry, U.K., 1979.
350 | .IP [Enb88] 4m
351 | R. J. Enbody and H. C. Du,
352 | ``Dynamic Hashing  Schemes'', \fIACM Computing Surveys\fP,
353 | vol. 20, no. 2, pp. 85-113, June 1988.
354 | 


--------------------------------------------------------------------------------
/readme.txt:
--------------------------------------------------------------------------------
  1 | 
  2 | 
  3 | 
  4 | 
  5 | 
  6 | 
  7 |                    sdbm - Substitute DBM
  8 |                              or
  9 |         Berkeley ndbm for Every UN*X[1] Made Simple
 10 | 
 11 |                       Ozan (oz) Yigit
 12 | 
 13 |             The Guild of PD Software Toolmakers
 14 |                       Toronto - Canada
 15 | 
 16 |                      oz@nexus.yorku.ca
 17 | 
 18 | 
 19 | 
 20 | Implementation is the sincerest form of flattery. - L. Peter
 21 | Deutsch
 22 | 
 23 | A The Clone of the ndbm library
 24 | 
 25 |      The sources accompanying this notice - sdbm  -  consti-
 26 | tute  the  first  public  release  (Dec. 1990) of a complete
 27 | clone of the Berkeley UN*X ndbm library. The sdbm library is
 28 | meant  to  clone the proven functionality of ndbm as closely
 29 | as possible, including a few improvements. It is  practical,
 30 | easy to understand, and compatible.  The sdbm library is not
 31 | derived  from  any  licensed,  proprietary  or   copyrighted
 32 | software.
 33 | 
 34 |      The sdbm implementation is based on  a  1978  algorithm
 35 | [Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''.
 36 | In the course of searching for a substitute for ndbm, I pro-
 37 | totyped  three different external-hashing algorithms [Lar78,
 38 | Fag79, Lit80] and ultimately chose Larson's algorithm  as  a
 39 | basis  of  the  sdbm  implementation. The Bell Labs dbm (and
 40 | therefore ndbm) is based on an  algorithm  invented  by  Ken
 41 | Thompson, [Tho90, Tor87] and predates Larson's work.
 42 | 
 43 |      The sdbm programming interface  is  totally  compatible
 44 | with ndbm and includes a slight improvement in database ini-
 45 | tialization.  It is also expected  to  be  binary-compatible
 46 | under most UN*X versions that support the ndbm library.
 47 | 
 48 |      The sdbm implementation shares the shortcomings of  the
 49 | ndbm library, as a side effect of various simplifications to
 50 | the original Larson algorithm. It does produce holes in  the
 51 | page file as it writes pages past the end of file. (Larson's
 52 | paper includes a clever solution to this problem that  is  a
 53 | result of using the hash value directly as a block address.)
 54 | On the other hand, extensive tests  seem  to  indicate  that
 55 | sdbm creates fewer holes in general, and the resulting page-
 56 | files are smaller. The sdbm implementation  is  also  faster
 57 | than  ndbm  in database creation.  Unlike the ndbm, the sdbm
 58 | _________________________
 59 | 
 60 |   [1] UN*X is not a trademark of any (dis)organization.
 61 | 
 62 | 
 63 | 
 64 | 
 65 | 
 66 | 
 67 | 
 68 | 
 69 | 
 70 |                            - 2 -
 71 | 
 72 | 
 73 | store operation will not ``wander away'' trying to split its
 74 | data  pages  to insert a datum that cannot (due to elaborate
 75 | worst-case situations) be inserted. (It will  fail  after  a
 76 | pre-defined number of attempts.)
 77 | 
 78 | Important Compatibility Warning
 79 | 
 80 |      The sdbm and ndbm libraries cannot share databases: one
 81 | cannot  read  the  (dir/pag)  database created by the other.
 82 | This is due to the differences between  the  ndbm  and  sdbm
 83 | algorithms[2], and the hash functions used.  It is  easy  to
 84 | convert  between the dbm/ndbm databases and sdbm by ignoring
 85 | the index completely: see dbd, dbu etc.
 86 | 
 87 | 
 88 | Notice of Intellectual Property
 89 | 
 90 | The entire sdbm  library package, as authored by me, Ozan S.
 91 | Yigit,  is  hereby placed in the public domain. As such, the
 92 | author is not responsible for the  consequences  of  use  of
 93 | this  software, no matter how awful, even if they arise from
 94 | defects in it. There is no expressed or implied warranty for
 95 | the sdbm library.
 96 | 
 97 |      Since the sdbm library package is in the public domain,
 98 | this   original  release  or  any  additional  public-domain
 99 | releases of the modified original cannot possibly (by defin-
100 | ition) be withheld from you. Also by definition, You (singu-
101 | lar) have all the rights to this code (including  the  right
102 | to sell without permission, the right to  hoard[3]  and  the
103 | right  to  do  other  icky  things as you see fit) but those
104 | rights are also granted to everyone else.
105 | 
106 |      Please note that all  previous  distributions  of  this
107 | software  contained  a  copyright  (which is now dropped) to
108 | protect its origins and its  current  public  domain  status
109 | against any possible claims and/or challenges.
110 | 
111 | Acknowledgments
112 | 
113 |      Many people have been very helpful and  supportive.   A
114 | partial  list  would necessarily  include Rayan Zachariassen
115 | (who contributed the  man  page,  and  also  hacked  a  MMAP
116 | _________________________
117 | 
118 |   [2] Torek's   discussion   [Tor87]   indicates   that
119 | dbm/ndbm implementations use the hash value to traverse
120 | the radix trie differently than sdbm and as  a  result,
121 | the page indexes are generated in different order.  For
122 | more information, send e-mail to the author.
123 |   [3] You  cannot really hoard something that is avail-
124 | able to the public at large, but try if  it  makes  you
125 | feel any better.
126 | 
127 | 
128 | 
129 | 
130 | 
131 | 
132 | 
133 | 
134 | 
135 | 
136 |                            - 3 -
137 | 
138 | 
139 | version of sdbm), Arnold Robbins, Chris Lewis,  Bill  David-
140 | sen,  Henry  Spencer,  Geoff  Collyer, Rich Salz (who got me
141 | started in the first place), Johannes Ruschein (who did  the
142 | minix port) and David Tilbrook. I thank you all.
143 | 
144 | Distribution Manifest and Notes
145 | 
146 | This distribution of sdbm includes (at least) the following:
147 | 
148 |     CHANGES     change log
149 |     README      this file.
150 |     biblio      a small bibliography on external hashing
151 |     dba.c       a crude (n/s)dbm page file analyzer
152 |     dbd.c       a crude (n/s)dbm page file dumper (for conversion)
153 |     dbe.1       man page for dbe.c
154 |     dbe.c       Janick's database editor
155 |     dbm.c       a dbm library emulation wrapper for ndbm/sdbm
156 |     dbm.h       header file for the above
157 |     dbu.c       a crude db management utility
158 |     hash.c      hashing function
159 |     makefile    guess.
160 |     pair.c      page-level routines (posted earlier)
161 |     pair.h      header file for the above
162 |     readme.ms   troff source for the README file
163 |     sdbm.3      man page
164 |     sdbm.c      the real thing
165 |     sdbm.h      header file for the above
166 |     tune.h      place for tuning & portability thingies
167 |     util.c      miscellaneous
168 | 
169 |      dbu is a simple database manipulation  program[4]  that
170 | tries to look like Bell Labs' cbt utility. It  is  currently
171 | incomplete in functionality.  I use dbu to test out the rou-
172 | tines: it takes (from stdin) tab separated  key/value  pairs
173 | for commands like build or insert or takes keys for commands
174 | like delete or look.
175 | 
176 |     dbu <build|creat|look|insert|cat|delete> dbmfile
177 | 
178 |      dba is a crude analyzer of dbm/sdbm/ndbm page files. It
179 | scans the entire page file, reporting page level statistics,
180 | and totals at the end.
181 | 
182 |      dbd is a crude dump  program  for  dbm/ndbm/sdbm  data-
183 | bases.  It  ignores  the bitmap, and dumps the data pages in
184 | sequence. It can be used to create input for the  dbu  util-
185 | ity.   Note that dbd will skip any NULLs in the key and data
186 | fields,  thus  is  unsuitable  to  convert   some   peculiar
187 | _________________________
188 | 
189 |   [4] The dbd, dba, dbu utilities are quick  hacks  and
190 | are  not  fit  for  production use. They were developed
191 | late one night, just to test out sdbm, and convert some
192 | databases.
193 | 
194 | 
195 | 
196 | 
197 | 
198 | 
199 | 
200 | 
201 | 
202 |                            - 4 -
203 | 
204 | 
205 | databases that insist on including the terminating null.
206 | 
207 |      I have also included a copy of the dbe  (ndbm  DataBase
208 | Editor)  by  Janick Bergeron [janick@bnr.ca] for your pleas-
209 | ure. You may find it more useful than the little  dbu  util-
210 | ity.
211 | 
212 |      dbm.[ch] is a dbm library emulation on top of ndbm (and
213 | hence suitable for sdbm). Written by Robert Elz.
214 | 
215 |      The sdbm library has been around in beta test for quite
216 | a  long  time,  and from whatever little feedback I received
217 | (maybe no news is good news), I believe it  has  been  func-
218 | tioning  without  any  significant  problems.  I  would,  of
219 | course, appreciate all fixes and/or improvements.  Portabil-
220 | ity enhancements would especially be useful.
221 | 
222 | Implementation Issues
223 | 
224 |      Hash functions: The algorithm behind  sdbm  implementa-
225 | tion  needs a good bit-scrambling hash function to be effec-
226 | tive. I ran into a set of constants for a simple hash  func-
227 | tion  that  seem  to  help sdbm perform better than ndbm for
228 | various inputs:
229 | 
230 |     /*
231 |      * polynomial conversion ignoring overflows
232 |      * 65599 nice. 65587 even better.
233 |      */
234 |     long
235 |     dbm_hash(char *str, int len) {
236 |         register unsigned long n = 0;
237 | 
238 |         while (len--)
239 |             n = n * 65599 + *str++;
240 |         return n;
241 |     }
242 | 
243 |      There may be better hash functions for the purposes  of
244 | dynamic hashing.  Try your favorite, and check the pagefile.
245 | If it contains too many pages with too many holes, (in rela-
246 | tion  to this one for example) or if sdbm simply stops work-
247 | ing (fails after SPLTMAX attempts to split)  when  you  feed
248 | your  NEWS  history  file  to it, you probably do not have a
249 | good hashing function.  If  you  do  better  (for  different
250 | types of input), I would like to know about the function you
251 | use.
252 | 
253 |      Block sizes: It seems (from  various  tests  on  a  few
254 | machines)  that a page file block size PBLKSIZ of 1024 is by
255 | far the best for performance, but this also happens to limit
256 | the  size  of a key/value pair. Depending on your needs, you
257 | may wish to increase the page size, and also adjust  PAIRMAX
258 | (the maximum size of a key/value pair allowed: should always
259 | 
260 | 
261 | 
262 | 
263 | 
264 | 
265 | 
266 | 
267 | 
268 |                            - 5 -
269 | 
270 | 
271 | be at least three words smaller than PBLKSIZ)  accordingly.
272 | The  system-wide  version  of the library should probably be
273 | configured with 1024 (distribution default), as this appears
274 | to be sufficient for most common uses of sdbm.
275 | 
276 | Portability
277 | 
278 |      This package has been tested in many  different  UN*Xes
279 | even including minix, and appears to be reasonably portable.
280 | This does not mean it will port easily to non-UN*X systems.
281 | 
282 | Notes and Miscellaneous
283 | 
284 |      The sdbm is not a very complicated  package,  at  least
285 | not  after  you  familiarize yourself with the literature on
286 | external hashing. There are other interesting algorithms  in
287 | existence  that ensure (approximately) single-read access to
288 | a data value associated with any key. These  are  directory-
289 | less schemes such as linear hashing [Lit80] (+ Larson varia-
290 | tions), spiral storage [Mar79] or directory schemes such  as
291 | extensible  hashing  [Fag79] by Fagin et al. I do hope these
292 | sources provide a reasonable playground for  experimentation
293 | with  other algorithms.  See the June 1988 issue of ACM Com-
294 | puting Surveys [Enb88] for  an  excellent  overview  of  the
295 | field.
296 | 
297 | References
298 | 
299 | 
300 | [Lar78]
301 |     P.-A. Larson, ``Dynamic Hashing'', BIT, vol.   18,   pp.
302 |     184-201, 1978.
303 | 
304 | [Tho90]
305 |     Ken Thompson, private communication, Nov. 1990
306 | 
307 | [Lit80]
308 |     W. Litwin, `` Linear Hashing: A new tool  for  file  and
309 |     table addressing'', Proceedings of the 6th Conference on
310 |     Very Large  Databases  (Montreal), pp.   212-223,   Very
311 |     Large Database Foundation, Saratoga, Calif., 1980.
312 | 
313 | [Fag79]
314 |     R. Fagin, J.  Nievergelt,  N.  Pippenger,  and   H.   R.
315 |     Strong,  ``Extendible Hashing - A Fast Access Method for
316 |     Dynamic Files'', ACM  Trans.  Database  Syst.,  vol.  4,
317 |     no. 3, pp. 315-344, Sept. 1979.
318 | 
319 | [Wal84]
320 |     Rich Wales, ``Discussion of "dbm"  data  base  system'',
321 |     USENET newsgroup unix.wizards, Jan. 1984.
322 | 
323 | [Tor87]
324 |     Chris Torek,  ``Re:   dbm.a   and   ndbm.a   archives'',
325 | 
326 | 
327 | 
328 | 
329 | 
330 | 
331 | 
332 | 
333 | 
334 |                            - 6 -
335 | 
336 | 
337 |     USENET newsgroup comp.unix, 1987.
338 | 
339 | [Mar79]
340 |     G. N. Martin, ``Spiral Storage: Incrementally   Augment-
341 |     able   Hash  Addressed  Storage'', Technical Report #27,
342 |     University of Warwick, Coventry, U.K., 1979.
343 | 
344 | [Enb88]
345 |     R.  J.  Enbody  and  H.   C.   Du,   ``Dynamic   Hashing
346 |     Schemes'', ACM Computing  Surveys,  vol.  20, no. 2, pp.
347 |     85-113, June 1988.
348 | 
349 | 
350 | 
351 | 
352 | 
353 | 
354 | 
355 | 
356 | 
357 | 
358 | 
359 | 
360 | 
361 | 
362 | 
363 | 
364 | 
365 | 
366 | 
367 | 
368 | 
369 | 
370 | 
371 | 
372 | 
373 | 
374 | 
375 | 
376 | 
377 | 
378 | 
379 | 
380 | 
381 | 
382 | 
383 | 
384 | 
385 | 
386 | 
387 | 
388 | 
389 | 
390 | 
391 | 
392 | 
393 | 
394 | 
395 | 
396 | 
397 | 


--------------------------------------------------------------------------------
/sdbm.3:
--------------------------------------------------------------------------------
  1 | .\" $Id: sdbm.3,v 1.2 90/12/13 13:00:57 oz Exp $
  2 | .TH SDBM 3 "1 March 1990"
  3 | .SH NAME
  4 | sdbm, dbm_open, dbm_prep, dbm_close, dbm_fetch, dbm_store, dbm_delete, dbm_firstkey, dbm_nextkey, dbm_hash, dbm_rdonly, dbm_error, dbm_clearerr, dbm_dirfno, dbm_pagfno \- data base subroutines
  5 | .SH SYNOPSIS
  6 | .nf
  7 | .ft B
  8 | #include <sdbm.h>
  9 | .sp
 10 | typedef struct {
 11 | 	char *dptr;
 12 | 	int dsize;
 13 | } datum;
 14 | .sp
 15 | datum nullitem = { NULL, 0 };
 16 | .sp
 17 | \s-1DBM\s0 *dbm_open(char *file, int flags, int mode)
 18 | .sp
 19 | \s-1DBM\s0 *dbm_prep(char *dirname, char *pagname, int flags, int mode)
 20 | .sp
 21 | void dbm_close(\s-1DBM\s0 *db)
 22 | .sp
 23 | datum dbm_fetch(\s-1DBM\s0 *db, datum key)
 24 | .sp
 25 | int dbm_store(\s-1DBM\s0 *db, datum key, datum val, int flags)
 26 | .sp
 27 | int dbm_delete(\s-1DBM\s0 *db, datum key)
 28 | .sp
 29 | datum dbm_firstkey(\s-1DBM\s0 *db)
 30 | .sp
 31 | datum dbm_nextkey(\s-1DBM\s0 *db)
 32 | .sp
 33 | long dbm_hash(char *string, int len)
 34 | .sp
 35 | int dbm_rdonly(\s-1DBM\s0 *db)
 36 | int dbm_error(\s-1DBM\s0 *db)
 37 | dbm_clearerr(\s-1DBM\s0 *db)
 38 | int dbm_dirfno(\s-1DBM\s0 *db)
 39 | int dbm_pagfno(\s-1DBM\s0 *db)
 40 | .ft R
 41 | .fi
 42 | .SH DESCRIPTION
 43 | .IX "database library" sdbm "" "\fLsdbm\fR"
 44 | .IX dbm_open "" "\fLdbm_open\fR \(em open \fLsdbm\fR database"
 45 | .IX dbm_prep "" "\fLdbm_prep\fR \(em prepare \fLsdbm\fR database"
 46 | .IX dbm_close "" "\fLdbm_close\fR \(em close \fLsdbm\fR routine"
 47 | .IX dbm_fetch "" "\fLdbm_fetch\fR \(em fetch \fLsdbm\fR database data"
 48 | .IX dbm_store "" "\fLdbm_store\fR \(em add data to \fLsdbm\fR database"
 49 | .IX dbm_delete "" "\fLdbm_delete\fR \(em remove data from \fLsdbm\fR database"
 50 | .IX dbm_firstkey "" "\fLdbm_firstkey\fR \(em access \fLsdbm\fR database"
 51 | .IX dbm_nextkey "" "\fLdbm_nextkey\fR \(em access \fLsdbm\fR database"
 52 | .IX dbm_hash "" "\fLdbm_hash\fR \(em string hash for \fLsdbm\fR database"
 53 | .IX dbm_rdonly "" "\fLdbm_rdonly\fR \(em return \fLsdbm\fR database read-only mode"
 54 | .IX dbm_error "" "\fLdbm_error\fR \(em return \fLsdbm\fR database error condition"
 55 | .IX dbm_clearerr "" "\fLdbm_clearerr\fR \(em clear \fLsdbm\fR database error condition"
 56 | .IX dbm_dirfno "" "\fLdbm_dirfno\fR \(em return \fLsdbm\fR database bitmap file descriptor"
 57 | .IX dbm_pagfno "" "\fLdbm_pagfno\fR \(em return \fLsdbm\fR database data file descriptor"
 58 | .IX "database functions \(em \fLsdbm\fR"  dbm_open  ""  \fLdbm_open\fP
 59 | .IX "database functions \(em \fLsdbm\fR"  dbm_prep  ""  \fLdbm_prep\fP
 60 | .IX "database functions \(em \fLsdbm\fR"  dbm_close  ""  \fLdbm_close\fP
 61 | .IX "database functions \(em \fLsdbm\fR"  dbm_fetch  ""  \fLdbm_fetch\fP
 62 | .IX "database functions \(em \fLsdbm\fR"  dbm_store  ""  \fLdbm_store\fP
 63 | .IX "database functions \(em \fLsdbm\fR"  dbm_delete  ""  \fLdbm_delete\fP
 64 | .IX "database functions \(em \fLsdbm\fR"  dbm_firstkey  ""  \fLdbm_firstkey\fP
 65 | .IX "database functions \(em \fLsdbm\fR"  dbm_nextkey  ""  \fLdbm_nextkey\fP
 66 | .IX "database functions \(em \fLsdbm\fR"  dbm_rdonly  ""  \fLdbm_rdonly\fP
 67 | .IX "database functions \(em \fLsdbm\fR"  dbm_error  ""  \fLdbm_error\fP
 68 | .IX "database functions \(em \fLsdbm\fR"  dbm_clearerr  ""  \fLdbm_clearerr\fP
 69 | .IX "database functions \(em \fLsdbm\fR"  dbm_dirfno  ""  \fLdbm_dirfno\fP
 70 | .IX "database functions \(em \fLsdbm\fR"  dbm_pagfno  ""  \fLdbm_pagfno\fP
 71 | .LP
 72 | This package allows an application to maintain a mapping of <key,value> pairs
 73 | in disk files.  This is not to be considered a real database system, but is
 74 | still useful in many simple applications built around fast retrieval of a data
 75 | value from a key.  This implementation uses an external hashing scheme,
 76 | called Dynamic Hashing, as described by Per-Aake Larson in BIT 18 (1978) pp.
 77 | 184-201.  Retrieval of any item usually requires a single disk access.
 78 | The application interface is compatible with the
 79 | .IR ndbm (3)
 80 | library.
 81 | .LP
 82 | An
 83 | .B sdbm
 84 | database is kept in two files usually given the extensions
 85 | .B \.dir
 86 | and
 87 | .BR \.pag .
 88 | The
 89 | .B \.dir
 90 | file contains a bitmap representing a forest of binary hash trees, the leaves
 91 | of which indicate data pages in the
 92 | .B \.pag
 93 | file.
 94 | .LP
 95 | The application interface uses the
 96 | .B datum
 97 | structure to describe both
 98 | .I keys
 99 | and
100 | .IR value s.
101 | A
102 | .B datum
103 | specifies a byte sequence of
104 | .I dsize
105 | size pointed to by
106 | .IR dptr .
107 | If you use
108 | .SM ASCII
109 | strings as
110 | .IR key s
111 | or
112 | .IR value s,
113 | then you must decide whether or not to include the terminating
114 | .SM NUL
115 | byte which sometimes defines strings.  Including it will require larger
116 | database files, but it will be possible to get sensible output from a
117 | .IR strings (1)
118 | command applied to the data file.
119 | .LP
120 | In order to allow a process using this package to manipulate multiple
121 | databases, the applications interface always requires a
122 | .IR handle ,
123 | a
124 | .BR "DBM *" ,
125 | to identify the database to be manipulated.  Such a handle can be obtained
126 | from the only routines that do not require it, namely
127 | .BR dbm_open (\|)
128 | or
129 | .BR dbm_prep (\|).
130 | Either of these will open or create the two necessary files.  The
131 | difference is that the latter allows explicitly naming the bitmap and data
132 | files whereas
133 | .BR dbm_open (\|)
134 | will take a base file name and call
135 | .BR dbm_prep (\|)
136 | with the default extensions.
137 | The
138 | .I flags
139 | and
140 | .I mode
141 | parameters are the same as for
142 | .BR open (2).
143 | .LP
144 | To free the resources occupied while a database handle is active, call
145 | .BR dbm_close (\|).
146 | .LP
147 | Given a handle, one can retrieve data associated with a key by using the
148 | .BR dbm_fetch (\|)
149 | routine, and associate data with a key by using the
150 | .BR dbm_store (\|)
151 | routine.
152 | .LP
153 | The values of the
154 | .I flags
155 | parameter for
156 | .BR dbm_store (\|)
157 | can be either
158 | .BR \s-1DBM_INSERT\s0 ,
159 | which will not change an existing entry with the same key, or
160 | .BR \s-1DBM_REPLACE\s0 ,
161 | which will replace an existing entry with the same key.
162 | Keys are unique within the database.
163 | .LP
164 | To delete a key and its associated value use the
165 | .BR dbm_delete (\|)
166 | routine.
167 | .LP
168 | To retrieve every key in the database, use a loop like:
169 | .sp
170 | .nf
171 | .ft B
172 | for (key = dbm_firstkey(db); key.dptr != NULL; key = dbm_nextkey(db))
173 |         ;
174 | .ft R
175 | .fi
176 | .LP
177 | The order of retrieval is unspecified.
178 | .LP
179 | If you determine that the performance of the database is inadequate or
180 | you notice clustering or other effects that may be due to the hashing
181 | algorithm used by this package, you can override it by supplying your
182 | own
183 | .BR dbm_hash (\|)
184 | routine.  Doing so will make the database unintelligible to any other
185 | applications that do not use your specialized hash function.
186 | .sp
187 | .LP
188 | The following macros are defined in the header file:
189 | .IP
190 | .BR dbm_rdonly (\|)
191 | returns true if the database has been opened read\-only.
192 | .IP
193 | .BR dbm_error (\|)
194 | returns true if an I/O error has occurred.
195 | .IP
196 | .BR dbm_clearerr (\|)
197 | allows you to clear the error flag if you think you know what the error
198 | was and insist on ignoring it.
199 | .IP
200 | .BR dbm_dirfno (\|)
201 | returns the file descriptor associated with the bitmap file.
202 | .IP
203 | .BR dbm_pagfno (\|)
204 | returns the file descriptor associated with the data file.
205 | .SH SEE ALSO
206 | .IR open (2).
207 | .SH DIAGNOSTICS
208 | Functions that return a
209 | .B "DBM *"
210 | handle will use
211 | .SM NULL
212 | to indicate an error.
213 | Functions that return an
214 | .B int
215 | will use \-1 to indicate an error.  The normal (successful) return value of these functions is 0.
216 | Functions that return a
217 | .B datum
218 | will return
219 | .B nullitem
220 | to indicate an error.
221 | .LP
222 | As a special case of
223 | .BR dbm_store (\|),
224 | if it is called with the
225 | .B \s-1DBM_INSERT\s0
226 | flag and the key already exists in the database, the return value will be 1.
227 | .LP
228 | In general, if a function parameter is invalid,
229 | .B errno
230 | will be set to
231 | .BR \s-1EINVAL\s0 .
232 | If a write operation is requested on a read-only database,
233 | .B errno
234 | will be set to
235 | .BR \s-1EPERM\s0 .
236 | If a memory allocation (using
237 | .IR malloc (3))
238 | failed,
239 | .B errno
240 | will be set to
241 | .BR \s-1ENOMEM\s0 .
242 | For I/O operation failures
243 | .B errno
244 | will contain the value set by the relevant failed system call, either
245 | .IR read (2),
246 | .IR write (2),
247 | or
248 | .IR lseek (2).
249 | .SH AUTHOR
250 | .IP "Ozan S. Yigit" (oz@nexus.yorku.ca)
251 | .SH BUGS
252 | The sum of key and value data sizes must not exceed
253 | .B \s-1PAIRMAX\s0
254 | (1008 bytes).
255 | .LP
256 | The sum of the key and value data sizes where several keys hash to the
257 | same value must fit within one bitmap page.
258 | .LP
259 | The
260 | .B \.pag
261 | file will contain holes, so its apparent size is larger than its contents.
262 | When copied through the filesystem the holes will be filled.
263 | .LP
264 | The contents of
265 | .B datum
266 | values returned are in volatile storage.  If you want to retain the values
267 | pointed to, you must copy them immediately before another call to this package.
268 | .LP
269 | The only safe way for multiple processes to (read and) update a database at
270 | the same time, is to implement a private locking scheme outside this package
271 | and open and close the database between lock acquisitions.  It is safe for
272 | multiple processes to concurrently access a database read-only.
273 | .SH APPLICATIONS PORTABILITY
274 | For complete source code compatibility with the Berkeley Unix
275 | .IR ndbm (3)
276 | library, the 
277 | .B sdbm.h
278 | header file should be installed in
279 | .BR /usr/include/ndbm.h .
280 | .LP
281 | The
282 | .B nullitem
283 | data item, and the
284 | .BR dbm_prep (\|),
285 | .BR dbm_hash (\|),
286 | .BR dbm_rdonly (\|),
287 | .BR dbm_dirfno (\|),
288 | and
289 | .BR dbm_pagfno (\|)
290 | functions are unique to this package.
291 | 


--------------------------------------------------------------------------------
/sdbm.bun:
--------------------------------------------------------------------------------
   1 | 
   2 | : to unbundle, sh this file
   3 | echo x - CHANGES 1>&2
   4 | sed 's/^X//' >CHANGES <<'@@@End of CHANGES'
   5 | XJune 1997:
   6 | X
   7 | Xo fixed a long-hidden memmove bug in delpair that causes database
   8 | X  corruption in MEMMOVE versions of sdbm. [sdbm defaults to duff's
   9 | X  device to move data, so memmove version is almost never used.]
  10 | X
  11 | XChanges from the earlier BETA releases.
  12 | X
  13 | Xo dbm_prep does everything now, so dbm_open is just a simple
  14 | X  wrapper that builds the default filenames. dbm_prep no longer
  15 | X  requires a (DBM *) db parameter: it allocates one itself. It
  16 | X  returns (DBM *) db or (DBM *) NULL.
  17 | X
  18 | Xo makroom is now reliable. In the common-case optimization of the page
  19 | X  split, the page into which the incoming key/value pair is to be inserted
  20 | X  is write-deferred (if the split is successful), thereby saving a costly
  21 | X  write.  BUT, if the split does not make enough room (unsuccessful), the
  22 | X  deferred page is written out, as the failure-window is now dependent on
  23 | X  the number of split attempts.
  24 | X
  25 | Xo if -DDUFF is defined, hash function will also use the DUFF construct.
  26 | X  This may look like a micro-performance tweak (maybe it is), but in fact,
  27 | X  the hash function is the third most-heavily used function, after read
  28 | X  and write.
  29 | @@@End of CHANGES
  30 | echo x - COMPARE 1>&2
  31 | sed 's/^X//' >COMPARE <<'@@@End of COMPARE'
  32 | X
  33 | XScript started on Thu Sep 28 15:41:06 1989
  34 | X% uname -a
  35 | Xtitan titan 4_0 UMIPS mips
  36 | X% make all x-dbm
  37 | X        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dbm.c
  38 | X        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c sdbm.c
  39 | X        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c pair.c
  40 | X        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c hash.c
  41 | X        ar cr libsdbm.a sdbm.o pair.o hash.o
  42 | X        ranlib libsdbm.a
  43 | X        cc  -o dbm dbm.o libsdbm.a
  44 | X        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dba.c
  45 | X        cc  -o dba dba.o
  46 | X        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -c dbd.c
  47 | X        cc  -o dbd dbd.o
  48 | X        cc -O -DSDBM -DDUFF -DDUPERROR -DSPLITFAIL -o x-dbm dbm.o
  49 | X% 
  50 | X% 
  51 | X% wc history
  52 | X  65110 218344 3204883 history
  53 | X% 
  54 | X% /bin/time dbm build foo <history
  55 | X
  56 | Xreal     5:56.9
  57 | Xuser       13.3
  58 | Xsys        26.3
  59 | X% ls -s
  60 | Xtotal 14251
  61 | X   5 README           2 dbd.c            1 hash.c           1 pair.h
  62 | X   0 SCRIPT           5 dbd.o            1 hash.o           5 pair.o
  63 | X   1 WISHLIST        62 dbm           3130 history          1 port.h
  64 | X  46 dba              5 dbm.c           11 howtodbm.txt    11 sdbm.c
  65 | X   3 dba.c            8 dbm.o           14 libsdbm.a        2 sdbm.h
  66 | X   6 dba.o            4 foo.dir          1 makefile         8 sdbm.o
  67 | X  46 dbd           10810 foo.pag         6 pair.c          60 x-dbm
  68 | X% ls -l foo.*
  69 | X-rw-r--r--  1 oz           4096 Sep 28 15:48 foo.dir
  70 | X-rw-r--r--  1 oz       11069440 Sep 28 15:48 foo.pag
  71 | X% 
  72 | X% /bin/time x-dbm build bar <history
  73 | X
  74 | Xreal     5:59.4
  75 | Xuser       24.7
  76 | Xsys        29.1
  77 | X% 
  78 | X% ls -s
  79 | Xtotal 27612
  80 | X   5 README          46 dbd              1 hash.c           5 pair.o
  81 | X   1 SCRIPT           2 dbd.c            1 hash.o           1 port.h
  82 | X   1 WISHLIST         5 dbd.o         3130 history         11 sdbm.c
  83 | X   4 bar.dir         62 dbm             11 howtodbm.txt     2 sdbm.h
  84 | X13356 bar.pag         5 dbm.c           14 libsdbm.a        8 sdbm.o
  85 | X  46 dba              8 dbm.o            1 makefile        60 x-dbm
  86 | X   3 dba.c            4 foo.dir          6 pair.c
  87 | X   6 dba.o         10810 foo.pag         1 pair.h
  88 | X% 
  89 | X% ls -l bar.*
  90 | X-rw-r--r--  1 oz           4096 Sep 28 15:54 bar.dir
  91 | X-rw-r--r--  1 oz       13676544 Sep 28 15:54 bar.pag
  92 | X% 
  93 | X% dba foo | tail
  94 | X#10801: ok. no entries.
  95 | X#10802: ok. no entries.
  96 | X#10803: ok. no entries.
  97 | X#10804: ok. no entries.
  98 | X#10805: ok. no entries.
  99 | X#10806: ok. no entries.
 100 | X#10807: ok. no entries.
 101 | X#10808: ok. no entries.
 102 | X#10809: ok.  11 entries 67% used free 337.
 103 | X10810 pages (6036 holes):  65073 entries
 104 | X% 
 105 | X% dba bar | tail
 106 | X#13347: ok. no entries.
 107 | X#13348: ok. no entries.
 108 | X#13349: ok. no entries.
 109 | X#13350: ok. no entries.
 110 | X#13351: ok. no entries.
 111 | X#13352: ok. no entries.
 112 | X#13353: ok. no entries.
 113 | X#13354: ok. no entries.
 114 | X#13355: ok.   7 entries 33% used free 676.
 115 | X13356 pages (8643 holes):  65073 entries
 116 | X%
 117 | X% exit
 118 | Xscript done on Thu Sep 28 16:08:45 1989
 119 | X
 120 | @@@End of COMPARE
 121 | echo x - biblio 1>&2
 122 | sed 's/^X//' >biblio <<'@@@End of biblio'
 123 | X%A R. J. Enbody
 124 | X%A H. C. Du
 125 | X%T Dynamic Hashing Schemes
 126 | X%J ACM Computing Surveys
 127 | X%V 20
 128 | X%N 2
 129 | X%D June 1988
 130 | X%P 85-113
 131 | X%K surveys
 132 | X
 133 | X%A P.-A. Larson
 134 | X%T Dynamic Hashing
 135 | X%J BIT
 136 | X%V 18
 137 | X%P 184-201
 138 | X%D 1978
 139 | X%K dynamic
 140 | X
 141 | X%A W. Litwin
 142 | X%T Linear Hashing: A new tool for file and table addressing
 143 | X%J Proceedings of the 6th Conference on Very Large Databases (Montreal)
 144 | X%I Very Large Database Foundation
 145 | X%C Saratoga, Calif.
 146 | X%P 212-223
 147 | X%D 1980
 148 | X%K linear
 149 | X
 150 | X%A R. Fagin
 151 | X%A J. Nievergelt
 152 | X%A N. Pippenger
 153 | X%A H. R. Strong
 154 | X%T Extendible Hashing - A Fast Access Method for Dynamic Files
 155 | X%J ACM Trans. Database Syst.
 156 | X%V 4
 157 | X%N 3
 158 | X%D Sept. 1979
 159 | X%P 315-344
 160 | X%K extend
 161 | X
 162 | X%A G. N. Martin
 163 | X%T Spiral Storage: Incrementally Augmentable Hash Addressed Storage
 164 | X%J Technical Report #27
 165 | X%I University of Warwick
 166 | X%C Coventry, U.K.
 167 | X%D 1979
 168 | X%K spiral
 169 | X
 170 | X%A Chris Torek
 171 | X%T Re: dbm.a and ndbm.a archives
 172 | X%B USENET newsgroup comp.unix
 173 | X%D 1987
 174 | X%K torek
 175 | X
 176 | X%A Rich Wales
 177 | X%T Discussion of "dbm" data base system
 178 | X%B USENET newsgroup unix.wizards
 179 | X%D Jan. 1984
 180 | X%K rich
 181 | X
 182 | X
 183 | X
 184 | X
 185 | X
 186 | X
 187 | @@@End of biblio
 188 | echo x - dba.c 1>&2
 189 | sed 's/^X//' >dba.c <<'@@@End of dba.c'
 190 | X/*
 191 | X * dba	dbm analysis/recovery
 192 | X */
 193 | X
 194 | X#include <stdio.h>
 195 | X#include <sys/file.h>
 196 | X#include "sdbm.h"
 197 | X
 198 | Xchar *progname;
 199 | Xextern void oops();
 200 | X
 201 | Xint
 202 | Xmain(argc, argv)
 203 | Xchar **argv;
 204 | X{
 205 | X	int n;
 206 | X	char *p;
 207 | X	char *name;
 208 | X	int pagf;
 209 | X
 210 | X	progname = argv[0];
 211 | X
 212 | X	if (p = argv[1]) {
 213 | X		name = (char *) malloc((n = strlen(p)) + 5);
 214 | X		strcpy(name, p);
 215 | X		strcpy(name + n, ".pag");
 216 | X
 217 | X		if ((pagf = open(name, O_RDONLY)) < 0)
 218 | X			oops("cannot open %s.", name);
 219 | X
 220 | X		sdump(pagf);
 221 | X	}
 222 | X	else
 223 | X		oops("usage: %s dbname", progname);
 224 | X
 225 | X	return 0;
 226 | X}
 227 | X
 228 | Xsdump(pagf)
 229 | Xint pagf;
 230 | X{
 231 | X	register b;
 232 | X	register n = 0;
 233 | X	register t = 0;
 234 | X	register o = 0;
 235 | X	register e;
 236 | X	char pag[PBLKSIZ];
 237 | X
 238 | X	while ((b = read(pagf, pag, PBLKSIZ)) > 0) {
 239 | X		printf("#%d: ", n);
 240 | X		if (!okpage(pag))
 241 | X			printf("bad\n");
 242 | X		else {
 243 | X			printf("ok. ");
 244 | X			if (!(e = pagestat(pag)))
 245 | X			    o++;
 246 | X			else
 247 | X			    t += e;
 248 | X		}
 249 | X		n++;
 250 | X	}
 251 | X
 252 | X	if (b == 0)
 253 | X		printf("%d pages (%d holes):  %d entries\n", n, o, t);
 254 | X	else
 255 | X		oops("read failed: block %d", n);
 256 | X}
 257 | X
 258 | Xpagestat(pag)
 259 | Xchar *pag;
 260 | X{
 261 | X	register n;
 262 | X	register free;
 263 | X	register short *ino = (short *) pag;
 264 | X
 265 | X	if (!(n = ino[0]))
 266 | X		printf("no entries.\n");
 267 | X	else {
 268 | X		free = ino[n] - (n + 1) * sizeof(short);
 269 | X		printf("%3d entries %2d%% used free %d.\n",
 270 | X		       n / 2, ((PBLKSIZ - free) * 100) / PBLKSIZ, free);
 271 | X	}
 272 | X	return n / 2;
 273 | X}
 274 | @@@End of dba.c
 275 | echo x - dbd.c 1>&2
 276 | sed 's/^X//' >dbd.c <<'@@@End of dbd.c'
 277 | X/*
 278 | X * dbd - dump a dbm data file
 279 | X */
 280 | X
 281 | X#include <stdio.h>
 282 | X#include <sys/file.h>
 283 | X#include "sdbm.h"
 284 | X
 285 | Xchar *progname;
 286 | Xextern void oops();
 287 | X
 288 | X
 289 | X#define empty(page)	(((short *) page)[0] == 0)	/* true when the page's first short is 0 */
 290 | X/* dbd main: open "<dbname>.pag" read-only and pass the descriptor to sdump() */
 291 | Xint
 292 | Xmain(argc, argv)
 293 | Xchar **argv;
 294 | X{
 295 | X	int n;
 296 | X	char *p;
 297 | X	char *name;
 298 | X	int pagf;
 299 | X
 300 | X	progname = argv[0];
 301 | X	if (p = argv[1]) {
 302 | X		/* strlen(p) + 5: the name, the four bytes of ".pag", and the NUL */
 303 | X		name = (char *) malloc((n = strlen(p)) + 5);
 304 | X		if (!name)	/* oops() is defined elsewhere; presumably it does not return */
 305 | X			oops("%s: cannot get memory", "name alloc");
 306 | X		strcpy(name, p);
 307 | X		strcpy(name + n, ".pag");
 308 | X		if ((pagf = open(name, O_RDONLY)) < 0)
 309 | X			oops("cannot open %s.", name);
 310 | X		sdump(pagf);
 311 | X	}
 312 | X	else
 313 | X		oops("usage: %s dbname", progname);
 314 | X	return 0;
 315 | X}
 316 | X/* sdump: read the page file PBLKSIZ bytes at a time and print every non-empty valid page */
 317 | Xsdump(pagf)
 318 | Xint pagf;
 319 | X{
 320 | X	register r;			/* result of the most recent read() */
 321 | X	register n = 0;			/* pages read so far */
 322 | X	register o = 0;			/* valid pages for which empty() was true */
 323 | X	char pag[PBLKSIZ];
 324 | X
 325 | X	while ((r = read(pagf, pag, PBLKSIZ)) > 0) {
 326 | X		if (!okpage(pag))	/* okpage() is not defined in this file */
 327 | X			fprintf(stderr, "%d: bad page.\n", n);
 328 | X		else if (empty(pag))
 329 | X			o++;
 330 | X		else
 331 | X			dispage(pag);	/* page contents go to stdout, diagnostics to stderr */
 332 | X		n++;
 333 | X	}
 334 | X
 335 | X	if (r == 0)			/* read() returned 0: end of file */
 336 | X		fprintf(stderr, "%d pages (%d holes).\n", n, o);
 337 | X	else				/* the loop ended on a negative read() result */
 338 | X		oops("read failed: block %d", n);
 339 | X}
 340 | X
 341 | X
 342 | X#ifdef OLD
 343 | Xdispage(pag)			/* OLD variant: print each pair with its page offsets */
 344 | Xchar *pag;
 345 | X{
 346 | X	register i, n;
 347 | X	register off;			/* end (exclusive) of the item about to be printed */
 348 | X	register short *ino = (short *) pag;	/* the page viewed as an array of shorts */
 349 | X
 350 | X	off = PBLKSIZ;			/* the first item ends at the end of the page */
 351 | X	for (i = 1; i < ino[0]; i += 2) {	/* ino[i], ino[i + 1]: offsets of one pair */
 352 | X		printf("\t[%d]: ", ino[i]);
 353 | X		for (n = ino[i]; n < off; n++)
 354 | X			putchar(pag[n]);
 355 | X		putchar(' ');
 356 | X		off = ino[i];		/* the next item ends where this one started */
 357 | X		printf("[%d]: ", ino[i + 1]);
 358 | X		for (n = ino[i + 1]; n < off; n++)
 359 | X			putchar(pag[n]);
 360 | X		off = ino[i + 1];
 361 | X		putchar('\n');
 362 | X	}
 363 | X}
 364 | X#else
 365 | Xdispage(pag)			/* print every pair on the page as "first<TAB>second" lines */
 366 | Xchar *pag;
 367 | X{
 368 | X	register i, n;
 369 | X	register off;			/* end (exclusive) of the item about to be printed */
 370 | X	register short *ino = (short *) pag;	/* the page viewed as an array of shorts */
 371 | X
 372 | X	off = PBLKSIZ;			/* the first item ends at the end of the page */
 373 | X	for (i = 1; i < ino[0]; i += 2) {	/* ino[i], ino[i + 1]: offsets of one pair */
 374 | X		for (n = ino[i]; n < off; n++)
 375 | X			if (pag[n] != 0)	/* NUL bytes are skipped, not printed */
 376 | X				putchar(pag[n]);
 377 | X		putchar('\t');
 378 | X		off = ino[i];		/* the next item ends where this one started */
 379 | X		for (n = ino[i + 1]; n < off; n++)
 380 | X			if (pag[n] != 0)
 381 | X				putchar(pag[n]);
 382 | X		putchar('\n');
 383 | X		off = ino[i + 1];
 384 | X	}
 385 | X}
 386 | X#endif
 387 | @@@End of dbd.c
 388 | echo x - dbe.1 1>&2
 389 | sed 's/^X//' >dbe.1 <<'@@@End of dbe.1'
 390 | X.TH dbe 1 "ndbm(3) EDITOR"
 391 | X.SH NAME
 392 | Xdbe \- Edit an ndbm(3) database
 393 | X.SH USAGE
 394 | Xdbe <database> [-m r|w|rw] [-crtvx] -a|-d|-f|-F|-s [<key> [<content>]]
 395 | X.SH DESCRIPTION
 396 | X\fIdbe\fP operates on ndbm(3) databases.
 397 | XIt can be used to create them, look at them or change them.
 398 | XWhen specifying the value of a key or the content of its associated entry,
 399 | X\\nnn, \\0, \\n, \\t, \\f and \\r are interpreted as usual.
 400 | XWhen displaying key/content pairs, non-printable characters are displayed
 401 | Xusing the \\nnn notation.
 402 | X.SH OPTIONS
 403 | X.IP -a
 404 | XList all entries in the database.
 405 | X.IP -c
 406 | XCreate the database if it does not exist.
 407 | X.IP -d
 408 | XDelete the entry associated with the specified key.
 409 | X.IP -f
 410 | XFetch and display the entry associated with the specified key.
 411 | X.IP -F
 412 | XFetch and display all the entries whose keys match the specified
 413 | Xregular expression.
 414 | X.IP "-m r|w|rw"
 415 | XOpen the database in read-only, write-only or read-write mode.
 416 | X.IP -r
 417 | XReplace the entry associated with the specified key if it already exists.
 418 | XSee option -s.
 419 | X.IP -s
 420 | XStore an entry under a specific key.
 421 | XAn error occurs if the key already exists and the option -r was not specified.
 422 | X.IP -t
 423 | XRe-initialize the database before executing the command.
 424 | X.IP -v
 425 | XVerbose mode.
 426 | XConfirm stores and deletions.
 427 | X.IP -x
 428 | XIf option -x is used with option -c, then if the database already exists,
 429 | Xan error occurs.
 430 | XThis can be used to implement a simple exclusive access locking mechanism.
 431 | X.SH SEE ALSO
 432 | Xndbm(3)
 433 | X.SH AUTHOR
 434 | Xjanick@bnr.ca
 435 | X
 436 | @@@End of dbe.1
 437 | echo x - dbe.c 1>&2
 438 | sed 's/^X//' >dbe.c <<'@@@End of dbe.c'
 439 | X#include <stdio.h>
 440 | X#ifndef VMS
 441 | X#include <sys/file.h>
 442 | X#include <ndbm.h>
 443 | X#else
 444 | X#include "file.h"
 445 | X#include "ndbm.h"
 446 | X#endif
 447 | X#include <ctype.h>
 448 | X
 449 | X/***************************************************************************\
 450 | X**                                                                         **
 451 | X**   Function name: getopt()                                               **
 452 | X**   Author:        Henry Spencer, UofT                                    **
 453 | X**   Coding date:   84/04/28                                               **
 454 | X**                                                                         **
 455 | X**   Description:                                                          **
 456 | X**                                                                         **
 457 | X**   Parses argv[] for arguments.                                          **
 458 | X**   Works with Whitesmith's C compiler.                                   **
 459 | X**                                                                         **
 460 | X**   Inputs   - The number of arguments                                    **
 461 | X**            - The base address of the array of arguments                 **
 462 | X**            - A string listing the valid options (':' indicates an       **
 463 | X**              argument to the preceding option is required, a ';'        **
 464 | X**              indicates an argument to the preceding option is optional) **
 465 | X**                                                                         **
 466 | X**   Outputs  - Returns the next option character,                         **
 467 | X**              '?' for non '-' arguments                                  **
 468 | X**              or ':' when there is no more arguments.                    **
 469 | X**                                                                         **
 470 | X**   Side Effects + The argument to an option is pointed to by 'optarg'    **
 471 | X**                                                                         **
 472 | X*****************************************************************************
 473 | X**                                                                         **
 474 | X**   REVISION HISTORY:                                                     **
 475 | X**                                                                         **
 476 | X**     DATE           NAME                        DESCRIPTION              **
 477 | X**   YY/MM/DD  ------------------   ------------------------------------   **
 478 | X**   88/10/20  Janick Bergeron      Returns '?' on unamed arguments        **
 479 | X**                                  returns '!' on unknown options         **
 480 | X**                                  and 'EOF' only when exhausted.         **
 481 | X**   88/11/18  Janick Bergeron      Return ':' when no more arguments      **
 482 | X**   89/08/11  Janick Bergeron      Optional optarg when ';' in optstring  **
 483 | X**                                                                         **
 484 | X\***************************************************************************/
 485 | X
 486 | Xchar *optarg;			       /* Global argument pointer. */
 487 | X
 488 | X#ifdef VMS
 489 | X#define index  strchr
 490 | X#endif
 491 | X
 492 | Xchar
 493 | Xgetopt(argc, argv, optstring)	/* return the next option letter, or '?', '!' or ':' */
 494 | Xint argc;
 495 | Xchar **argv;
 496 | Xchar *optstring;
 497 | X{
 498 | X	register int c;
 499 | X	register char *place;
 500 | X	extern char *index();
 501 | X	static int optind = 0;		/* index of the next argv[] word to examine */
 502 | X	static char *scan = NULL;	/* unread option letters in the current "-xyz" word */
 503 | X
 504 | X	optarg = NULL;
 505 | X
 506 | X	if (scan == NULL || *scan == '\0') {
 507 | X		/* the current word is used up: move on to the next argv[] word */
 508 | X		if (optind == 0)
 509 | X			optind++;	/* first call: step over argv[0] */
 510 | X		if (optind >= argc)
 511 | X			return ':';	/* no more arguments */
 512 | X
 513 | X		optarg = place = argv[optind++];
 514 | X		if (place[0] != '-' || place[1] == '\0')
 515 | X			return '?';	/* not an option (or a lone "-"); optarg points at the word */
 516 | X		if (place[1] == '-' && place[2] == '\0')
 517 | X			return '?';	/* "--" is reported the same way */
 518 | X		scan = place + 1;	/* first letter after the '-' */
 519 | X	}
 520 | X
 521 | X	c = *scan++;
 522 | X	place = index(optstring, c);
 523 | X	if (place == NULL || c == ':' || c == ';') {
 524 | X		/* not in optstring, or one of the two marker characters themselves */
 525 | X		(void) fprintf(stderr, "%s: unknown option %c\n", argv[0], c);
 526 | X		scan = NULL;		/* the rest of this word is dropped */
 527 | X		return '!';
 528 | X	}
 529 | X	if (*++place == ':') {
 530 | X		/* the option takes a required argument */
 531 | X		if (*scan != '\0') {
 532 | X			/* argument attached to the letter, as in "-mrw" */
 533 | X			optarg = scan;
 534 | X			scan = NULL;
 535 | X
 536 | X		}
 537 | X		else {
 538 | X			/* argument is the next argv[] word, if there is one */
 539 | X			if (optind >= argc) {
 540 | X
 541 | X				(void) fprintf(stderr, "%s: %c requires an argument\n",
 542 | X					       argv[0], c);
 543 | X				return '!';
 544 | X			}
 545 | X			optarg = argv[optind];
 546 | X			optind++;
 547 | X		}
 548 | X	}
 549 | X	else if (*place == ';') {
 550 | X		/* the option takes an optional argument */
 551 | X		if (*scan != '\0') {
 552 | X
 553 | X			optarg = scan;
 554 | X			scan = NULL;
 555 | X
 556 | X		}
 557 | X		else {
 558 | X			/* a following word counts as the argument unless it starts with '-' */
 559 | X			if (optind >= argc || *argv[optind] == '-')
 560 | X				optarg = NULL;
 561 | X			else {
 562 | X				optarg = argv[optind];
 563 | X				optind++;
 564 | X			}
 565 | X		}
 566 | X	}
 567 | X	return c;
 568 | X}
 569 | X/* print_datum - write db to stdout between double quotes, non-printable bytes as \nnn (octal) */
 570 | Xvoid
 571 | Xprint_datum(db)
 572 | Xdatum db;
 573 | X{
 574 | X	int i, c;		/* c: the current byte as a value in 0..255 */
 575 | X
 576 | X	putchar('"');
 577 | X	for (i = 0; i < db.dsize; i++) {
 578 | X		c = db.dptr[i] & 0377;	/* plain char may be signed: never pass a negative to isprint() */
 579 | X		if (isprint(c))
 580 | X			putchar(c);
 581 | X		else {
 582 | X			putchar('\\');
 583 | X			putchar('0' + ((c >> 6) & 0x07));	/* c <= 0377, so this digit is 0..3 */
 584 | X			putchar('0' + ((c >> 3) & 0x07));
 585 | X			putchar('0' + (c & 0x07));
 586 | X		}
 587 | X	}
 588 | X	putchar('"');
 589 | X}
 590 | X/* read_datum - build a malloc'ed datum from s, decoding \n \r \f \t \nnn and \0 escapes */
 591 | Xdatum
 592 | Xread_datum(s)
 593 | Xchar *s;
 594 | X{
 595 | X	datum db;
 596 | X	char *p;
 597 | X	int i;
 598 | X
 599 | X	db.dsize = 0;
 600 | X	db.dptr = (char *) malloc(strlen(s) + 1);	/* output never exceeds strlen(s); +1 avoids a 0-byte request */
 601 | X	if (db.dptr == NULL)		/* out of memory: return an empty datum (dptr NULL, dsize 0) */
 602 | X		return db;
 603 | X	for (p = db.dptr; *s != '\0'; p++, db.dsize++, s++) {
 604 | X		if (*s == '\\' && s[1] != '\0') {	/* a trailing lone backslash is kept as a literal byte */
 605 | X			if (*++s == 'n')
 606 | X				*p = '\n';
 607 | X			else if (*s == 'r')
 608 | X				*p = '\r';
 609 | X			else if (*s == 'f')
 610 | X				*p = '\f';
 611 | X			else if (*s == 't')
 612 | X				*p = '\t';
 613 | X			else if (isdigit(*s) && isdigit(*(s + 1)) && isdigit(*(s + 2))) {
 614 | X				i = (*s++ - '0') << 6;	/* \nnn: three digits folded as octal */
 615 | X				i |= (*s++ - '0') << 3;
 616 | X				i |= *s - '0';
 617 | X				*p = i;
 618 | X			}
 619 | X			else if (*s == '0')
 620 | X				*p = '\0';
 621 | X			else
 622 | X				*p = *s;	/* unknown escape: the character stands for itself */
 623 | X		}
 624 | X		else
 625 | X			*p = *s;
 626 | X	}
 627 | X	return db;
 628 | X}
 629 | X
 630 | X/* key2s - return a malloc'ed, NUL-terminated copy of db's bytes for use as a C string */
 631 | Xchar *
 632 | Xkey2s(db)
 633 | Xdatum db;
 634 | X{
 635 | X	char *buf;
 636 | X	char *p1, *p2;
 637 | X	/* a datum is dsize raw bytes with no terminator: stop at dsize (or at an embedded NUL) */
 638 | X	buf = (char *) malloc((db.dsize + 1) * sizeof(char));
 639 | X	for (p1 = buf, p2 = db.dptr; p2 < db.dptr + db.dsize && *p2 != '\0'; *p1++ = *p2++);
 640 | X	*p1 = '\0';
 641 | X	return buf;
 642 | X}
 643 | X/* dbe main: parse the options, open the database and run one command (-a|-d|-f|-F|-s) */
 644 | Xint
 645 | Xmain(argc, argv)
 646 | Xint argc;
 647 | Xchar **argv;
 648 | X{
 649 | X	typedef enum {
 650 | X		YOW, FETCH, STORE, DELETE, SCAN, REGEXP
 651 | X	} commands;
 652 | X	char opt;
 653 | X	int flags;
 654 | X	int giveusage = 0;
 655 | X	int verbose = 0;
 656 | X	commands what = YOW;		/* YOW: no command option seen yet */
 657 | X	char *comarg[3];		/* non-option words: database, key, content */
 658 | X	int st_flag = DBM_INSERT;
 659 | X	int argn;			/* number of comarg[] slots filled */
 660 | X	DBM *db;
 661 | X	datum key;
 662 | X	datum content;
 663 | X
 664 | X	flags = O_RDWR;
 665 | X	argn = 0;
 666 | X
 667 | X	while ((opt = getopt(argc, argv, "acdfFm:rstvx")) != ':') {
 668 | X		switch (opt) {
 669 | X		case 'a':
 670 | X			what = SCAN;
 671 | X			break;
 672 | X		case 'c':
 673 | X			flags |= O_CREAT;
 674 | X			break;
 675 | X		case 'd':
 676 | X			what = DELETE;
 677 | X			break;
 678 | X		case 'f':
 679 | X			what = FETCH;
 680 | X			break;
 681 | X		case 'F':
 682 | X			what = REGEXP;
 683 | X			break;
 684 | X		case 'm':
 685 | X			flags &= ~(000007);	/* clear the low three bits before or-ing in the mode */
 686 | X			if (strcmp(optarg, "r") == 0)
 687 | X				flags |= O_RDONLY;
 688 | X			else if (strcmp(optarg, "w") == 0)
 689 | X				flags |= O_WRONLY;
 690 | X			else if (strcmp(optarg, "rw") == 0)
 691 | X				flags |= O_RDWR;
 692 | X			else {
 693 | X				fprintf(stderr, "Invalid mode: \"%s\"\n", optarg);
 694 | X				giveusage = 1;
 695 | X			}
 696 | X			break;
 697 | X		case 'r':
 698 | X			st_flag = DBM_REPLACE;
 699 | X			break;
 700 | X		case 's':
 701 | X			what = STORE;
 702 | X			break;
 703 | X		case 't':
 704 | X			flags |= O_TRUNC;
 705 | X			break;
 706 | X		case 'v':
 707 | X			verbose = 1;
 708 | X			break;
 709 | X		case 'x':
 710 | X			flags |= O_EXCL;
 711 | X			break;
 712 | X		case '!':		/* getopt() has already printed a diagnostic */
 713 | X			giveusage = 1;
 714 | X			break;
 715 | X		case '?':		/* a non-option word; getopt() left it in optarg */
 716 | X			if (argn < 3)
 717 | X				comarg[argn++] = optarg;
 718 | X			else {
 719 | X				fprintf(stderr, "Too many arguments.\n");
 720 | X				giveusage = 1;
 721 | X			}
 722 | X			break;
 723 | X		}
 724 | X	}
 725 | X
 726 | X	if (giveusage || what == YOW || argn < 1) {
 727 | X		fprintf(stderr, "Usage: %s database [-m r|w|rw] [-crtvx] -a|-d|-f|-F|-s [key [content]]\n", argv[0]);
 728 | X		exit(-1);
 729 | X	}
 730 | X
 731 | X	if ((db = dbm_open(comarg[0], flags, 0777)) == NULL) {
 732 | X		fprintf(stderr, "Error opening database \"%s\"\n", comarg[0]);
 733 | X		exit(-1);
 734 | X	}
 735 | X
 736 | X	if (argn > 1)
 737 | X		key = read_datum(comarg[1]);
 738 | X	if (argn > 2)
 739 | X		content = read_datum(comarg[2]);
 740 | X
 741 | X	switch (what) {
 742 | X
 743 | X	case SCAN:
 744 | X		key = dbm_firstkey(db);
 745 | X		if (dbm_error(db)) {
 746 | X			fprintf(stderr, "Error when fetching first key\n");
 747 | X			goto db_exit;
 748 | X		}
 749 | X		while (key.dptr != NULL) {
 750 | X			content = dbm_fetch(db, key);
 751 | X			if (dbm_error(db)) {
 752 | X				fprintf(stderr, "Error when fetching ");
 753 | X				print_datum(key);
 754 | X				printf("\n");
 755 | X				goto db_exit;
 756 | X			}
 757 | X			print_datum(key);
 758 | X			printf(": ");
 759 | X			print_datum(content);
 760 | X			printf("\n");
 761 | X			key = dbm_nextkey(db);
 762 | X			if (dbm_error(db)) {	/* checked after the call it reports on */
 763 | X				fprintf(stderr, "Error when fetching next key\n");
 764 | X				goto db_exit;
 765 | X			}
 766 | X		}
 767 | X		break;
 768 | X
 769 | X	case REGEXP:
 770 | X		if (argn < 2) {
 771 | X			fprintf(stderr, "Missing regular expression.\n");
 772 | X			goto db_exit;
 773 | X		}
 774 | X		if (re_comp(comarg[1])) {
 775 | X			fprintf(stderr, "Invalid regular expression\n");
 776 | X			goto db_exit;
 777 | X		}
 778 | X		key = dbm_firstkey(db);
 779 | X		if (dbm_error(db)) {
 780 | X			fprintf(stderr, "Error when fetching first key\n");
 781 | X			goto db_exit;
 782 | X		}
 783 | X		while (key.dptr != NULL) {
 784 | X			if (re_exec(key2s(key))) {	/* NOTE: the string from key2s() is never freed */
 785 | X				content = dbm_fetch(db, key);
 786 | X				if (dbm_error(db)) {
 787 | X					fprintf(stderr, "Error when fetching ");
 788 | X					print_datum(key);
 789 | X					printf("\n");
 790 | X					goto db_exit;
 791 | X				}
 792 | X				print_datum(key);
 793 | X				printf(": ");
 794 | X				print_datum(content);
 795 | X				printf("\n");
 796 | X			}
 797 | X			key = dbm_nextkey(db);
 798 | X			if (dbm_error(db)) {	/* checked after the call it reports on */
 799 | X				fprintf(stderr, "Error when fetching next key\n");
 800 | X				goto db_exit;
 801 | X			}
 802 | X		}
 803 | X		break;
 804 | X
 805 | X	case FETCH:
 806 | X		if (argn < 2) {
 807 | X			fprintf(stderr, "Missing fetch key.\n");
 808 | X			goto db_exit;
 809 | X		}
 810 | X		content = dbm_fetch(db, key);
 811 | X		if (dbm_error(db)) {
 812 | X			fprintf(stderr, "Error when fetching ");
 813 | X			print_datum(key);
 814 | X			printf("\n");
 815 | X			goto db_exit;
 816 | X		}
 817 | X		if (content.dptr == NULL) {
 818 | X			fprintf(stderr, "Cannot find ");
 819 | X			print_datum(key);
 820 | X			printf("\n");
 821 | X			goto db_exit;
 822 | X		}
 823 | X		print_datum(key);
 824 | X		printf(": ");
 825 | X		print_datum(content);
 826 | X		printf("\n");
 827 | X		break;
 828 | X
 829 | X	case DELETE:
 830 | X		if (argn < 2) {
 831 | X			fprintf(stderr, "Missing delete key.\n");
 832 | X			goto db_exit;
 833 | X		}
 834 | X		if (dbm_delete(db, key) || dbm_error(db)) {
 835 | X			fprintf(stderr, "Error when deleting ");
 836 | X			print_datum(key);
 837 | X			printf("\n");
 838 | X			goto db_exit;
 839 | X		}
 840 | X		if (verbose) {
 841 | X			print_datum(key);
 842 | X			printf(": DELETED\n");
 843 | X		}
 844 | X		break;
 845 | X
 846 | X	case STORE:
 847 | X		if (argn < 3) {
 848 | X			fprintf(stderr, "Missing key and/or content.\n");
 849 | X			goto db_exit;
 850 | X		}
 851 | X		if (dbm_store(db, key, content, st_flag) || dbm_error(db)) {
 852 | X			fprintf(stderr, "Error when storing ");
 853 | X			print_datum(key);
 854 | X			printf("\n");
 855 | X			goto db_exit;
 856 | X		}
 857 | X		if (verbose) {
 858 | X			print_datum(key);
 859 | X			printf(": ");
 860 | X			print_datum(content);
 861 | X			printf(" STORED\n");
 862 | X		}
 863 | X		break;
 864 | X	}
 865 | X	dbm_close(db);			/* the command completed without error */
 866 | X	return 0;
 867 | X
 868 | Xdb_exit:				/* reached only by goto, after a failure was reported */
 869 | X	/* db is not valid once dbm_close() has been called: no dbm_error(db) after it */
 870 | X	dbm_close(db);
 871 | X	exit(-1);
 872 | X	/*NOTREACHED*/
 873 | X}
 874 | @@@End of dbe.c
 875 | echo x - dbm.c 1>&2
 876 | sed 's/^X//' >dbm.c <<'@@@End of dbm.c'
 877 | X/*
 878 | X * Copyright (c) 1985 The Regents of the University of California.
 879 | X * All rights reserved.
 880 | X *
 881 | X * Redistribution and use in source and binary forms are permitted
 882 | X * provided that the above copyright notice and this paragraph are
 883 | X * duplicated in all such forms and that any documentation,
 884 | X * advertising materials, and other materials related to such
 885 | X * distribution and use acknowledge that the software was developed
 886 | X * by the University of California, Berkeley.  The name of the
 887 | X * University may not be used to endorse or promote products derived
 888 | X * from this software without specific prior written permission.
 889 | X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
 890 | X * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 891 | X * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 892 | X */
 893 | X
 894 | X#ifndef lint
 895 | Xstatic char sccsid[] = "@(#)dbm.c    5.4 (Berkeley) 5/24/89";
 896 | X#endif /* not lint */
 897 | X
 898 | X#include    "dbm.h"
 899 | X
 900 | X#define    NODB    ((DBM *)0)
 901 | X
 902 | Xstatic DBM *cur_db = NODB;
 903 | X
 904 | Xstatic char no_db[] = "dbm: no open database\n";
 905 | X/* dbminit: make file the current database; returns 0 on success, -1 if it cannot be opened */
 906 | Xdbminit(file)
 907 | X    char *file;
 908 | X{
 909 | X    if (cur_db != NODB)                 /* only one database is open at a time */
 910 | X        dbm_close(cur_db);
 911 | X
 912 | X    cur_db = dbm_open(file, 2, 0);      /* 2: presumably the raw O_RDWR value -- confirm */
 913 | X    if (cur_db == NODB) {
 914 | X        cur_db = dbm_open(file, 0, 0);  /* second attempt with flags 0 (presumably O_RDONLY) */
 915 | X        if (cur_db == NODB)
 916 | X            return (-1);
 917 | X    }
 918 | X    return (0);
 919 | X}
 920 | X/* forder: forward key to dbm_forder() on the current database; 0L if none is open */
 921 | Xlong
 922 | Xforder(key)
 923 | Xdatum key;
 924 | X{
 925 | X    if (cur_db == NODB) {
 926 | X        printf(no_db);          /* the message goes to stdout */
 927 | X        return (0L);
 928 | X    }
 929 | X    return (dbm_forder(cur_db, key));   /* dbm_forder() is not defined in this file */
 930 | X}
 931 | X/* fetch: look key up in the current database via dbm_fetch() */
 932 | Xdatum
 933 | Xfetch(key)
 934 | Xdatum key;
 935 | X{
 936 | X    datum item;
 937 | X
 938 | X    if (cur_db == NODB) {
 939 | X        printf(no_db);
 940 | X        item.dptr = 0;          /* only dptr is set on this path; item.dsize is left unset */
 941 | X        return (item);
 942 | X    }
 943 | X    return (dbm_fetch(cur_db, key));
 944 | X}
 945 | X/* delete: remove key from the current database; -1 if none is open or it is read-only */
 946 | Xdelete(key)
 947 | Xdatum key;
 948 | X{
 949 | X    if (cur_db == NODB) {
 950 | X        printf(no_db);
 951 | X        return (-1);
 952 | X    }
 953 | X    if (dbm_rdonly(cur_db))     /* refused without printing a message */
 954 | X        return (-1);
 955 | X    return (dbm_delete(cur_db, key));
 956 | X}
 957 | X/* store: store dat under key in the current database, replacing any existing entry */
 958 | Xstore(key, dat)
 959 | Xdatum key, dat;
 960 | X{
 961 | X    if (cur_db == NODB) {
 962 | X        printf(no_db);
 963 | X        return (-1);
 964 | X    }
 965 | X    if (dbm_rdonly(cur_db))     /* refused without printing a message */
 966 | X        return (-1);
 967 | X
 968 | X    return (dbm_store(cur_db, key, dat, DBM_REPLACE));
 969 | X}
 970 | X/* firstkey: return dbm_firstkey() of the current database */
 971 | Xdatum
 972 | Xfirstkey()
 973 | X{
 974 | X    datum item;
 975 | X
 976 | X    if (cur_db == NODB) {
 977 | X        printf(no_db);
 978 | X        item.dptr = 0;          /* only dptr is set on this path; item.dsize is left unset */
 979 | X        return (item);
 980 | X    }
 981 | X    return (dbm_firstkey(cur_db));
 982 | X}
 983 | X/* nextkey: return dbm_nextkey() of the current database; key is accepted but not used */
 984 | Xdatum
 985 | Xnextkey(key)
 986 | Xdatum key;
 987 | X{
 988 | X    datum item;
 989 | X
 990 | X    if (cur_db == NODB) {
 991 | X        printf(no_db);
 992 | X        item.dptr = 0;          /* only dptr is set on this path; item.dsize is left unset */
 993 | X        return (item);
 994 | X    }
 995 | X    return (dbm_nextkey(cur_db));       /* one argument, as in the dbe.c and dbu.c callers */
 996 | X}
 997 | @@@End of dbm.c
 998 | echo x - dbm.h 1>&2
 999 | sed 's/^X//' >dbm.h <<'@@@End of dbm.h'
1000 | X/*
1001 | X * Copyright (c) 1983 The Regents of the University of California.
1002 | X * All rights reserved.
1003 | X *
1004 | X * Redistribution and use in source and binary forms are permitted
1005 | X * provided that the above copyright notice and this paragraph are
1006 | X * duplicated in all such forms and that any documentation,
1007 | X * advertising materials, and other materials related to such
1008 | X * distribution and use acknowledge that the software was developed
1009 | X * by the University of California, Berkeley.  The name of the
1010 | X * University may not be used to endorse or promote products derived
1011 | X * from this software without specific prior written permission.
1012 | X * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
1013 | X * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
1014 | X * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
1015 | X *
1016 | X *    @(#)dbm.h    5.2 (Berkeley) 5/24/89
1017 | X */
1018 | X
1019 | X#ifndef NULL
1020 | X/*
1021 | X * this is lunacy, we no longer use it (and never should have
1022 | X * unconditionally defined it), but, this whole file is for
1023 | X * backwards compatibility - someone may rely on this.
1024 | X */
1025 | X#define    NULL    ((char *) 0)
1026 | X#endif
1027 | X
1028 | X#include <ndbm.h>
1029 | X
1030 | Xdatum    fetch();
1031 | Xdatum    firstkey();
1032 | Xdatum    nextkey();
1033 | @@@End of dbm.h
1034 | echo x - dbu.c 1>&2
1035 | sed 's/^X//' >dbu.c <<'@@@End of dbu.c'
1036 | X#include <stdio.h>
1037 | X#include <sys/file.h>
1038 | X#ifdef SDBM
1039 | X#include "sdbm.h"
1040 | X#else
1041 | X#include <ndbm.h>
1042 | X#endif
1043 | X#include <string.h>
1044 | X
1045 | X#ifdef BSD42
1046 | X#define strchr	index
1047 | X#endif
1048 | X
1049 | Xextern int	getopt();
1050 | Xextern char	*strchr();
1051 | Xextern void	oops();
1052 | X
1053 | Xchar *progname;
1054 | X
1055 | Xstatic int rflag;
1056 | Xstatic char *usage = "%s [-R] cat | look |... dbmname";
1057 | X
1058 | X#define DERROR	0
1059 | X#define DLOOK	1
1060 | X#define DINSERT	2
1061 | X#define DDELETE 3
1062 | X#define	DCAT	4
1063 | X#define DBUILD	5
1064 | X#define DPRESS	6
1065 | X#define DCREAT	7
1066 | X
1067 | X#define LINEMAX	8192
1068 | X
1069 | Xtypedef struct {		/* one row of the command table */
1070 | X	char *sname;		/* command word, matched by parse() */
1071 | X	int scode;		/* D* action code, switched on in doit() */
1072 | X	int flags;		/* open flags that doit() passes to dbm_open() */
1073 | X} cmd;
1074 | X
1075 | Xstatic cmd cmds[] = {
1076 | X	/* word, action code, open flags; several words share one action code */
1077 | X	"fetch", DLOOK, 	O_RDONLY,
1078 | X	"get", DLOOK,		O_RDONLY,
1079 | X	"look", DLOOK,		O_RDONLY,
1080 | X	"add", DINSERT,		O_RDWR,
1081 | X	"insert", DINSERT,	O_RDWR,
1082 | X	"store", DINSERT,	O_RDWR,
1083 | X	"delete", DDELETE,	O_RDWR,
1084 | X	"remove", DDELETE,	O_RDWR,
1085 | X	"dump", DCAT,		O_RDONLY,
1086 | X	"list", DCAT, 		O_RDONLY,
1087 | X	"cat", DCAT,		O_RDONLY,
1088 | X	"creat", DCREAT,	O_RDWR | O_CREAT | O_TRUNC,
1089 | X	"new", DCREAT,		O_RDWR | O_CREAT | O_TRUNC,
1090 | X	"build", DBUILD,	O_RDWR | O_CREAT,
1091 | X	"squash", DPRESS,	O_RDWR,
1092 | X	"compact", DPRESS,	O_RDWR,
1093 | X	"compress", DPRESS,	O_RDWR
1094 | X};
1095 | X
1096 | X#define CTABSIZ (sizeof (cmds)/sizeof (cmd))
1097 | X
1098 | Xstatic cmd *parse();
1099 | Xstatic void badk(), doit(), prdatum();
1100 | X/* dbu main: parse -R, look the command word up with parse(), then run it through doit() */
1101 | Xint
1102 | Xmain(argc, argv)
1103 | Xint	argc;
1104 | Xchar *argv[];
1105 | X{
1106 | X	int c;
1107 | X	register cmd *act;
1108 | X	extern int optind;
1109 | X	extern char *optarg;
1110 | X
1111 | X	progname = argv[0];
1112 | X
1113 | X	while ((c = getopt(argc, argv, "R")) != EOF)
1114 | X		switch (c) {
1115 | X		case 'R':	       /* raw processing  */
1116 | X			rflag++;	/* rflag is only incremented here; nothing in this file reads it */
1117 | X			break;
1118 | X
1119 | X		default:
1120 | X			/* NOTE(review): usage itself contains "%s"; unless oops() expands it, it prints literally */
1121 | X			oops("usage: %s", usage);
1122 | X			break;
1123 | X		}
1124 | X
1125 | X	if ((argc -= optind) < 2)	/* need both a command word and a database name */
1126 | X		oops("usage: %s", usage);
1127 | X
1128 | X	if ((act = parse(argv[optind])) == NULL)
1129 | X		badk(argv[optind]);	/* badk() lists the valid words and exits */
1130 | X	optind++;
1131 | X	doit(act, argv[optind]);
1132 | X	return 0;
1133 | X}
1133 | X/* doit: open file with the command's flags, then run the command over stdin lines or the whole database */
1134 | Xstatic void
1135 | Xdoit(act, file)
1136 | Xregister cmd *act;
1137 | Xchar *file;
1138 | X{
1139 | X	datum key;
1140 | X	datum val;
1141 | X	register DBM *db;
1142 | X	register char *op;
1143 | X	register int n;
1144 | X	char *line;
1145 | X#ifdef TIME
1146 | X	long start;
1147 | X	extern long time();
1148 | X#endif
1149 | X
1150 | X	if ((db = dbm_open(file, act->flags, 0644)) == NULL)
1151 | X		oops("cannot open: %s", file);
1152 | X
1153 | X	if ((line = (char *) malloc(LINEMAX)) == NULL)
1154 | X		oops("%s: cannot get memory", "line alloc");
1155 | X
1156 | X	switch (act->scode) {
1157 | X
1158 | X	case DLOOK:		/* one key per input line: print its value, or complain on stderr */
1159 | X		while (fgets(line, LINEMAX, stdin) != NULL) {
1160 | X			if ((n = strlen(line)) > 0 && line[n - 1] == '\n')
1161 | X				line[--n] = 0;	/* strip the newline only when there is one */
1162 | X			key.dptr = line;
1163 | X			key.dsize = n;
1164 | X			val = dbm_fetch(db, key);
1165 | X			if (val.dptr != NULL) {
1166 | X				prdatum(stdout, val);
1167 | X				putchar('\n');
1168 | X				continue;
1169 | X			}
1170 | X			prdatum(stderr, key);
1171 | X			fprintf(stderr, ": not found.\n");
1172 | X		}
1173 | X		break;
1174 | X	case DINSERT:		/* accepted but does nothing */
1175 | X		break;
1176 | X	case DDELETE:		/* one key per input line */
1177 | X		while (fgets(line, LINEMAX, stdin) != NULL) {
1178 | X			if ((n = strlen(line)) > 0 && line[n - 1] == '\n')
1179 | X				line[--n] = 0;
1180 | X			key.dptr = line;
1181 | X			key.dsize = n;
1182 | X			if (dbm_delete(db, key) == -1) {
1183 | X				prdatum(stderr, key);
1184 | X				fprintf(stderr, ": not found.\n");
1185 | X			}
1186 | X		}
1187 | X		break;
1188 | X	case DCAT:		/* dump every pair as "key<TAB>value" */
1189 | X		for (key = dbm_firstkey(db); key.dptr != 0; 
1190 | X		     key = dbm_nextkey(db)) {
1191 | X			prdatum(stdout, key);
1192 | X			putchar('\t');
1193 | X			prdatum(stdout, dbm_fetch(db, key));
1194 | X			putchar('\n');
1195 | X		}
1196 | X		break;
1197 | X	case DBUILD:		/* input lines are "key<TAB>value"; existing keys are replaced */
1198 | X#ifdef TIME
1199 | X		start = time(0);
1200 | X#endif
1201 | X		while (fgets(line, LINEMAX, stdin) != NULL) {
1202 | X			if ((n = strlen(line)) > 0 && line[n - 1] == '\n')
1203 | X				line[--n] = 0;
1204 | X			key.dptr = line;
1205 | X			if ((op = strchr(line, '\t')) != 0) {
1206 | X				key.dsize = op - line;
1207 | X				*op++ = 0;	/* split at the first tab */
1208 | X				val.dptr = op;
1209 | X				val.dsize = line + n - op;
1210 | X			}
1211 | X			else
1212 | X				oops("bad input; %s", line);
1213 | X	
1214 | X			if (dbm_store(db, key, val, DBM_REPLACE) < 0) {
1215 | X				prdatum(stderr, key);
1216 | X				fprintf(stderr, ": ");
1217 | X				oops("store: %s", "failed");
1218 | X			}
1219 | X		}
1220 | X#ifdef TIME
1221 | X		printf("done: %ld seconds.\n", time(0) - start);	/* both operands are long */
1222 | X#endif
1223 | X		break;
1224 | X	case DPRESS:		/* accepted but does nothing */
1225 | X		break;
1226 | X	case DCREAT:		/* no work here: dbm_open() above was given O_CREAT | O_TRUNC */
1227 | X		break;
1228 | X	}
1229 | X
1230 | X	dbm_close(db);
1231 | X}
1232 | X/* badk: report an unknown command word, list every valid word on stderr, then exit(1) */
1233 | Xstatic void
1234 | Xbadk(word)
1235 | Xchar *word;
1236 | X{
1237 | X	register int i;
1238 | X
1239 | X	if (progname)
1240 | X		fprintf(stderr, "%s: ", progname);
1241 | X	fprintf(stderr, "bad keywd %s. use one of\n", word);
1242 | X	for (i = 0; i < (int)CTABSIZ; i++)	/* six words per output line */
1243 | X		fprintf(stderr, "%-8s%c", cmds[i].sname,
1244 | X			((i + 1) % 6 == 0) ? '\n' : ' ');
1245 | X	fprintf(stderr, "\n");
1246 | X	exit(1);
1247 | X	/*NOTREACHED*/
1248 | X}
1249 | X/* parse: return the cmds[] row whose sname equals str exactly, or NULL if there is none */
1250 | Xstatic cmd *
1251 | Xparse(str)
1252 | Xregister char *str;
1253 | X{
1254 | X	register int i = CTABSIZ;	/* rows left to examine */
1255 | X	register cmd *p;
1256 | X	
1257 | X	for (p = cmds; i--; p++)	/* linear scan in table order */
1258 | X		if (strcmp(p->sname, str) == 0)
1259 | X			return p;
1260 | X	return NULL;
1261 | X}
1262 | X/* prdatum: write the d.dsize bytes of d to stream, using M- and ^X notation where needed */
1263 | Xstatic void
1264 | Xprdatum(stream, d)
1265 | XFILE *stream;
1266 | Xdatum d;
1267 | X{
1268 | X	register int c;
1269 | X	register char *p = d.dptr;
1270 | X	register int n = d.dsize;
1271 | X
1272 | X	while (n--) {
1273 | X		c = *p++ & 0377;	/* take the byte as 0..255 */
1274 | X		if (c & 0200) {		/* high bit set: print "M-" and continue with the low 7 bits */
1275 | X			fprintf(stream, "M-");
1276 | X			c &= 0177;
1277 | X		}
1278 | X		if (c == 0177 || c < ' ') 
1279 | X			fprintf(stream, "^%c", (c == 0177) ? '?' : c + '@');	/* DEL as ^?, others as ^@ + c */
1280 | X		else
1281 | X			putc(c, stream);
1282 | X	}
1283 | X}
1284 | X
1285 | X
1286 | @@@End of dbu.c
1287 | echo x - grind 1>&2
1288 | sed 's/^X//' >grind <<'@@@End of grind'
1289 | X#!/bin/sh
1290 | Xrm -f /tmp/*.dir /tmp/*.pag    # NOTE: removes every .dir/.pag file in /tmp, not only those of /tmp/$2
1291 | Xawk -e '{
1292 | X        printf "%s\t", $0
1293 | X        for (i = 0; i < 40; i++)
1294 | X                printf "%s.", $0
1295 | X        printf "\n"
1296 | X}' < /usr/dict/words | $1 build /tmp/$2    # $1: program run as "$1 build"; $2: database name under /tmp
1297 | X
1298 | @@@End of grind
1299 | echo x - hash.c 1>&2
1300 | sed 's/^X//' >hash.c <<'@@@End of hash.c'
1301 | X/*
1302 | X * sdbm - ndbm work-alike hashed database library
1303 | X * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
1304 | X * author: oz@nexus.yorku.ca
1305 | X * status: public domain. keep it that way.
1306 | X *
1307 | X * hashing routine
1308 | X */
1309 | X
1310 | X#include "sdbm.h"
1311 | X/*
1312 | X * polynomial conversion ignoring overflows
1313 | X * [this seems to work remarkably well, in fact better
1314 | X * then the ndbm hash function. Replace at your own risk]
1315 | X * use: 65599	nice.
1316 | X *      65587   even better. 
1317 | X */
1318 | Xlong
1319 | Xdbm_hash(str, len)		/* hash the len bytes at str; both branches below apply the same step */
1320 | Xregister char *str;
1321 | Xregister int len;
1322 | X{
1323 | X	register unsigned long n = 0;	/* accumulator; the unsigned arithmetic wraps on overflow */
1324 | X
1325 | X#ifdef DUFF
1326 | X	/* one hash step: fold the next byte into n with multiplier 65599 */
1327 | X#define HASHC	n = *str++ + 65599 * n
1328 | X
1329 | X	if (len > 0) {
1330 | X		register int loop = (len + 8 - 1) >> 3;	/* number of passes: len / 8 rounded up */
1331 | X		/* jump into the unrolled body so the first pass consumes len % 8 bytes (8 if that is 0) */
1332 | X		switch(len & (8 - 1)) {
1333 | X		case 0:	do {
1334 | X			HASHC;	case 7:	HASHC;
1335 | X		case 6:	HASHC;	case 5:	HASHC;
1336 | X		case 4:	HASHC;	case 3:	HASHC;
1337 | X		case 2:	HASHC;	case 1:	HASHC;
1338 | X			} while (--loop);
1339 | X		}
1340 | X
1341 | X	}
1342 | X#else
1343 | X	while (len--)		/* NOTE(review): unlike the DUFF branch, there is no len > 0 guard here */
1344 | X		n = *str++ + 65599 * n;
1345 | X#endif
1346 | X	return n;
1347 | X}
1348 | @@@End of hash.c
1349 | echo x - makefile 1>&2
1350 | sed 's/^X//' >makefile <<'@@@End of makefile'
1351 | X#
1352 | X# makefile for public domain ndbm-clone: sdbm
1353 | X# DUFF: use duff's device (loop unroll) in parts of the code
1354 | X# SDBM: dbu.c includes "sdbm.h" instead of <ndbm.h>; BSD42: dbu.c maps strchr to index
1355 | XCFLAGS = -O -DSDBM -DDUFF -DBSD42
1356 | X#LDFLAGS = -p
1357 | X# OBJS go into libsdbm.a; SRCS is what lint checks; MISC, SRCS and HDRS are what shar packs
1358 | XOBJS = sdbm.o pair.o hash.o
1359 | XSRCS = sdbm.c pair.c hash.c dbu.c dba.c dbd.c util.c
1360 | XHDRS = tune.h sdbm.h pair.h
1361 | XMISC = README CHANGES COMPARE sdbm.3 dbe.c dbe.1 dbm.c dbm.h biblio \
1362 | X       readme.ms readme.ps
1363 | X# default target: the four command-line tools
1364 | Xall: dbu dba dbd dbe
1365 | X# dbu and dbe link against libsdbm.a; dba and dbd link only with util.o
1366 | Xdbu: dbu.o sdbm util.o
1367 | X	cc $(LDFLAGS) -o dbu dbu.o util.o libsdbm.a
1368 | X
1369 | Xdba: dba.o util.o
1370 | X	cc $(LDFLAGS) -o dba dba.o util.o
1371 | Xdbd: dbd.o util.o
1372 | X	cc $(LDFLAGS) -o dbd dbd.o util.o
1373 | Xdbe: dbe.o sdbm
1374 | X	cc $(LDFLAGS) -o dbe dbe.o libsdbm.a
1375 | X# "sdbm" is a target name, not a file: its recipe (re)builds libsdbm.a
1376 | Xsdbm: $(OBJS)
1377 | X	ar cr libsdbm.a $(OBJS)
1378 | X	ranlib libsdbm.a
1379 | X###	cp libsdbm.a /usr/lib/libsdbm.a
1380 | X# header dependencies
1381 | Xdba.o: sdbm.h
1382 | Xdbu.o: sdbm.h
1383 | Xutil.o:sdbm.h
1384 | X
1385 | X$(OBJS): sdbm.h tune.h pair.h
1386 | X
1387 | X#
1388 | X# dbu using berkelezoid ndbm routines [if you have them] for testing
1389 | X#
1390 | X#x-dbu: dbu.o util.o
1391 | X#	cc $(CFLAGS) -o x-dbu dbu.o util.o
1392 | Xlint:
1393 | X	lint -abchx $(SRCS)
1394 | X# clean removes build by-products; purge also removes the programs, the library and *.dir/*.pag files
1395 | Xclean:
1396 | X	rm -f *.o mon.out core
1397 | X
1398 | Xpurge: 	clean
1399 | X	rm -f dbu libsdbm.a dbd dba dbe x-dbu *.dir *.pag
1400 | X# shar packs the distribution into SDBM.SHAR
1401 | Xshar:
1402 | X	shar $(MISC) makefile $(SRCS) $(HDRS) >SDBM.SHAR
1403 | X# readme formats readme.ms into the plain-text README
1404 | Xreadme:
1405 | X	nroff -ms readme.ms | col -b >README
1406 | @@@End of makefile
1407 | echo x - pair.c 1>&2
1408 | sed 's/^X//' >pair.c <<'@@@End of pair.c'
1409 | X/*
1410 | X * sdbm - ndbm work-alike hashed database library
1411 | X * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
1412 | X * author: oz@nexus.yorku.ca
1413 | X * status: public domain.
1414 | X *
1415 | X * page-level routines
1416 | X */
1417 | X
1418 | X#ifndef lint
1419 | Xstatic char rcsid[] = "$Id: pair.c,v 1.10 90/12/13 13:00:35 oz Exp $";
1420 | X#endif
1421 | X
1422 | X#include "sdbm.h"
1423 | X#include "tune.h"
1424 | X#include "pair.h"
1425 | X
1426 | X#ifndef BSD42
1427 | X#include <memory.h>
1428 | X#endif
1429 | X
1430 | X#define exhash(item)	dbm_hash((item).dptr, (item).dsize)
1431 | X
1432 | X/* 
1433 | X * forward 
1434 | X */
1435 | Xstatic int seepair proto((char *, int, char *, int));
1436 | X
1437 | X/*
1438 | X * page format:
1439 | X *	+------------------------------+
1440 | X * ino	| n | keyoff | datoff | keyoff |
1441 | X * 	+------------+--------+--------+
1442 | X *	| datoff | - - - ---->	       |
1443 | X *	+--------+---------------------+
1444 | X *	|	 F R E E A R E A       |
1445 | X *	+--------------+---------------+
1446 | X *	|  <---- - - - | data          |
1447 | X *	+--------+-----+----+----------+
1448 | X *	|  key   | data     | key      |
1449 | X *	+--------+----------+----------+
1450 | X *
1451 | X * calculating the offsets for free area:  if the number
1452 | X * of entries (ino[0]) is zero, the offset to the END of
1453 | X * the free area is the block size. Otherwise, it is the
1454 | X * nth (ino[ino[0]]) entry's offset.
1455 | X */
1456 | X
1457 | Xint	/* fitpair: nonzero if a pair totalling need bytes fits in pag */
1458 | Xfitpair(pag, need)
1459 | Xchar *pag;
1460 | Xint need;
1461 | X{
1462 | X	register int n;
1463 | X	register int off;
1464 | X	register int free;
1465 | X	register short *ino = (short *) pag;
1466 | X	/* off: end of the free area (last offset in the index, or PBLKSIZ if empty) */
1467 | X	off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ;
1468 | X	free = off - (n + 1) * sizeof(short);	/* index uses the count plus n slots */
1469 | X	need += 2 * sizeof(short);	/* a new pair also takes two index slots */
1470 | X
1471 | X	debug(("free %d need %d\n", free, need));
1472 | X
1473 | X	return need <= free;
1474 | X}
1475 | X
1476 | Xvoid	/* putpair: append key and val to pag; does not check for room itself */
1477 | Xputpair(pag, key, val)
1478 | Xchar *pag;
1479 | Xdatum key;
1480 | Xdatum val;
1481 | X{
1482 | X	register int n;
1483 | X	register int off;
1484 | X	register short *ino = (short *) pag;
1485 | X
1486 | X	off = ((n = ino[0]) > 0) ? ino[n] : PBLKSIZ;	/* end of the free area */
1487 | X/*
1488 | X * enter the key first: copy it just below off and record its offset
1489 | X */
1490 | X	off -= key.dsize;
1491 | X	(void) memcpy(pag + off, key.dptr, key.dsize);
1492 | X	ino[n + 1] = off;
1493 | X/*
1494 | X * now the data, placed directly below the key
1495 | X */
1496 | X	off -= val.dsize;
1497 | X	(void) memcpy(pag + off, val.dptr, val.dsize);
1498 | X	ino[n + 2] = off;
1499 | X/*
1500 | X * adjust item count: two index slots (key, data) were added
1501 | X */
1502 | X	ino[0] += 2;
1503 | X}
1504 | X
1505 | Xdatum	/* getpair: the data stored under key in pag, or nullitem */
1506 | Xgetpair(pag, key)
1507 | Xchar *pag;
1508 | Xdatum key;
1509 | X{
1510 | X	register int i;
1511 | X	register int n;
1512 | X	datum val;
1513 | X	register short *ino = (short *) pag;
1514 | X	/* an empty page cannot hold the key */
1515 | X	if ((n = ino[0]) == 0)
1516 | X		return nullitem;
1517 | X
1518 | X	if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)	/* 0: not found */
1519 | X		return nullitem;
1520 | X
1521 | X	val.dptr = pag + ino[i + 1];	/* points into pag; nothing is copied */
1522 | X	val.dsize = ino[i] - ino[i + 1];	/* data ends where its key begins */
1523 | X	return val;
1524 | X}
1525 | X
1526 | X#ifdef SEEDUPS
1527 | Xint	/* duppair: nonzero if key is already present in pag */
1528 | Xduppair(pag, key)
1529 | Xchar *pag;
1530 | Xdatum key;
1531 | X{
1532 | X	register short *ino = (short *) pag;
1533 | X	return ino[0] > 0 && seepair(pag, ino[0], key.dptr, key.dsize) > 0;	/* seepair gives 0 on a miss */
1534 | X}
1535 | X#endif
1536 | X
1537 | Xdatum	/* getnkey: the num'th key (counting from 1) in pag, or nullitem */
1538 | Xgetnkey(pag, num)
1539 | Xchar *pag;
1540 | Xint num;
1541 | X{
1542 | X	datum key;
1543 | X	register int off;
1544 | X	register short *ino = (short *) pag;
1545 | X
1546 | X	num = num * 2 - 1;	/* index slot holding that key's offset */
1547 | X	if (ino[0] == 0 || num > ino[0])
1548 | X		return nullitem;
1549 | X	/* NOTE(review): num < 1 is not rejected here - confirm callers never pass it */
1550 | X	off = (num > 1) ? ino[num - 1] : PBLKSIZ;	/* key ends at the previous data offset */
1551 | X
1552 | X	key.dptr = pag + ino[num];	/* points into pag; nothing is copied */
1553 | X	key.dsize = off - ino[num];
1554 | X
1555 | X	return key;
1556 | X}
1557 | X
1558 | Xint	/* delpair: remove key and its data from pag; 1 if removed, 0 if absent */
1559 | Xdelpair(pag, key)
1560 | Xchar *pag;
1561 | Xdatum key;
1562 | X{
1563 | X	register int n;
1564 | X	register int i;
1565 | X	register short *ino = (short *) pag;
1566 | X
1567 | X	if ((n = ino[0]) == 0)
1568 | X		return 0;
1569 | X
1570 | X	if ((i = seepair(pag, n, key.dptr, key.dsize)) == 0)
1571 | X		return 0;
1572 | X/*
1573 | X * found the key. if it is the last entry
1574 | X * [i.e. i == n - 1] we just adjust the entry count.
1575 | X * hard case: move all data down onto the deleted pair,
1576 | X * shift offsets onto deleted offsets, and adjust them.
1577 | X * [note: 0 < i < n]
1578 | X */
1579 | X	if (i < n - 1) {
1580 | X		register int m;
1581 | X		register char *dst = pag + (i == 1 ? PBLKSIZ : ino[i - 1]);	/* end of the deleted pair */
1582 | X		register char *src = pag + ino[i + 1];	/* start of the deleted pair */
1583 | X		register int   zoo = dst - src;	/* bytes freed by the deletion */
1584 | X
1585 | X		debug(("free-up %d ", zoo));
1586 | X/*
1587 | X * shift data/keys down (toward the page end); the copy runs high to low
1588 | X */
1589 | X		m = ino[i + 1] - ino[n];	/* bytes stored below the deleted pair */
1590 | X#ifdef DUFF
1591 | X#define MOVB 	*--dst = *--src
1592 | X
1593 | X		if (m > 0) {
1594 | X			register int loop = (m + 8 - 1) >> 3;	/* passes: m / 8, rounded up */
1595 | X
1596 | X			switch (m & (8 - 1)) {	/* first pass moves m % 8 bytes (8 if that is 0) */
1597 | X			case 0:	do {
1598 | X				MOVB;	case 7:	MOVB;
1599 | X			case 6:	MOVB;	case 5:	MOVB;
1600 | X			case 4:	MOVB;	case 3:	MOVB;
1601 | X			case 2:	MOVB;	case 1:	MOVB;
1602 | X				} while (--loop);
1603 | X			}
1604 | X		}
1605 | X#else
1606 | X#ifdef MEMMOVE
1607 | X		memmove(dst - m, src - m, m);	/* same m bytes; the regions may overlap */
1608 | X#else
1609 | X		while (m--)
1610 | X			*--dst = *--src;
1611 | X#endif
1612 | X#endif
1613 | X/*
1614 | X * adjust offset index up: each later slot moves two places, plus zoo
1615 | X */
1616 | X		while (i < n - 1) {
1617 | X			ino[i] = ino[i + 2] + zoo;
1618 | X			i++;
1619 | X		}
1620 | X	}
1621 | X	ino[0] -= 2;	/* one key slot and one data slot are gone */
1622 | X	return 1;
1623 | X}
1624 | X
1625 | X/*
1626 | X * seepair: search the n index slots of pag for a key of siz bytes.
1627 | X * return the index of the key's offset slot, in the range 0 < i < n.
1628 | X * return 0 if not found.
1629 | X */
1630 | Xstatic int
1631 | Xseepair(pag, n, key, siz)
1632 | Xchar *pag;
1633 | Xregister int n;
1634 | Xregister char *key;
1635 | Xregister int siz;
1636 | X{
1637 | X	register int i;
1638 | X	register int off = PBLKSIZ;	/* end of the key under test */
1639 | X	register short *ino = (short *) pag;
1640 | X
1641 | X	for (i = 1; i < n; i += 2) {	/* odd slots hold key offsets */
1642 | X		if (siz == off - ino[i] &&	/* compare lengths before bytes */
1643 | X		    memcmp(key, pag + ino[i], siz) == 0)
1644 | X			return i;
1645 | X		off = ino[i + 1];	/* next key ends where this pair's data starts */
1646 | X	}
1647 | X	return 0;
1648 | X}
1649 | X
1650 | Xvoid	/* splpage: redistribute the pairs of pag between pag and new */
1651 | Xsplpage(pag, new, sbit)
1652 | Xchar *pag;
1653 | Xchar *new;
1654 | Xlong sbit;
1655 | X{
1656 | X	datum key;
1657 | X	datum val;
1658 | X
1659 | X	register int n;
1660 | X	register int off = PBLKSIZ;	/* end of the current key */
1661 | X	char cur[PBLKSIZ];	/* scratch copy of the original page */
1662 | X	register short *ino = (short *) cur;
1663 | X
1664 | X	(void) memcpy(cur, pag, PBLKSIZ);
1665 | X	(void) memset(pag, 0, PBLKSIZ);	/* both output pages start out empty */
1666 | X	(void) memset(new, 0, PBLKSIZ);
1667 | X
1668 | X	n = ino[0];
1669 | X	for (ino++; n > 0; ino += 2) {	/* ino[0]: key offset, ino[1]: data offset */
1670 | X		key.dptr = cur + ino[0]; 
1671 | X		key.dsize = off - ino[0];
1672 | X		val.dptr = cur + ino[1];
1673 | X		val.dsize = ino[0] - ino[1];
1674 | X/*
1675 | X * select the page pointer (hash & sbit nonzero: new, else pag) and insert
1676 | X */
1677 | X		(void) putpair((exhash(key) & sbit) ? new : pag, key, val);
1678 | X
1679 | X		off = ino[1];	/* the next key ends where this data starts */
1680 | X		n -= 2;
1681 | X	}
1682 | X
1683 | X	debug(("%d split %d/%d\n", ((short *) cur)[0] / 2, 
1684 | X	       ((short *) new)[0] / 2,
1685 | X	       ((short *) pag)[0] / 2));
1686 | X}
1687 | X
1688 | X/*
1689 | X * check page sanity: return 1 if pag looks valid, 0 if not.
1690 | X * the entry count must be even (offsets come in key/data pairs) and
1691 | X * leave the whole index inside the page; offsets must be non-negative
1692 | X * and in descending order. this could be made more rigorous.
1693 | X */
1694 | Xint
1695 | Xchkpage(pag)
1696 | Xchar *pag;
1697 | X{
1698 | X	register int n;
1699 | X	register int off;
1700 | X	register short *ino = (short *) pag;
1701 | X	/* an odd or too-large count would make the loop read past the page */
1702 | X	if ((n = ino[0]) < 0 || (n & 1) || n >= PBLKSIZ / sizeof(short))
1703 | X		return 0;
1704 | X
1705 | X	if (n > 0) {
1706 | X		off = PBLKSIZ;
1707 | X		for (ino++; n > 0; ino += 2) {
1708 | X			if (ino[0] > off || ino[1] < 0 ||	/* need 0 <= data <= key <= off */
1709 | X			    ino[1] > ino[0])
1710 | X				return 0;
1711 | X			off = ino[1];	/* the next key must end at or below this data */
1712 | X			n -= 2;
1713 | X		}
1714 | X	}
1715 | X	return 1;
1716 | X}
1717 | @@@End of pair.c
1718 | echo x - pair.h 1>&2
1719 | sed 's/^X//' >pair.h <<'@@@End of pair.h'
1720 | Xextern int fitpair proto((char *, int));
1721 | Xextern void  putpair proto((char *, datum, datum));
1722 | Xextern datum	getpair proto((char *, datum));
1723 | Xextern int  delpair proto((char *, datum));
1724 | Xextern int  chkpage proto((char *));
1725 | Xextern datum getnkey proto((char *, int));
1726 | Xextern void splpage proto((char *, char *, long));
1727 | X#ifdef SEEDUPS
1728 | Xextern int duppair proto((char *, datum));
1729 | X#endif
1730 | @@@End of pair.h
1731 | echo x - readme.ms 1>&2
1732 | sed 's/^X//' >readme.ms <<'@@@End of readme.ms'
1733 | X.\" tbl | readme.ms | [tn]roff -ms | ...
1734 | X.\" note the "C" (courier) and "CB" fonts: you will probably have to
1735 | X.\" change these.
1736 | X.\" $Id: readme.ms,v 1.1 90/12/13 13:09:15 oz Exp Locker: oz $
1737 | X
1738 | X.de P1
1739 | X.br
1740 | X.nr dT 4
1741 | X.nf
1742 | X.ft C
1743 | X.sp .5
1744 | X.nr t \\n(dT*\\w'x'u
1745 | X.ta 1u*\\ntu 2u*\\ntu 3u*\\ntu 4u*\\ntu 5u*\\ntu 6u*\\ntu 7u*\\ntu 8u*\\ntu 9u*\\ntu 10u*\\ntu 11u*\\ntu 12u*\\ntu 13u*\\ntu 14u*\\ntu
1746 | X..
1747 | X.de P2
1748 | X.br
1749 | X.ft 1
1750 | X.br
1751 | X.sp .5
1752 | X.br
1753 | X.fi
1754 | X..
1755 | X.\" CW uses the typewriter/courier font.
1756 | X.de CW
1757 | X\fC\\$1\\fP\\$2
1758 | X..
1759 | X
1760 | X.\" Footnote numbering [by Henry Spencer]
1761 | X.\" <text>\*f for a footnote number..
1762 | X.\" .FS
1763 | X.\" \*F <footnote text>
1764 | X.\" .FE
1765 | X.\"
1766 | X.ds f \\u\\s-2\\n+f\\s+2\\d
1767 | X.nr f 0 1
1768 | X.ds F \\n+F.
1769 | X.nr F 0 1
1770 | X
1771 | X.ND
1772 | X.LP
1773 | X.TL
1774 | X\fIsdbm\fP \(em Substitute DBM
1775 | X.br
1776 | Xor
1777 | X.br
1778 | XBerkeley \fIndbm\fP for Every UN*X\** Made Simple
1779 | X.AU
1780 | XOzan (oz) Yigit
1781 | X.AI
1782 | XThe Guild of PD Software Toolmakers
1783 | XToronto - Canada
1784 | X.sp
1785 | Xoz@nexus.yorku.ca
1786 | X.LP
1787 | X.FS
1788 | XUN*X is not a trademark of any (dis)organization.
1789 | X.FE
1790 | X.sp 2
1791 | X\fIImplementation is the sincerest form of flattery. \(em L. Peter Deutsch\fP
1792 | X.SH
1793 | XA The Clone of the \fIndbm\fP library
1794 | X.PP
1795 | XThe sources accompanying this notice \(em \fIsdbm\fP \(em constitute
1796 | Xthe first public release (Dec. 1990) of a complete clone of
1797 | Xthe Berkeley UN*X \fIndbm\fP library. The \fIsdbm\fP library is meant to
1798 | Xclone the proven functionality of \fIndbm\fP as closely as possible,
1799 | Xincluding a few improvements. It is practical, easy to understand, and
1800 | Xcompatible.
1801 | XThe \fIsdbm\fP library is not derived from any licensed, proprietary or
1802 | Xcopyrighted software.
1803 | X.PP
1804 | XThe \fIsdbm\fP implementation is based on a 1978 algorithm
1805 | X[Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''.
1806 | XIn the course of searching for a substitute for \fIndbm\fP, I
1807 | Xprototyped three different external-hashing algorithms [Lar78, Fag79, Lit80]
1808 | Xand ultimately chose Larson's algorithm as a basis of the \fIsdbm\fP
1809 | Ximplementation. The Bell Labs
1810 | X\fIdbm\fP (and therefore \fIndbm\fP) is based on an algorithm invented by
1811 | XKen Thompson, [Tho90, Tor87] and predates Larson's work.
1812 | X.PP
1813 | XThe \fIsdbm\fR programming interface is totally compatible
1814 | Xwith \fIndbm\fP and includes a slight improvement in database initialization.
1815 | XIt is also expected to be binary-compatible under most UN*X versions that
1816 | Xsupport the \fIndbm\fP library.
1817 | X.PP
1818 | XThe \fIsdbm\fP implementation shares the shortcomings of the \fIndbm\fP
1819 | Xlibrary, as a side effect of various simplifications to the original Larson
1820 | Xalgorithm. It does produce \fIholes\fP in the page file as it writes
1821 | Xpages past the end of file. (Larson's paper includes a clever solution to
1822 | Xthis problem that is a result of using the hash value directly as a block
1823 | Xaddress.) On the other hand, extensive tests seem to indicate that \fIsdbm\fP
1824 | Xcreates fewer holes in general, and the resulting pagefiles are
1825 | Xsmaller. The \fIsdbm\fP implementation is also faster than \fIndbm\fP
1826 | Xin database creation.
1827 | XUnlike the \fIndbm\fP, the \fIsdbm\fP
1828 | X.CW store
1829 | Xoperation will not ``wander away'' trying to split its
1830 | Xdata pages to insert a datum that \fIcannot\fP (due to elaborate worst-case
1831 | Xsituations) be inserted. (It will fail after a pre-defined number of attempts.)
1832 | X.SH
1833 | XImportant Compatibility Warning
1834 | X.PP
1835 | XThe \fIsdbm\fP and \fIndbm\fP
1836 | Xlibraries \fIcannot\fP share databases: one cannot read the (dir/pag)
1837 | Xdatabase created by the other. This is due to the differences
1838 | Xbetween the \fIndbm\fP and \fIsdbm\fP algorithms\**, 
1839 | X.FS
1840 | XTorek's discussion [Tor87]
1841 | Xindicates that \fIdbm/ndbm\fP implementations use the hash
1842 | Xvalue to traverse the radix trie differently than \fIsdbm\fP
1843 | Xand as a result, the page indexes are generated in \fIdifferent\fP order.
1844 | XFor more information, send e-mail to the author.
1845 | X.FE
1846 | Xand the hash functions
1847 | Xused.
1848 | XIt is easy to convert between the \fIdbm/ndbm\fP databases and \fIsdbm\fP
1849 | Xby ignoring the index completely: see
1850 | X.CW dbd ,
1851 | X.CW dbu
1852 | Xetc.
1853 | X.R
1854 | X.LP
1855 | X.SH
1856 | XNotice of Intellectual Property
1857 | X.LP
1858 | X\fIThe entire\fP sdbm  \fIlibrary package, as authored by me,\fP Ozan S. Yigit,
1859 | X\fIis hereby placed in the public domain.\fP As such, the author is not
1860 | Xresponsible for the consequences of use of this software, no matter how
1861 | Xawful, even if they arise from defects in it. There is no expressed or
1862 | Ximplied warranty for the \fIsdbm\fP library.
1863 | X.PP
1864 | XSince the \fIsdbm\fP
1865 | Xlibrary package is in the public domain, this \fIoriginal\fP
1866 | Xrelease or any additional public-domain releases of the modified original
1867 | Xcannot possibly (by definition) be withheld from you. Also by definition,
1868 | XYou (singular) have all the rights to this code (including the right to
1869 | Xsell without permission, the right to hoard\**
1870 | X.FS
1871 | XYou cannot really hoard something that is available to the public at
1872 | Xlarge, but try if it makes you feel any better.
1873 | X.FE
1874 | Xand the right to do other icky things as
1875 | Xyou see fit) but those rights are also granted to everyone else.
1876 | X.PP
1877 | XPlease note that all previous distributions of this software contained
1878 | Xa copyright (which is now dropped) to protect its
1879 | Xorigins and its current public domain status against any possible claims
1880 | Xand/or challenges.
1881 | X.SH
1882 | XAcknowledgments
1883 | X.PP
1884 | XMany people have been very helpful and supportive.  A partial list would
1885 | Xnecessarily include Rayan Zachariassen (who contributed the man page,
1886 | Xand also hacked a MMAP version of \fIsdbm\fP),
1887 | XArnold Robbins, Chris Lewis,
1888 | XBill Davidsen, Henry Spencer, Geoff Collyer, Rich Salz (who got me started
1889 | Xin the first place), Johannes Ruschein
1890 | X(who did the minix port) and David Tilbrook. I thank you all.
1891 | X.SH
1892 | XDistribution Manifest and Notes
1893 | X.LP
1894 | XThis distribution of \fIsdbm\fP includes (at least) the following:
1895 | X.P1
1896 | X	CHANGES		change log
1897 | X	README		this file.
1898 | X	biblio		a small bibliography on external hashing
1899 | X	dba.c		a crude (n/s)dbm page file analyzer
1900 | X	dbd.c		a crude (n/s)dbm page file dumper (for conversion)
1901 | X	dbe.1		man page for dbe.c
1902 | X	dbe.c		Janick's database editor
1903 | X	dbm.c		a dbm library emulation wrapper for ndbm/sdbm
1904 | X	dbm.h		header file for the above
1905 | X	dbu.c		a crude db management utility
1906 | X	hash.c		hashing function
1907 | X	makefile	guess.
1908 | X	pair.c		page-level routines (posted earlier)
1909 | X	pair.h		header file for the above
1910 | X	readme.ms	troff source for the README file
1911 | X	sdbm.3		man page
1912 | X	sdbm.c		the real thing
1913 | X	sdbm.h		header file for the above
1914 | X	tune.h		place for tuning & portability thingies
1915 | X	util.c		miscellaneous
1916 | X.P2
1917 | X.PP
1918 | X.CW dbu
1919 | Xis a simple database manipulation program\** that tries to look
1920 | X.FS
1921 | XThe 
1922 | X.CW dbd ,
1923 | X.CW dba ,
1924 | X.CW dbu
1925 | Xutilities are quick hacks and are not fit for production use. They were
1926 | Xdeveloped late one night, just to test out \fIsdbm\fP, and convert some
1927 | Xdatabases.
1928 | X.FE
1929 | Xlike Bell Labs'
1930 | X.CW cbt
1931 | Xutility. It is currently incomplete in functionality.
1932 | XI use
1933 | X.CW dbu
1934 | Xto test out the routines: it takes (from stdin) tab separated
1935 | Xkey/value pairs for commands like
1936 | X.CW build
1937 | Xor
1938 | X.CW insert
1939 | Xor takes keys for
1940 | Xcommands like
1941 | X.CW delete
1942 | Xor
1943 | X.CW look .
1944 | X.P1
1945 | X	dbu <build|creat|look|insert|cat|delete> dbmfile
1946 | X.P2
1947 | X.PP
1948 | X.CW dba
1949 | Xis a crude analyzer of \fIdbm/sdbm/ndbm\fP
1950 | Xpage files. It scans the entire
1951 | Xpage file, reporting page level statistics, and totals at the end.
1952 | X.PP
1953 | X.CW dbd
1954 | Xis a crude dump program for \fIdbm/ndbm/sdbm\fP
1955 | Xdatabases. It ignores the
1956 | Xbitmap, and dumps the data pages in sequence. It can be used to create
1957 | Xinput for the
1958 | X.CW dbu 
1959 | Xutility.
1960 | XNote that
1961 | X.CW dbd
1962 | Xwill skip any NULLs in the key and data
1963 | Xfields, thus is unsuitable to convert some peculiar databases that
1964 | Xinsist on including the terminating null.
1965 | X.PP
1966 | XI have also included a copy of the
1967 | X.CW dbe
1968 | X(\fIndbm\fP DataBase Editor) by Janick Bergeron [janick@bnr.ca] for
1969 | Xyour pleasure. You may find it more useful than the little
1970 | X.CW dbu
1971 | Xutility.
1972 | X.PP
1973 | X.CW dbm.[ch]
1974 | Xis a \fIdbm\fP library emulation on top of \fIndbm\fP
1975 | X(and hence suitable for \fIsdbm\fP). Written by Robert Elz.
1976 | X.PP
1977 | XThe \fIsdbm\fP
1978 | Xlibrary has been around in beta test for quite a long time, and from whatever
1979 | Xlittle feedback I received (maybe no news is good news), I believe it has been
1980 | Xfunctioning without any significant problems. I would, of course, appreciate
1981 | Xall fixes and/or improvements. Portability enhancements would especially be
1982 | Xuseful.
1983 | X.SH
1984 | XImplementation Issues
1985 | X.PP
1986 | XHash functions:
1987 | XThe algorithm behind \fIsdbm\fP implementation needs a good bit-scrambling
1988 | Xhash function to be effective. I ran into a set of constants for a simple
1989 | Xhash function that seem to help \fIsdbm\fP perform better than \fIndbm\fP
1990 | Xfor various inputs:
1991 | X.P1
1992 | X	/*
1993 | X	 * polynomial conversion ignoring overflows
1994 | X	 * 65599 nice. 65587 even better.
1995 | X	 */
1996 | X	long
1997 | X	dbm_hash(char *str, int len) {
1998 | X		register unsigned long n = 0;
1999 | X	
2000 | X		while (len--)
2001 | X			n = n * 65599 + *str++;
2002 | X		return n;
2003 | X	}
2004 | X.P2
2005 | X.PP
2006 | XThere may be better hash functions for the purposes of dynamic hashing.
2007 | XTry your favorite, and check the pagefile. If it contains too many pages
2008 | Xwith too many holes, (in relation to this one for example) or if
2009 | X\fIsdbm\fP
2010 | Xsimply stops working (fails after 
2011 | X.CW SPLTMAX
2012 | Xattempts to split) when you feed your
2013 | XNEWS 
2014 | X.CW history
2015 | Xfile to it, you probably do not have a good hashing function.
2016 | XIf you do better (for different types of input), I would like to know
2017 | Xabout the function you use.
2018 | X.PP
2019 | XBlock sizes: It seems (from various tests on a few machines) that a page
2020 | Xfile block size
2021 | X.CW PBLKSIZ
2022 | Xof 1024 is by far the best for performance, but
2023 | Xthis also happens to limit the size of a key/value pair. Depending on your
2024 | Xneeds, you may wish to increase the page size, and also adjust
2025 | X.CW PAIRMAX
2026 | X(the maximum size of a key/value pair allowed: should always be at least
2027 | Xthree words smaller than
2028 | X.CW PBLKSIZ .)
2029 | Xaccordingly. The system-wide version of the library
2030 | Xshould probably be
2031 | Xconfigured with 1024 (distribution default), as this appears to be sufficient
2032 | Xfor most common uses of \fIsdbm\fP.
2033 | X.SH
2034 | XPortability
2035 | X.PP
2036 | XThis package has been tested in many different UN*Xes even including minix,
2037 | Xand appears to be reasonably portable. This does not mean it will port
2038 | Xeasily to non-UN*X systems.
2039 | X.SH
2040 | XNotes and Miscellaneous
2041 | X.PP
2042 | XThe \fIsdbm\fP is not a very complicated package, at least not after you
2043 | Xfamiliarize yourself with the literature on external hashing. There are
2044 | Xother interesting algorithms in existence that ensure (approximately)
2045 | Xsingle-read access to a data value associated with any key. These are
2046 | Xdirectory-less schemes such as \fIlinear hashing\fP [Lit80] (+ Larson
2047 | Xvariations), \fIspiral storage\fP [Mar79] or directory schemes such as
2048 | X\fIextendible hashing\fP [Fag79] by Fagin et al. I do hope these sources
2049 | Xprovide a reasonable playground for experimentation with other algorithms.
2050 | XSee the June 1988 issue of ACM Computing Surveys [Enb88] for an
2051 | Xexcellent overview of the field. 
2052 | X.PG
2053 | X.SH
2054 | XReferences
2055 | X.LP
2056 | X.IP [Lar78] 4m
2057 | XP.-A. Larson,
2058 | X``Dynamic Hashing'', \fIBIT\fP, vol.  18,  pp. 184-201, 1978.
2059 | X.IP [Tho90] 4m
2060 | XKen Thompson, \fIprivate communication\fP, Nov. 1990
2061 | X.IP [Lit80] 4m
2062 | XW. Litwin,
2063 | X``Linear Hashing: A new tool  for  file  and table addressing'',
2064 | X\fIProceedings of the 6th Conference on Very Large  Databases  (Montreal)\fP,
2065 | Xpp.  212-223,  Very Large Database Foundation, Saratoga, Calif., 1980.
2066 | X.IP [Fag79] 4m
2067 | XR. Fagin, J.  Nievergelt,  N.  Pippenger,  and  H.  R. Strong,
2068 | X``Extendible Hashing - A Fast Access Method for Dynamic Files'',
2069 | X\fIACM Trans. Database Syst.\fP, vol. 4,  no.3, pp. 315-344, Sept. 1979.
2070 | X.IP [Wal84] 4m
2071 | XRich Wales,
2072 | X``Discussion of "dbm" data base system'', \fIUSENET newsgroup unix.wizards\fP,
2073 | XJan. 1984.
2074 | X.IP [Tor87] 4m
2075 | XChris Torek,
2076 | X``Re:  dbm.a  and  ndbm.a  archives'', \fIUSENET newsgroup comp.unix\fP,
2077 | X1987.
2078 | X.IP [Mar79] 4m
2079 | XG. N. Martin,
2080 | X``Spiral Storage: Incrementally  Augmentable  Hash  Addressed  Storage'',
2081 | X\fITechnical Report #27\fP, University of Warwick, Coventry, U.K., 1979.
2082 | X.IP [Enb88] 4m
2083 | XR. J. Enbody and H. C. Du,
2084 | X``Dynamic Hashing  Schemes'', \fIACM Computing Surveys\fP,
2085 | Xvol. 20, no. 2, pp. 85-113, June 1988.
2086 | @@@End of readme.ms
2087 | echo x - readme.txt 1>&2
2088 | sed 's/^X//' >readme.txt <<'@@@End of readme.txt'
2089 | X
2090 | X
2091 | X
2092 | X
2093 | X
2094 | X
2095 | X                   sdbm - Substitute DBM
2096 | X                             or
2097 | X        Berkeley ndbm for Every UN*X[1] Made Simple
2098 | X
2099 | X                      Ozan (oz) Yigit
2100 | X
2101 | X            The Guild of PD Software Toolmakers
2102 | X                      Toronto - Canada
2103 | X
2104 | X                     oz@nexus.yorku.ca
2105 | X
2106 | X
2107 | X
2108 | XImplementation is the sincerest form of flattery. - L. Peter
2109 | XDeutsch
2110 | X
2111 | XA The Clone of the ndbm library
2112 | X
2113 | X     The sources accompanying this notice - sdbm  -  consti-
2114 | Xtute  the  first  public  release  (Dec. 1990) of a complete
2115 | Xclone of the Berkeley UN*X ndbm library. The sdbm library is
2116 | Xmeant  to  clone the proven functionality of ndbm as closely
2117 | Xas possible, including a few improvements. It is  practical,
2118 | Xeasy to understand, and compatible.  The sdbm library is not
2119 | Xderived  from  any  licensed,  proprietary  or   copyrighted
2120 | Xsoftware.
2121 | X
2122 | X     The sdbm implementation is based on  a  1978  algorithm
2123 | X[Lar78] by P.-A. (Paul) Larson known as ``Dynamic Hashing''.
2124 | XIn the course of searching for a substitute for ndbm, I pro-
2125 | Xtotyped  three different external-hashing algorithms [Lar78,
2126 | XFag79, Lit80] and ultimately chose Larson's algorithm  as  a
2127 | Xbasis  of  the  sdbm  implementation. The Bell Labs dbm (and
2128 | Xtherefore ndbm) is based on an  algorithm  invented  by  Ken
2129 | XThompson, [Tho90, Tor87] and predates Larson's work.
2130 | X
2131 | X     The sdbm programming interface  is  totally  compatible
2132 | Xwith ndbm and includes a slight improvement in database ini-
2133 | Xtialization.  It is also expected  to  be  binary-compatible
2134 | Xunder most UN*X versions that support the ndbm library.
2135 | X
2136 | X     The sdbm implementation shares the shortcomings of  the
2137 | Xndbm library, as a side effect of various simplifications to
2138 | Xthe original Larson algorithm. It does produce holes in  the
2139 | Xpage file as it writes pages past the end of file. (Larson's
2140 | Xpaper includes a clever solution to this problem that  is  a
2141 | Xresult of using the hash value directly as a block address.)
2142 | XOn the other hand, extensive tests  seem  to  indicate  that
2143 | Xsdbm creates fewer holes in general, and the resulting page-
2144 | Xfiles are smaller. The sdbm implementation  is  also  faster
2145 | Xthan  ndbm  in database creation.  Unlike the ndbm, the sdbm
2146 | X_________________________
2147 | X
2148 | X  [1] UN*X is not a trademark of any (dis)organization.
2149 | X
2150 | X
2151 | X
2152 | X
2153 | X
2154 | X
2155 | X
2156 | X
2157 | X
2158 | X                           - 2 -
2159 | X
2160 | X
2161 | Xstore operation will not ``wander away'' trying to split its
2162 | Xdata  pages  to insert a datum that cannot (due to elaborate
2163 | Xworst-case situations) be inserted. (It will  fail  after  a
2164 | Xpre-defined number of attempts.)
2165 | X
2166 | XImportant Compatibility Warning
2167 | X
2168 | X     The sdbm and ndbm libraries cannot share databases: one
2169 | Xcannot  read  the  (dir/pag)  database created by the other.
2170 | XThis is due to the differences between  the  ndbm  and  sdbm
2171 | Xalgorithms[2], and the hash functions used.  It is  easy  to
2172 | Xconvert  between the dbm/ndbm databases and sdbm by ignoring
2173 | Xthe index completely: see dbd, dbu etc.
2174 | X
2175 | X
2176 | XNotice of Intellectual Property
2177 | X
2178 | XThe entire sdbm  library package, as authored by me, Ozan S.
2179 | XYigit,  is  hereby placed in the public domain. As such, the
2180 | Xauthor is not responsible for the  consequences  of  use  of
2181 | Xthis  software, no matter how awful, even if they arise from
2182 | Xdefects in it. There is no expressed or implied warranty for
2183 | Xthe sdbm library.
2184 | X
2185 | X     Since the sdbm library package is in the public domain,
2186 | Xthis   original  release  or  any  additional  public-domain
2187 | Xreleases of the modified original cannot possibly (by defin-
2188 | Xition) be withheld from you. Also by definition, You (singu-
2189 | Xlar) have all the rights to this code (including  the  right
2190 | Xto sell without permission, the right to  hoard[3]  and  the
2191 | Xright  to  do  other  icky  things as you see fit) but those
2192 | Xrights are also granted to everyone else.
2193 | X
2194 | X     Please note that all  previous  distributions  of  this
2195 | Xsoftware  contained  a  copyright  (which is now dropped) to
2196 | Xprotect its origins and its  current  public  domain  status
2197 | Xagainst any possible claims and/or challenges.
2198 | X
2199 | XAcknowledgments
2200 | X
2201 | X     Many people have been very helpful and  supportive.   A
2202 | Xpartial list  would  necessarily  include Rayan Zachariassen
2203 | X(who contributed the  man  page,  and  also  hacked  a  MMAP
2204 | X_________________________
2205 | X
2206 | X  [2] Torek's   discussion   [Tor87]   indicates   that
2207 | Xdbm/ndbm implementations use the hash value to traverse
2208 | Xthe radix trie differently than sdbm and as  a  result,
2209 | Xthe page indexes are generated in different order.  For
2210 | Xmore information, send e-mail to the author.
2211 | X  [3] You  cannot really hoard something that is avail-
2212 | Xable to the public at large, but try if  it  makes  you
2213 | Xfeel any better.
2214 | X
2215 | X
2216 | X
2217 | X
2218 | X
2219 | X
2220 | X
2221 | X
2222 | X
2223 | X
2224 | X                           - 3 -
2225 | X
2226 | X
2227 | Xversion of sdbm), Arnold Robbins, Chris Lewis,  Bill  David-
2228 | Xsen,  Henry  Spencer,  Geoff  Collyer, Rich Salz (who got me
2229 | Xstarted in the first place), Johannes Ruschein (who did  the
2230 | Xminix port) and David Tilbrook. I thank you all.
2231 | X
2232 | XDistribution Manifest and Notes
2233 | X
2234 | XThis distribution of sdbm includes (at least) the following:
2235 | X
2236 | X    CHANGES     change log
2237 | X    README      this file.
2238 | X    biblio      a small bibliography on external hashing
2239 | X    dba.c       a crude (n/s)dbm page file analyzer
2240 | X    dbd.c       a crude (n/s)dbm page file dumper (for conversion)
2241 | X    dbe.1       man page for dbe.c
2242 | X    dbe.c       Janick's database editor
2243 | X    dbm.c       a dbm library emulation wrapper for ndbm/sdbm
2244 | X    dbm.h       header file for the above
2245 | X    dbu.c       a crude db management utility
2246 | X    hash.c      hashing function
2247 | X    makefile    guess.
2248 | X    pair.c      page-level routines (posted earlier)
2249 | X    pair.h      header file for the above
2250 | X    readme.ms   troff source for the README file
2251 | X    sdbm.3      man page
2252 | X    sdbm.c      the real thing
2253 | X    sdbm.h      header file for the above
2254 | X    tune.h      place for tuning & portability thingies
2255 | X    util.c      miscellaneous
2256 | X
2257 | X     dbu is a simple database manipulation  program[4]  that
2258 | Xtries to look like Bell Labs' cbt utility. It  is  currently
2259 | Xincomplete in functionality.  I use dbu to test out the rou-
2260 | Xtines: it takes (from stdin) tab separated  key/value  pairs
2261 | Xfor commands like build or insert or takes keys for commands
2262 | Xlike delete or look.
2263 | X
2264 | X    dbu <build|creat|look|insert|cat|delete> dbmfile
2265 | X
2266 | X     dba is a crude analyzer of dbm/sdbm/ndbm page files. It
2267 | Xscans the entire page file, reporting page level statistics,
2268 | Xand totals at the end.
2269 | X
2270 | X     dbd is a crude dump  program  for  dbm/ndbm/sdbm  data-
2271 | Xbases.  It  ignores  the bitmap, and dumps the data pages in
2272 | Xsequence. It can be used to create input for the  dbu  util-
2273 | Xity.   Note that dbd will skip any NULLs in the key and data
2274 | Xfields,  thus  is  unsuitable  to  convert   some   peculiar
2275 | X_________________________
2276 | X
2277 | X  [4] The dbd, dba, dbu utilities are quick  hacks  and
2278 | Xare  not  fit  for  production use. They were developed
2279 | Xlate one night, just to test out sdbm, and convert some
2280 | Xdatabases.
2281 | X
2282 | X
2283 | X
2284 | X
2285 | X
2286 | X
2287 | X
2288 | X
2289 | X
2290 | X                           - 4 -
2291 | X
2292 | X
2293 | Xdatabases that insist on including the terminating null.
2294 | X
2295 | X     I have also included a copy of the dbe  (ndbm  DataBase
2296 | XEditor)  by  Janick Bergeron [janick@bnr.ca] for your pleas-
2297 | Xure. You may find it more useful than the little  dbu  util-
2298 | Xity.
2299 | X
2300 | X     dbm.[ch] is a dbm library emulation on top of ndbm (and
2301 | Xhence suitable for sdbm). Written by Robert Elz.
2302 | X
2303 | X     The sdbm library has been around in beta test for quite
2304 | Xa  long  time,  and from whatever little feedback I received
2305 | X(maybe no news is good news), I believe it  has  been  func-
2306 | Xtioning  without  any  significant  problems.  I  would,  of
2307 | Xcourse, appreciate all fixes and/or improvements.  Portabil-
2308 | Xity enhancements would especially be useful.
2309 | X
2310 | XImplementation Issues
2311 | X
2312 | X     Hash functions: The algorithm behind  sdbm  implementa-
2313 | Xtion  needs a good bit-scrambling hash function to be effec-
2314 | Xtive. I ran into a set of constants for a simple hash  func-
2315 | Xtion  that  seem  to  help sdbm perform better than ndbm for
2316 | Xvarious inputs:
2317 | X
2318 | X    /*
2319 | X     * polynomial conversion ignoring overflows
2320 | X     * 65599 nice. 65587 even better.
2321 | X     */
2322 | X    long
2323 | X    dbm_hash(char *str, int len) {
2324 | X        register unsigned long n = 0;
2325 | X
2326 | X        while (len--)
2327 | X            n = n * 65599 + *str++;
2328 | X        return n;
2329 | X    }
2330 | X
2331 | X     There may be better hash functions for the purposes  of
2332 | Xdynamic hashing.  Try your favorite, and check the pagefile.
2333 | XIf it contains too many pages with too many holes, (in rela-
2334 | Xtion  to this one for example) or if sdbm simply stops work-
2335 | Xing (fails after SPLTMAX attempts to split)  when  you  feed
2336 | Xyour  NEWS  history  file  to it, you probably do not have a
2337 | Xgood hashing function.  If  you  do  better  (for  different
2338 | Xtypes of input), I would like to know about the function you
2339 | Xuse.
2340 | X
2341 | X     Block sizes: It seems (from  various  tests  on  a  few
2342 | Xmachines)  that a page file block size PBLKSIZ of 1024 is by
2343 | Xfar the best for performance, but this also happens to limit
2344 | Xthe  size  of a key/value pair. Depending on your needs, you
2345 | Xmay wish to increase the page size, and also adjust  PAIRMAX
2346 | X(the maximum size of a key/value pair allowed: should always
2347 | X
2348 | X
2349 | X
2350 | X
2351 | X
2352 | X
2353 | X
2354 | X
2355 | X
2356 | X                           - 5 -
2357 | X
2358 | X
2359 | Xbe at least three words smaller than PBLKSIZ.)  accordingly.
2360 | XThe  system-wide  version  of the library should probably be
2361 | Xconfigured with 1024 (distribution default), as this appears
2362 | Xto be sufficient for most common uses of sdbm.
2363 | X
2364 | XPortability
2365 | X
2366 | X     This package has been tested in many  different  UN*Xes
2367 | Xeven including minix, and appears to be reasonably portable.
2368 | XThis does not mean it will port easily to non-UN*X systems.
2369 | X
2370 | XNotes and Miscellaneous
2371 | X
2372 | X     The sdbm is not a very complicated  package,  at  least
2373 | Xnot  after  you  familiarize yourself with the literature on
2374 | Xexternal hashing. There are other interesting algorithms  in
2375 | Xexistence  that ensure (approximately) single-read access to
2376 | Xa data value associated with any key. These  are  directory-
2377 | Xless schemes such as linear hashing [Lit80] (+ Larson varia-
2378 | Xtions), spiral storage [Mar79] or directory schemes such  as
2379 | Xextendible  hashing  [Fag79] by Fagin et al. I do hope these
2380 | Xsources provide a reasonable playground for  experimentation
2381 | Xwith  other algorithms.  See the June 1988 issue of ACM Com-
2382 | Xputing Surveys [Enb88] for  an  excellent  overview  of  the
2383 | Xfield.
2384 | X
2385 | XReferences
2386 | X
2387 | X
2388 | X[Lar78]
2389 | X    P.-A. Larson, ``Dynamic Hashing'', BIT, vol.   18,   pp.
2390 | X    184-201, 1978.
2391 | X
2392 | X[Tho90]
2393 | X    Ken Thompson, private communication, Nov. 1990
2394 | X
2395 | X[Lit80]
2396 | X    W. Litwin, `` Linear Hashing: A new tool  for  file  and
2397 | X    table addressing'', Proceedings of the 6th Conference on
2398 | X    Very Large  Databases  (Montreal), pp.   212-223,   Very
2399 | X    Large Database Foundation, Saratoga, Calif., 1980.
2400 | X
2401 | X[Fag79]
2402 | X    R. Fagin, J.  Nievergelt,  N.  Pippenger,  and   H.   R.
2403 | X    Strong,  ``Extendible Hashing - A Fast Access Method for
2404 | X    Dynamic Files'', ACM  Trans.  Database  Syst.,  vol.  4,
2405 | X    no.3, pp. 315-344, Sept. 1979.
2406 | X
2407 | X[Wal84]
2408 | X    Rich Wales, ``Discussion of "dbm"  data  base  system'',
2409 | X    USENET newsgroup unix.wizards, Jan. 1984.
2410 | X
2411 | X[Tor87]
2412 | X    Chris Torek,  ``Re:   dbm.a   and   ndbm.a   archives'',
2413 | X
2414 | X
2415 | X
2416 | X
2417 | X
2418 | X
2419 | X
2420 | X
2421 | X
2422 | X                           - 6 -
2423 | X
2424 | X
2425 | X    USENET newsgroup comp.unix, 1987.
2426 | X
2427 | X[Mar79]
2428 | X    G. N. Martin, ``Spiral Storage: Incrementally   Augment-
2429 | X    able   Hash  Addressed  Storage'', Technical Report #27,
2430 | X    University of Warwick, Coventry, U.K., 1979.
2431 | X
2432 | X[Enb88]
2433 | X    R.  J.  Enbody  and  H.   C.   Du,   ``Dynamic   Hashing
2434 | X    Schemes'', ACM Computing  Surveys,  vol.  20, no. 2, pp.
2435 | X    85-113, June 1988.
2436 | X
2437 | X
2438 | X
2439 | X
2440 | X
2441 | X
2442 | X
2443 | X
2444 | X
2445 | X
2446 | X
2447 | X
2448 | X
2449 | X
2450 | X
2451 | X
2452 | X
2453 | X
2454 | X
2455 | X
2456 | X
2457 | X
2458 | X
2459 | X
2460 | X
2461 | X
2462 | X
2463 | X
2464 | X
2465 | X
2466 | X
2467 | X
2468 | X
2469 | X
2470 | X
2471 | X
2472 | X
2473 | X
2474 | X
2475 | X
2476 | X
2477 | X
2478 | X
2479 | X
2480 | X
2481 | X
2482 | X
2483 | X
2484 | X
2485 | @@@End of readme.txt
2486 | echo x - sdbm.3 1>&2
2487 | sed 's/^X//' >sdbm.3 <<'@@@End of sdbm.3'
2488 | X.\" $Id: sdbm.3,v 1.2 90/12/13 13:00:57 oz Exp $
2489 | X.TH SDBM 3 "1 March 1990"
2490 | X.SH NAME
2491 | Xsdbm, dbm_open, dbm_prep, dbm_close, dbm_fetch, dbm_store, dbm_delete, dbm_firstkey, dbm_nextkey, dbm_hash, dbm_rdonly, dbm_error, dbm_clearerr, dbm_dirfno, dbm_pagfno \- data base subroutines
2492 | X.SH SYNOPSIS
2493 | X.nf
2494 | X.ft B
2495 | X#include <sdbm.h>
2496 | X.sp
2497 | Xtypedef struct {
2498 | X	char *dptr;
2499 | X	int dsize;
2500 | X} datum;
2501 | X.sp
2502 | Xdatum nullitem = { NULL, 0 };
2503 | X.sp
2504 | X\s-1DBM\s0 *dbm_open(char *file, int flags, int mode)
2505 | X.sp
2506 | X\s-1DBM\s0 *dbm_prep(char *dirname, char *pagname, int flags, int mode)
2507 | X.sp
2508 | Xvoid dbm_close(\s-1DBM\s0 *db)
2509 | X.sp
2510 | Xdatum dbm_fetch(\s-1DBM\s0 *db, datum key)
2511 | X.sp
2512 | Xint dbm_store(\s-1DBM\s0 *db, datum key, datum val, int flags)
2513 | X.sp
2514 | Xint dbm_delete(\s-1DBM\s0 *db, datum key)
2515 | X.sp
2516 | Xdatum dbm_firstkey(\s-1DBM\s0 *db)
2517 | X.sp
2518 | Xdatum dbm_nextkey(\s-1DBM\s0 *db)
2519 | X.sp
2520 | Xlong dbm_hash(char *string, int len)
2521 | X.sp
2522 | Xint dbm_rdonly(\s-1DBM\s0 *db)
2523 | Xint dbm_error(\s-1DBM\s0 *db)
2524 | Xdbm_clearerr(\s-1DBM\s0 *db)
2525 | Xint dbm_dirfno(\s-1DBM\s0 *db)
2526 | Xint dbm_pagfno(\s-1DBM\s0 *db)
2527 | X.ft R
2528 | X.fi
2529 | X.SH DESCRIPTION
2530 | X.IX "database library" sdbm "" "\fLsdbm\fR"
2531 | X.IX dbm_open "" "\fLdbm_open\fR \(em open \fLsdbm\fR database"
2532 | X.IX dbm_prep "" "\fLdbm_prep\fR \(em prepare \fLsdbm\fR database"
2533 | X.IX dbm_close "" "\fLdbm_close\fR \(em close \fLsdbm\fR routine"
2534 | X.IX dbm_fetch "" "\fLdbm_fetch\fR \(em fetch \fLsdbm\fR database data"
2535 | X.IX dbm_store "" "\fLdbm_store\fR \(em add data to \fLsdbm\fR database"
2536 | X.IX dbm_delete "" "\fLdbm_delete\fR \(em remove data from \fLsdbm\fR database"
2537 | X.IX dbm_firstkey "" "\fLdbm_firstkey\fR \(em access \fLsdbm\fR database"
2538 | X.IX dbm_nextkey "" "\fLdbm_nextkey\fR \(em access \fLsdbm\fR database"
2539 | X.IX dbm_hash "" "\fLdbm_hash\fR \(em string hash for \fLsdbm\fR database"
2540 | X.IX dbm_rdonly "" "\fLdbm_rdonly\fR \(em return \fLsdbm\fR database read-only mode"
2541 | X.IX dbm_error "" "\fLdbm_error\fR \(em return \fLsdbm\fR database error condition"
2542 | X.IX dbm_clearerr "" "\fLdbm_clearerr\fR \(em clear \fLsdbm\fR database error condition"
2543 | X.IX dbm_dirfno "" "\fLdbm_dirfno\fR \(em return \fLsdbm\fR database bitmap file descriptor"
2544 | X.IX dbm_pagfno "" "\fLdbm_pagfno\fR \(em return \fLsdbm\fR database data file descriptor"
2545 | X.IX "database functions \(em \fLsdbm\fR"  dbm_open  ""  \fLdbm_open\fP
2546 | X.IX "database functions \(em \fLsdbm\fR"  dbm_prep  ""  \fLdbm_prep\fP
2547 | X.IX "database functions \(em \fLsdbm\fR"  dbm_close  ""  \fLdbm_close\fP
2548 | X.IX "database functions \(em \fLsdbm\fR"  dbm_fetch  ""  \fLdbm_fetch\fP
2549 | X.IX "database functions \(em \fLsdbm\fR"  dbm_store  ""  \fLdbm_store\fP
2550 | X.IX "database functions \(em \fLsdbm\fR"  dbm_delete  ""  \fLdbm_delete\fP
2551 | X.IX "database functions \(em \fLsdbm\fR"  dbm_firstkey  ""  \fLdbm_firstkey\fP
2552 | X.IX "database functions \(em \fLsdbm\fR"  dbm_nextkey  ""  \fLdbm_nextkey\fP
2553 | X.IX "database functions \(em \fLsdbm\fR"  dbm_rdonly  ""  \fLdbm_rdonly\fP
2554 | X.IX "database functions \(em \fLsdbm\fR"  dbm_error  ""  \fLdbm_error\fP
2555 | X.IX "database functions \(em \fLsdbm\fR"  dbm_clearerr  ""  \fLdbm_clearerr\fP
2556 | X.IX "database functions \(em \fLsdbm\fR"  dbm_dirfno  ""  \fLdbm_dirfno\fP
2557 | X.IX "database functions \(em \fLsdbm\fR"  dbm_pagfno  ""  \fLdbm_pagfno\fP
2558 | X.LP
2559 | XThis package allows an application to maintain a mapping of <key,value> pairs
2560 | Xin disk files.  This is not to be considered a real database system, but is
2561 | Xstill useful in many simple applications built around fast retrieval of a data
2562 | Xvalue from a key.  This implementation uses an external hashing scheme,
2563 | Xcalled Dynamic Hashing, as described by Per-Aake Larson in BIT 18 (1978) pp.
2564 | X184-201.  Retrieval of any item usually requires a single disk access.
2565 | XThe application interface is compatible with the
2566 | X.IR ndbm (3)
2567 | Xlibrary.
2568 | X.LP
2569 | XAn
2570 | X.B sdbm
2571 | Xdatabase is kept in two files usually given the extensions
2572 | X.B \.dir
2573 | Xand
2574 | X.BR \.pag .
2575 | XThe
2576 | X.B \.dir
2577 | Xfile contains a bitmap representing a forest of binary hash trees, the leaves
2578 | Xof which indicate data pages in the
2579 | X.B \.pag
2580 | Xfile.
2581 | X.LP
2582 | XThe application interface uses the
2583 | X.B datum
2584 | Xstructure to describe both
2585 | X.I keys
2586 | Xand
2587 | X.IR value s.
2588 | XA
2589 | X.B datum
2590 | Xspecifies a byte sequence of
2591 | X.I dsize
2592 | Xsize pointed to by
2593 | X.IR dptr .
2594 | XIf you use
2595 | X.SM ASCII
2596 | Xstrings as
2597 | X.IR key s
2598 | Xor
2599 | X.IR value s,
2600 | Xthen you must decide whether or not to include the terminating
2601 | X.SM NUL
2602 | Xbyte which sometimes defines strings.  Including it will require larger
2603 | Xdatabase files, but it will be possible to get sensible output from a
2604 | X.IR strings (1)
2605 | Xcommand applied to the data file.
2606 | X.LP
2607 | XIn order to allow a process using this package to manipulate multiple
2608 | Xdatabases, the applications interface always requires a
2609 | X.IR handle ,
2610 | Xa
2611 | X.BR "DBM *" ,
2612 | Xto identify the database to be manipulated.  Such a handle can be obtained
2613 | Xfrom the only routines that do not require it, namely
2614 | X.BR dbm_open (\|)
2615 | Xor
2616 | X.BR dbm_prep (\|).
2617 | XEither of these will open or create the two necessary files.  The
2618 | Xdifference is that the latter allows explicitly naming the bitmap and data
2619 | Xfiles whereas
2620 | X.BR dbm_open (\|)
2621 | Xwill take a base file name and call
2622 | X.BR dbm_prep (\|)
2623 | Xwith the default extensions.
2624 | XThe
2625 | X.I flags
2626 | Xand
2627 | X.I mode
2628 | Xparameters are the same as for
2629 | X.BR open (2).
2630 | X.LP
2631 | XTo free the resources occupied while a database handle is active, call
2632 | X.BR dbm_close (\|).
2633 | X.LP
2634 | XGiven a handle, one can retrieve data associated with a key by using the
2635 | X.BR dbm_fetch (\|)
2636 | Xroutine, and associate data with a key by using the
2637 | X.BR dbm_store (\|)
2638 | Xroutine.
2639 | X.LP
2640 | XThe values of the
2641 | X.I flags
2642 | Xparameter for
2643 | X.BR dbm_store (\|)
2644 | Xcan be either
2645 | X.BR \s-1DBM_INSERT\s0 ,
2646 | Xwhich will not change an existing entry with the same key, or
2647 | X.BR \s-1DBM_REPLACE\s0 ,
2648 | Xwhich will replace an existing entry with the same key.
2649 | XKeys are unique within the database.
2650 | X.LP
2651 | XTo delete a key and its associated value use the
2652 | X.BR dbm_delete (\|)
2653 | Xroutine.
2654 | X.LP
2655 | XTo retrieve every key in the database, use a loop like:
2656 | X.sp
2657 | X.nf
2658 | X.ft B
2659 | Xfor (key = dbm_firstkey(db); key.dptr != NULL; key = dbm_nextkey(db))
2660 | X        ;
2661 | X.ft R
2662 | X.fi
2663 | X.LP
2664 | XThe order of retrieval is unspecified.
2665 | X.LP
2666 | XIf you determine that the performance of the database is inadequate or
2667 | Xyou notice clustering or other effects that may be due to the hashing
2668 | Xalgorithm used by this package, you can override it by supplying your
2669 | Xown
2670 | X.BR dbm_hash (\|)
2671 | Xroutine.  Doing so will make the database unintelligible to any other
2672 | Xapplications that do not use your specialized hash function.
2673 | X.sp
2674 | X.LP
2675 | XThe following macros are defined in the header file:
2676 | X.IP
2677 | X.BR dbm_rdonly (\|)
2678 | Xreturns true if the database has been opened read\-only.
2679 | X.IP
2680 | X.BR dbm_error (\|)
2681 | Xreturns true if an I/O error has occurred.
2682 | X.IP
2683 | X.BR dbm_clearerr (\|)
2684 | Xallows you to clear the error flag if you think you know what the error
2685 | Xwas and insist on ignoring it.
2686 | X.IP
2687 | X.BR dbm_dirfno (\|)
2688 | Xreturns the file descriptor associated with the bitmap file.
2689 | X.IP
2690 | X.BR dbm_pagfno (\|)
2691 | Xreturns the file descriptor associated with the data file.
2692 | X.SH SEE ALSO
2693 | X.IR open (2).
2694 | X.SH DIAGNOSTICS
2695 | XFunctions that return a
2696 | X.B "DBM *"
2697 | Xhandle will use
2698 | X.SM NULL
2699 | Xto indicate an error.
2700 | XFunctions that return an
2701 | X.B int
2702 | Xwill use \-1 to indicate an error.  The normal (successful) return value is 0.
2703 | XFunctions that return a
2704 | X.B datum
2705 | Xwill return
2706 | X.B nullitem
2707 | Xto indicate an error.
2708 | X.LP
2709 | XAs a special case of
2710 | X.BR dbm_store (\|),
2711 | Xif it is called with the
2712 | X.B \s-1DBM_INSERT\s0
2713 | Xflag and the key already exists in the database, the return value will be 1.
2714 | X.LP
2715 | XIn general, if a function parameter is invalid,
2716 | X.B errno
2717 | Xwill be set to
2718 | X.BR \s-1EINVAL\s0 .
2719 | XIf a write operation is requested on a read-only database,
2720 | X.B errno
2721 | Xwill be set to
2722 | X.BR \s-1EPERM\s0 .
2723 | XIf a memory allocation (using
2724 | X.IR malloc (3))
2725 | Xfailed,
2726 | X.B errno
2727 | Xwill be set to
2728 | X.BR \s-1ENOMEM\s0 .
2729 | XFor I/O operation failures
2730 | X.B errno
2731 | Xwill contain the value set by the relevant failed system call, either
2732 | X.IR read (2),
2733 | X.IR write (2),
2734 | Xor
2735 | X.IR lseek (2).
2736 | X.SH AUTHOR
2737 | X.IP "Ozan S. Yigit" (oz@nexus.yorku.ca)
2738 | X.SH BUGS
2739 | XThe sum of key and value data sizes must not exceed
2740 | X.B \s-1PAIRMAX\s0
2741 | X(1008 bytes).
2742 | X.LP
2743 | XThe sum of the key and value data sizes where several keys hash to the
2744 | Xsame value must fit within one data page.
2745 | X.LP
2746 | XThe
2747 | X.B \.pag
2748 | Xfile will contain holes, so its apparent size is larger than its contents.
2749 | XWhen copied through the filesystem the holes will be filled.
2750 | X.LP
2751 | XThe contents of
2752 | X.B datum
2753 | Xvalues returned are in volatile storage.  If you want to retain the values
2754 | Xpointed to, you must copy them immediately before another call to this package.
2755 | X.LP
2756 | XThe only safe way for multiple processes to (read and) update a database at
2757 | Xthe same time, is to implement a private locking scheme outside this package
2758 | Xand open and close the database between lock acquisitions.  It is safe for
2759 | Xmultiple processes to concurrently access a database read-only.
2760 | X.SH APPLICATIONS PORTABILITY
2761 | XFor complete source code compatibility with the Berkeley Unix
2762 | X.IR ndbm (3)
2763 | Xlibrary, the 
2764 | X.B sdbm.h
2765 | Xheader file should be installed in
2766 | X.BR /usr/include/ndbm.h .
2767 | X.LP
2768 | XThe
2769 | X.B nullitem
2770 | Xdata item, and the
2771 | X.BR dbm_prep (\|),
2772 | X.BR dbm_hash (\|),
2773 | X.BR dbm_rdonly (\|),
2774 | X.BR dbm_dirfno (\|),
2775 | Xand
2776 | X.BR dbm_pagfno (\|)
2777 | Xfunctions are unique to this package.
2778 | @@@End of sdbm.3
2779 | echo x - sdbm.c 1>&2
2780 | sed 's/^X//' >sdbm.c <<'@@@End of sdbm.c'
2781 | X/*
2782 | X * sdbm - ndbm work-alike hashed database library
2783 | X * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
2784 | X * author: oz@nexus.yorku.ca
2785 | X * status: public domain.
2786 | X *
2787 | X * core routines
2788 | X */
2789 | X
2790 | X#ifndef lint
2791 | Xstatic char rcsid[] = "$Id: sdbm.c,v 1.16 90/12/13 13:01:31 oz Exp $";
2792 | X#endif
2793 | X
2794 | X#include "sdbm.h"
2795 | X#include "tune.h"
2796 | X#include "pair.h"
2797 | X
2798 | X#include <sys/types.h>
2799 | X#include <sys/stat.h>
2800 | X#ifdef BSD42
2801 | X#include <sys/file.h>
2802 | X#else
2803 | X#include <fcntl.h>
2804 | X#include <memory.h>
2805 | X#endif
2806 | X#include <errno.h>
2807 | X#include <string.h>
2808 | X
2809 | X#ifdef __STDC__
2810 | X#include <stddef.h>
2811 | X#endif
2812 | X
2813 | X#ifndef NULL
2814 | X#define NULL	0
2815 | X#endif
2816 | X
2817 | X/*
2818 | X * externals
2819 | X */
2820 | X#ifndef sun
2821 | Xextern int errno;
2822 | X#endif
2823 | X
2824 | Xextern char *malloc proto((unsigned int));
2825 | Xextern void free proto((void *));
2826 | Xextern long lseek();
2827 | X
2828 | X/*
2829 | X * forward
2830 | X */
2831 | Xstatic int getdbit proto((DBM *, long));
2832 | Xstatic int setdbit proto((DBM *, long));
2833 | Xstatic int getpage proto((DBM *, long));
2834 | Xstatic datum getnext proto((DBM *));
2835 | Xstatic int makroom proto((DBM *, long, int));
2836 | X
2837 | X/*
2838 | X * useful macros. OFF_PAG/OFF_DIR turn a block number into a byte offset.
2839 | X */
2840 | X#define bad(x)		((x).dptr == NULL || (x).dsize <= 0)
2841 | X#define exhash(item)	dbm_hash((item).dptr, (item).dsize)
2842 | X#define ioerr(db)	((db)->flags |= DBM_IOERR)
2843 | X
2844 | X#define OFF_PAG(off)	((long) (off) * PBLKSIZ)
2845 | X#define OFF_DIR(off)	((long) (off) * DBLKSIZ)
2846 | X
2847 | Xstatic long masks[] = {		/* masks[n]: the n low-order bits set (octal) */
2848 | X	000000000000, 000000000001, 000000000003, 000000000007,
2849 | X	000000000017, 000000000037, 000000000077, 000000000177,
2850 | X	000000000377, 000000000777, 000000001777, 000000003777,
2851 | X	000000007777, 000000017777, 000000037777, 000000077777,
2852 | X	000000177777, 000000377777, 000000777777, 000001777777,
2853 | X	000003777777, 000007777777, 000017777777, 000037777777,
2854 | X	000077777777, 000177777777, 000377777777, 000777777777,
2855 | X	001777777777, 003777777777, 007777777777, 017777777777
2856 | X};
2857 | X
2858 | Xdatum nullitem = {NULL, 0};	/* what datum-valued routines return on failure */
2859 | X
2860 | XDBM *
2861 | Xdbm_open(file, flags, mode)	/* add DIRFEXT/PAGFEXT to file, call dbm_prep */
2862 | Xregister char *file;
2863 | Xregister int flags;
2864 | Xregister int mode;
2865 | X{
2866 | X	register DBM *db;
2867 | X	register char *dirname;
2868 | X	register char *pagname;
2869 | X	register int n;
2870 | X
2871 | X	if (file == NULL || !*file)
2872 | X		return errno = EINVAL, (DBM *) NULL;
2873 | X/*
2874 | X * need space for two separate filenames (each with its own NUL)
2875 | X */
2876 | X	n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2;
2877 | X
2878 | X	if ((dirname = malloc((unsigned) n)) == NULL)
2879 | X		return errno = ENOMEM, (DBM *) NULL;
2880 | X/*
2881 | X * build the file names back to back in the one buffer
2882 | X */
2883 | X	dirname = strcat(strcpy(dirname, file), DIRFEXT);
2884 | X	pagname = strcpy(dirname + strlen(dirname) + 1, file);
2885 | X	pagname = strcat(pagname, PAGFEXT);
2886 | X
2887 | X	db = dbm_prep(dirname, pagname, flags, mode);
2888 | X	free((char *) dirname);	/* pagname lives in the same allocation */
2889 | X	return db;
2890 | X}
2891 | X
2892 | XDBM *
2893 | Xdbm_prep(dirname, pagname, flags, mode)	/* new handle, or NULL on failure */
2894 | Xchar *dirname;
2895 | Xchar *pagname;
2896 | Xint flags;
2897 | Xint mode;
2898 | X{
2899 | X	register DBM *db;
2900 | X	struct stat dstat;
2901 | X
2902 | X	if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
2903 | X		return errno = ENOMEM, (DBM *) NULL;
2904 | X
2905 | X        db->flags = 0;
2906 | X        db->hmask = 0;
2907 | X        db->blkptr = 0;
2908 | X        db->keyptr = 0;
2909 | X/*
2910 | X * adjust user flags so that WRONLY becomes RDWR,
2911 | X * as required by this package. Also set our internal
2912 | X * flag for RDONLY if needed.
2913 | X */
2914 | X	if (flags & O_WRONLY)
2915 | X		flags = (flags & ~O_WRONLY) | O_RDWR;
2916 | X
2917 | X	else if ((flags & 03) == O_RDONLY)
2918 | X		db->flags = DBM_RDONLY;
2919 | X/*
2920 | X * open the files in sequence, and stat the dirfile.
2921 | X * If we fail anywhere, undo everything, return NULL.
2922 | X */
2923 | X	if ((db->pagf = open(pagname, flags, mode)) > -1) {
2924 | X		if ((db->dirf = open(dirname, flags, mode)) > -1) {
2925 | X/*
2926 | X * need the dirfile size to establish max bit number.
2927 | X */
2928 | X			if (fstat(db->dirf, &dstat) == 0) {
2929 | X/*
2930 | X * zero size: either a fresh database, or one with a single,
2931 | X * unsplit data page: dirpage is all zeros.
2932 | X */
2933 | X				db->dirbno = (!dstat.st_size) ? 0 : -1;
2934 | X				db->pagbno = -1;	/* no data page cached yet */
2935 | X				db->maxbno = dstat.st_size * BYTESIZ;	/* bits in dirfile */
2936 | X
2937 | X				(void) memset(db->pagbuf, 0, PBLKSIZ);
2938 | X				(void) memset(db->dirbuf, 0, DBLKSIZ);
2939 | X			/*
2940 | X			 * success
2941 | X			 */
2942 | X				return db;
2943 | X			}
2944 | X			(void) close(db->dirf);
2945 | X		}
2946 | X		(void) close(db->pagf);
2947 | X	}
2948 | X	free((char *) db);
2949 | X	return (DBM *) NULL;
2950 | X}
2951 | X
2952 | Xvoid
2953 | Xdbm_close(db)		/* close both files and free the handle */
2954 | Xregister DBM *db;
2955 | X{
2956 | X	if (db == NULL)
2957 | X		errno = EINVAL;
2958 | X	else {
2959 | X		(void) close(db->dirf);
2960 | X		(void) close(db->pagf);
2961 | X		free((char *) db);	/* db must not be used after this */
2962 | X	}
2963 | X}
2964 | X
2965 | Xdatum
2966 | Xdbm_fetch(db, key)	/* getpair() result for key from the page it hashes to */
2967 | Xregister DBM *db;
2968 | Xdatum key;
2969 | X{
2970 | X	if (db == NULL || bad(key))
2971 | X		return errno = EINVAL, nullitem;
2972 | X
2973 | X	if (getpage(db, exhash(key)))
2974 | X		return getpair(db->pagbuf, key);
2975 | X
2976 | X	return ioerr(db), nullitem;	/* page could not be loaded */
2977 | X}
2978 | X
2979 | Xint
2980 | Xdbm_delete(db, key)	/* remove key's pair and rewrite its page; 0 ok, -1 not */
2981 | Xregister DBM *db;
2982 | Xdatum key;
2983 | X{
2984 | X	if (db == NULL || bad(key))
2985 | X		return errno = EINVAL, -1;
2986 | X	if (dbm_rdonly(db))
2987 | X		return errno = EPERM, -1;
2988 | X
2989 | X	if (getpage(db, exhash(key))) {
2990 | X		if (!delpair(db->pagbuf, key))
2991 | X			return -1;	/* presumably: no such key; errno, flags untouched */
2992 | X/*
2993 | X * update the page file
2994 | X */
2995 | X		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
2996 | X		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
2997 | X			return ioerr(db), -1;
2998 | X
2999 | X		return 0;
3000 | X	}
3001 | X
3002 | X	return ioerr(db), -1;
3003 | X}
3004 | X
3005 | Xint
3006 | Xdbm_store(db, key, val, flags)	/* 0 stored, 1 duplicate (SEEDUPS), -1 error */
3007 | Xregister DBM *db;
3008 | Xdatum key;
3009 | Xdatum val;
3010 | Xint flags;
3011 | X{
3012 | X	int need;
3013 | X	register long hash;
3014 | X
3015 | X	if (db == NULL || bad(key)	/* val may be empty, but not malformed */
3016 | X	    || val.dsize < 0 || (val.dptr == NULL && val.dsize > 0))
3017 | X		return errno = EINVAL, -1;
3018 | X	if (dbm_rdonly(db))
3019 | X		return errno = EPERM, -1;
3020 | X	need = key.dsize + val.dsize;
3021 | X/*
3022 | X * is the pair too big (or too small) for this database ??
3023 | X */
3024 | X	if (need < 0 || need > PAIRMAX)
3025 | X		return errno = EINVAL, -1;
3026 | X
3027 | X	if (getpage(db, (hash = exhash(key)))) {
3028 | X/*
3029 | X * if we need to replace, delete the key/data pair
3030 | X * first. If it is not there, ignore.
3031 | X */
3032 | X		if (flags == DBM_REPLACE)
3033 | X			(void) delpair(db->pagbuf, key);
3034 | X#ifdef SEEDUPS
3035 | X		else if (duppair(db->pagbuf, key))
3036 | X			return 1;
3037 | X#endif
3038 | X/*
3039 | X * if we do not have enough room, we have to split.
3040 | X */
3041 | X		if (!fitpair(db->pagbuf, need))
3042 | X			if (!makroom(db, hash, need))
3043 | X				return ioerr(db), -1;
3044 | X/*
3045 | X * we have enough room or split is successful. insert the key,
3046 | X * and update the page file.
3047 | X */
3048 | X		(void) putpair(db->pagbuf, key, val);
3049 | X
3050 | X		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
3051 | X		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
3052 | X			return ioerr(db), -1;
3053 | X	/*
3054 | X	 * success
3055 | X	 */
3056 | X		return 0;
3057 | X	}
3058 | X
3059 | X	return ioerr(db), -1;
3060 | X}
3061 | X
3062 | X/*
3063 | X * makroom - make room by splitting the overfull page
3064 | X * this routine will attempt to split up to SPLTMAX times before
3065 | X * giving up. returns 1 once the pair fits, 0 on i/o error or failure.
3066 | X */
3067 | Xstatic int
3068 | Xmakroom(db, hash, need)
3069 | Xregister DBM *db;
3070 | Xlong hash;
3071 | Xint need;
3072 | X{
3073 | X	long newp;
3074 | X	char twin[PBLKSIZ];
3075 | X	char *pag = db->pagbuf;
3076 | X	char *new = twin;
3077 | X	register int smax = SPLTMAX;
3078 | X
3079 | X	do {
3080 | X/*
3081 | X * split the current page
3082 | X */
3083 | X		(void) splpage(pag, new, db->hmask + 1);
3084 | X/*
3085 | X * address of the new page
3086 | X */
3087 | X		newp = (hash & db->hmask) | (db->hmask + 1);
3088 | X
3089 | X/*
3090 | X * write delay, read avoidance/cache shuffle:
3091 | X * select the page for incoming pair: if key is to go to the new page,
3092 | X * write out the previous one, and copy the new one over, thus making
3093 | X * it the current page. If not, simply write the new page, and we are
3094 | X * still looking at the page of interest. current page is not updated
3095 | X * here, as dbm_store will do so, after it inserts the incoming pair.
3096 | X */
3097 | X		if (hash & (db->hmask + 1)) {
3098 | X			if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
3099 | X			    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
3100 | X				return 0;
3101 | X			db->pagbno = newp;
3102 | X			(void) memcpy(pag, new, PBLKSIZ);
3103 | X		}
3104 | X		else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
3105 | X			 || write(db->pagf, new, PBLKSIZ) < 0)
3106 | X			return 0;
3107 | X
3108 | X		if (!setdbit(db, db->curbit))	/* record the split in the dir file */
3109 | X			return 0;
3110 | X/*
3111 | X * see if we have enough room now
3112 | X */
3113 | X		if (fitpair(pag, need))
3114 | X			return 1;
3115 | X/*
3116 | X * try again... update curbit and hmask as getpage would have
3117 | X * done. because of our update of the current page, we do not
3118 | X * need to read in anything. BUT we have to write the current
3119 | X * [deferred] page out, as the window of failure is too great.
3120 | X */
3121 | X		db->curbit = 2 * db->curbit +
3122 | X			((hash & (db->hmask + 1)) ? 2 : 1);
3123 | X		db->hmask |= db->hmask + 1;
3124 | X
3125 | X		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
3126 | X		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
3127 | X			return 0;
3128 | X
3129 | X	} while (--smax);
3130 | X/*
3131 | X * if we are here, this is real bad news. After SPLTMAX splits,
3132 | X * we still cannot fit the key. say goodnight.
3133 | X */
3134 | X#ifdef BADMESS
3135 | X	(void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
3136 | X#endif
3137 | X	return 0;
3138 | X
3139 | X}
3140 | X
3141 | X/*
3142 | X * the following two routines will break if
3143 | X * deletions aren't taken into account. (ndbm bug)
3144 | X */
3145 | Xdatum
3146 | Xdbm_firstkey(db)	/* restart the traversal at page 0, return its first key */
3147 | Xregister DBM *db;
3148 | X{
3149 | X	if (db == NULL)
3150 | X		return errno = EINVAL, nullitem;
3151 | X/*
3152 | X * start at page 0
3153 | X */
3154 | X	if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
3155 | X	    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
3156 | X		return ioerr(db), nullitem;
3157 | X	db->pagbno = 0;
3158 | X	db->blkptr = 0;		/* page the traversal is on */
3159 | X	db->keyptr = 0;		/* getnext pre-increments before use */
3160 | X
3161 | X	return getnext(db);
3162 | X}
3163 | X
3164 | Xdatum
3165 | Xdbm_nextkey(db)		/* continue the traversal from blkptr/keyptr */
3166 | Xregister DBM *db;
3167 | X{
3168 | X	if (db == NULL)
3169 | X		return errno = EINVAL, nullitem;
3170 | X	return getnext(db);
3171 | X}
3172 | X
3173 | X/*
3174 | X * all important binary trie traversal: 1 if page is in pagbuf, else 0
3175 | X */
3176 | Xstatic int
3177 | Xgetpage(db, hash)
3178 | Xregister DBM *db;
3179 | Xregister long hash;
3180 | X{
3181 | X	register int hbit;
3182 | X	register long dbit;
3183 | X	register long pagb;
3184 | X
3185 | X	dbit = 0;
3186 | X	hbit = 0;
3187 | X	while (dbit < db->maxbno && getdbit(db, dbit))
3188 | X		dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
3189 | X
3190 | X	debug(("dbit: %ld...", dbit));
3191 | X
3192 | X	db->curbit = dbit;
3193 | X	db->hmask = masks[hbit];
3194 | X
3195 | X	pagb = hash & db->hmask;
3196 | X/*
3197 | X * see if the block we need is already in memory.
3198 | X * note: this lookaside cache has about 10% hit rate.
3199 | X */
3200 | X	if (pagb != db->pagbno) {
3201 | X/*
3202 | X * the buffer is about to be overwritten: forget which page it held,
3203 | X * and zero it so that a short read (past EOF) looks like a "hole".
3204 | X */
3205 | X		db->pagbno = -1;
3206 | X		(void) memset(db->pagbuf, 0, PBLKSIZ);
3207 | X		if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
3208 | X		    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0
3209 | X		    || !chkpage(db->pagbuf))
3210 | X			return 0;
3211 | X		db->pagbno = pagb;
3212 | X		debug(("pag read: %ld\n", pagb));
3213 | X	}
3214 | X	return 1;
3215 | X}
3216 | X
3217 | Xstatic int
3218 | Xgetdbit(db, dbit)	/* nonzero if bit dbit is set in the dir bitmap */
3219 | Xregister DBM *db;
3220 | Xregister long dbit;
3221 | X{
3222 | X	register long c;
3223 | X	register long dirb;
3224 | X
3225 | X	c = dbit / BYTESIZ;		/* byte holding the bit */
3226 | X	dirb = c / DBLKSIZ;		/* dir block holding that byte */
3227 | X	if (dirb != db->dirbno) {
3228 | X		db->dirbno = -1;	/* dirbuf no longer holds a known block */
3229 | X		(void) memset(db->dirbuf, 0, DBLKSIZ);	/* short read: 0s */
3230 | X		if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
3231 | X		    || read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
3232 | X			return 0;	/* i/o failure reads as "bit clear" */
3233 | X		db->dirbno = dirb;
3234 | X		debug(("dir read: %ld\n", dirb));
3235 | X	}
3236 | X
3237 | X	return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
3238 | X}
3239 | X
3240 | Xstatic int
3241 | Xsetdbit(db, dbit)	/* set bit dbit, write its dir block; 0 on i/o error */
3242 | Xregister DBM *db;
3243 | Xregister long dbit;
3244 | X{
3245 | X	register long c;
3246 | X	register long dirb;
3247 | X
3248 | X	c = dbit / BYTESIZ;		/* byte holding the bit */
3249 | X	dirb = c / DBLKSIZ;		/* dir block holding that byte */
3250 | X	if (dirb != db->dirbno) {
3251 | X		db->dirbno = -1;	/* dirbuf no longer holds a known block */
3252 | X		(void) memset(db->dirbuf, 0, DBLKSIZ);	/* new block: 0s */
3253 | X		if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
3254 | X		    || read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
3255 | X			return 0;
3256 | X		db->dirbno = dirb;
3257 | X		debug(("dir read: %ld\n", dirb));
3258 | X	}
3259 | X
3260 | X	db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
3261 | X
3262 | X	if (dbit >= db->maxbno)		/* bitmap grows by one dir block */
3263 | X		db->maxbno += DBLKSIZ * BYTESIZ;
3264 | X
3265 | X	if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
3266 | X	    || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
3267 | X		return 0;
3268 | X
3269 | X	return 1;
3270 | X}
3271 | X
3272 | X/*
3273 | X * getnext - get the next key in the page, and if done with
3274 | X * the page, try the next page in sequence
3275 | X */
3276 | Xstatic datum
3277 | Xgetnext(db)
3278 | Xregister DBM *db;
3279 | X{
3280 | X	datum key;
3281 | X
3282 | X	for (;;) {
3283 | X		db->keyptr++;
3284 | X		key = getnkey(db->pagbuf, db->keyptr);
3285 | X		if (key.dptr != NULL)
3286 | X			return key;
3287 | X/*
3288 | X * we either run out, or there is nothing on this page..
3289 | X * try the next one... If we lost our position on the
3290 | X * file, we will have to seek.
3291 | X */
3292 | X		db->keyptr = 0;
3293 | X		if (db->pagbno != db->blkptr++)
3294 | X			if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
3295 | X				break;
3296 | X		db->pagbno = db->blkptr;
3297 | X		if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
3298 | X			break;
3299 | X		if (!chkpage(db->pagbuf))
3300 | X			break;
3301 | X	}
3302 | X
3303 | X	return ioerr(db), nullitem;
3304 | X}
3305 | @@@End of sdbm.c
3306 | echo x - sdbm.h 1>&2
3307 | sed 's/^X//' >sdbm.h <<'@@@End of sdbm.h'
3308 | X/*
3309 | X * sdbm - ndbm work-alike hashed database library
3310 | X * based on Per-Ake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
3311 | X * author: oz@nexus.yorku.ca
3312 | X * status: public domain. 
3313 | X */
3314 | X#define DBLKSIZ 4096
3315 | X#define PBLKSIZ 1024
3316 | X#define PAIRMAX 1008			/* arbitrary on PBLKSIZ-N */
3317 | X#define SPLTMAX	10			/* maximum allowed splits */
3318 | X					/* for a single insertion */
3319 | X#define DIRFEXT	".dir"
3320 | X#define PAGFEXT	".pag"
3321 | X
3322 | Xtypedef struct {
3323 | X	int dirf;		       /* directory file descriptor */
3324 | X	int pagf;		       /* page file descriptor */
3325 | X	int flags;		       /* status/error flags, see below */
3326 | X	long maxbno;		       /* size of dirfile in bits */
3327 | X	long curbit;		       /* current bit number */
3328 | X	long hmask;		       /* current hash mask */
3329 | X	long blkptr;		       /* current block for nextkey */
3330 | X	int keyptr;		       /* current key for nextkey */
3331 | X	long blkno;		       /* current page to read/write */
3332 | X	long pagbno;		       /* current page in pagbuf */
3333 | X	char pagbuf[PBLKSIZ];	       /* page file block buffer */
3334 | X	long dirbno;		       /* current block in dirbuf */
3335 | X	char dirbuf[DBLKSIZ];	       /* directory file block buffer */
3336 | X} DBM;
3337 | X
3338 | X#define DBM_RDONLY	0x1	       /* data base open read-only */
3339 | X#define DBM_IOERR	0x2	       /* data base I/O error */
3340 | X
3341 | X/*
3342 | X * utility macros
3343 | X */
3344 | X#define dbm_rdonly(db)		((db)->flags & DBM_RDONLY)
3345 | X#define dbm_error(db)		((db)->flags & DBM_IOERR)
3346 | X
3347 | X#define dbm_clearerr(db)	((db)->flags &= ~DBM_IOERR)  /* ouch */
3348 | X
3349 | X#define dbm_dirfno(db)	((db)->dirf)
3350 | X#define dbm_pagfno(db)	((db)->pagf)
3351 | X
3352 | Xtypedef struct {
3353 | X	char *dptr;
3354 | X	int dsize;
3355 | X} datum;
3356 | X
3357 | Xextern datum nullitem;
3358 | X
3359 | X#ifdef __STDC__
3360 | X#define proto(p) p
3361 | X#else
3362 | X#define proto(p) ()
3363 | X#endif
3364 | X
3365 | X/*
3366 | X * flags to dbm_store
3367 | X */
3368 | X#define DBM_INSERT	0
3369 | X#define DBM_REPLACE	1
3370 | X
3371 | X/*
3372 | X * ndbm interface
3373 | X */
3374 | Xextern DBM *dbm_open proto((char *, int, int));
3375 | Xextern void dbm_close proto((DBM *));
3376 | Xextern datum dbm_fetch proto((DBM *, datum));
3377 | Xextern int dbm_delete proto((DBM *, datum));
3378 | Xextern int dbm_store proto((DBM *, datum, datum, int));
3379 | Xextern datum dbm_firstkey proto((DBM *));
3380 | Xextern datum dbm_nextkey proto((DBM *));
3381 | X
3382 | X/*
3383 | X * other
3384 | X */
3385 | Xextern DBM *dbm_prep proto((char *, char *, int, int));
3386 | Xextern long dbm_hash proto((char *, int));
3387 | @@@End of sdbm.h
3388 | echo x - tune.h 1>&2
3389 | sed 's/^X//' >tune.h <<'@@@End of tune.h'
3390 | X/*
3391 | X * sdbm - ndbm work-alike hashed database library
3392 | X * tuning and portability constructs [not nearly enough]
3393 | X * author: oz@nexus.yorku.ca
3394 | X */
3395 | X
3396 | X#define BYTESIZ		8
3397 | X
3398 | X#ifdef SVID
3399 | X#include <unistd.h>
3400 | X#endif
3401 | X
3402 | X#ifdef BSD42
3403 | X#define SEEK_SET	L_SET
3404 | X#define	memset(s,c,n)	bzero(s, n)		/* only when c is zero */
3405 | X#define	memcpy(s1,s2,n)	bcopy(s2, s1, n)
3406 | X#define	memcmp(s1,s2,n)	bcmp(s1,s2,n)
3407 | X#endif
3408 | X
3409 | X/*
3410 | X * important tuning parms (hah)
3411 | X */
3412 | X
3413 | X#define SEEDUPS			/* always detect duplicates */
3414 | X#define BADMESS			/* generate a message for worst case:
3415 | X				   cannot make room after SPLTMAX splits */
3416 | X/*
3417 | X * misc
3418 | X */
3419 | X#ifdef DEBUG
3420 | X#define debug(x)	printf x
3421 | X#else
3422 | X#define debug(x)
3423 | X#endif
3424 | @@@End of tune.h
3425 | echo x - util.c 1>&2
3426 | sed 's/^X//' >util.c <<'@@@End of util.c'
3427 | X#include <stdio.h>
3428 | X#ifdef SDBM
3429 | X#include "sdbm.h"
3430 | X#else
3431 | X#include "ndbm.h"
3432 | X#endif
3433 | X
3434 | Xvoid
3435 | Xoops(s1, s2)
3436 | Xregister char *s1;
3437 | Xregister char *s2;
3438 | X{
3439 | X	extern int errno, sys_nerr;
3440 | X	extern char *sys_errlist[];
3441 | X	extern char *progname;
3442 | X
3443 | X	if (progname)
3444 | X		fprintf(stderr, "%s: ", progname);
3445 | X	fprintf(stderr, s1, s2);
3446 | X	if (errno > 0 && errno < sys_nerr)
3447 | X		fprintf(stderr, " (%s)", sys_errlist[errno]);
3448 | X	fprintf(stderr, "\n");
3449 | X	exit(1);
3450 | X}
3451 | X
3452 | Xint
3453 | Xokpage(pag)
3454 | Xchar *pag;
3455 | X{
3456 | X	register unsigned n;
3457 | X	register off;
3458 | X	register short *ino = (short *) pag;
3459 | X
3460 | X	if ((n = ino[0]) > PBLKSIZ / sizeof(short))
3461 | X		return 0;
3462 | X
3463 | X	if (!n)
3464 | X		return 1;
3465 | X
3466 | X	off = PBLKSIZ;
3467 | X	for (ino++; n; ino += 2) {
3468 | X		if (ino[0] > off || ino[1] > off ||
3469 | X		    ino[1] > ino[0])
3470 | X			return 0;
3471 | X		off = ino[1];
3472 | X		n -= 2;
3473 | X	}
3474 | X
3475 | X	return 1;
3476 | X}
3477 | @@@End of util.c
3478 | 


--------------------------------------------------------------------------------
/sdbm.c:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * sdbm - ndbm work-alike hashed database library
  3 |  * based on Per-Aake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
  4 |  * author: oz@nexus.yorku.ca
  5 |  * status: public domain.
  6 |  *
  7 |  * core routines
  8 |  */
  9 | 
 10 | #ifndef lint
 11 | static char rcsid[] = "$Id: sdbm.c,v 1.16 90/12/13 13:01:31 oz Exp $";
 12 | #endif
 13 | 
 14 | #include "sdbm.h"
 15 | #include "tune.h"
 16 | #include "pair.h"
 17 | 
 18 | #include <sys/types.h>
 19 | #include <sys/stat.h>
 20 | #ifdef BSD42
 21 | #include <sys/file.h>
 22 | #else
 23 | #include <fcntl.h>
 24 | #include <memory.h>
 25 | #endif
 26 | #include <errno.h>
 27 | #include <string.h>
 28 | 
 29 | #ifdef __STDC__
 30 | #include <stddef.h>
 31 | #endif
 32 | 
 33 | #ifndef NULL
 34 | #define NULL	0
 35 | #endif
 36 | 
 37 | /*
 38 |  * externals
 39 |  */
 40 | #ifndef sun
 41 | extern int errno;
 42 | #endif
 43 | 
 44 | extern char *malloc proto((unsigned int));
 45 | extern void free proto((void *));
 46 | extern long lseek();
 47 | 
 48 | /*
 49 |  * forward
 50 |  */
 51 | static int getdbit proto((DBM *, long));
 52 | static int setdbit proto((DBM *, long));
 53 | static int getpage proto((DBM *, long));
 54 | static datum getnext proto((DBM *));
 55 | static int makroom proto((DBM *, long, int));
 56 | 
 57 | /*
 58 |  * useful macros
 59 |  */
 60 | #define bad(x)		((x).dptr == NULL || (x).dsize <= 0)
 61 | #define exhash(item)	dbm_hash((item).dptr, (item).dsize)
 62 | #define ioerr(db)	((db)->flags |= DBM_IOERR)
 63 | 
 64 | #define OFF_PAG(off)	(long) (off) * PBLKSIZ
 65 | #define OFF_DIR(off)	(long) (off) * DBLKSIZ
 66 | 
/*
 * masks[n] has the n low-order bits set (n = 0 .. 31), written in octal.
 * getpage() indexes this table with the number of hash bits it consumed
 * while walking the directory and stores the entry in db->hmask.
 */
static long masks[] = {
	000000000000, 000000000001, 000000000003, 000000000007,
	000000000017, 000000000037, 000000000077, 000000000177,
	000000000377, 000000000777, 000000001777, 000000003777,
	000000007777, 000000017777, 000000037777, 000000077777,
	000000177777, 000000377777, 000000777777, 000001777777,
	000003777777, 000007777777, 000017777777, 000037777777,
	000077777777, 000177777777, 000377777777, 000777777777,
	001777777777, 003777777777, 007777777777, 017777777777
};

/* the empty datum (NULL pointer, zero size) returned when a call fails */
datum nullitem = {NULL, 0};
 79 | 
 80 | DBM *
 81 | dbm_open(file, flags, mode)
 82 | register char *file;
 83 | register int flags;
 84 | register int mode;
 85 | {
 86 | 	register DBM *db;
 87 | 	register char *dirname;
 88 | 	register char *pagname;
 89 | 	register int n;
 90 | 
 91 | 	if (file == NULL || !*file)
 92 | 		return errno = EINVAL, (DBM *) NULL;
 93 | /*
 94 |  * need space for two seperate filenames
 95 |  */
 96 | 	n = strlen(file) * 2 + strlen(DIRFEXT) + strlen(PAGFEXT) + 2;
 97 | 
 98 | 	if ((dirname = malloc((unsigned) n)) == NULL)
 99 | 		return errno = ENOMEM, (DBM *) NULL;
100 | /*
101 |  * build the file names
102 |  */
103 | 	dirname = strcat(strcpy(dirname, file), DIRFEXT);
104 | 	pagname = strcpy(dirname + strlen(dirname) + 1, file);
105 | 	pagname = strcat(pagname, PAGFEXT);
106 | 
107 | 	db = dbm_prep(dirname, pagname, flags, mode);
108 | 	free((char *) dirname);
109 | 	return db;
110 | }
111 | 
112 | DBM *
113 | dbm_prep(dirname, pagname, flags, mode)
114 | char *dirname;
115 | char *pagname;
116 | int flags;
117 | int mode;
118 | {
119 | 	register DBM *db;
120 | 	struct stat dstat;
121 | 
122 | 	if ((db = (DBM *) malloc(sizeof(DBM))) == NULL)
123 | 		return errno = ENOMEM, (DBM *) NULL;
124 | 
125 |         db->flags = 0;
126 |         db->hmask = 0;
127 |         db->blkptr = 0;
128 |         db->keyptr = 0;
129 | /*
130 |  * adjust user flags so that WRONLY becomes RDWR, 
131 |  * as required by this package. Also set our internal
132 |  * flag for RDONLY if needed.
133 |  */
134 | 	if (flags & O_WRONLY)
135 | 		flags = (flags & ~O_WRONLY) | O_RDWR;
136 | 
137 | 	else if ((flags & 03) == O_RDONLY)
138 | 		db->flags = DBM_RDONLY;
139 | /*
140 |  * open the files in sequence, and stat the dirfile.
141 |  * If we fail anywhere, undo everything, return NULL.
142 |  */
143 | 	if ((db->pagf = open(pagname, flags, mode)) > -1) {
144 | 		if ((db->dirf = open(dirname, flags, mode)) > -1) {
145 | /*
146 |  * need the dirfile size to establish max bit number.
147 |  */
148 | 			if (fstat(db->dirf, &dstat) == 0) {
149 | /*
150 |  * zero size: either a fresh database, or one with a single,
151 |  * unsplit data page: dirpage is all zeros.
152 |  */
153 | 				db->dirbno = (!dstat.st_size) ? 0 : -1;
154 | 				db->pagbno = -1;
155 | 				db->maxbno = dstat.st_size * BYTESIZ;
156 | 
157 | 				(void) memset(db->pagbuf, 0, PBLKSIZ);
158 | 				(void) memset(db->dirbuf, 0, DBLKSIZ);
159 | 			/*
160 | 			 * success
161 | 			 */
162 | 				return db;
163 | 			}
164 | 			(void) close(db->dirf);
165 | 		}
166 | 		(void) close(db->pagf);
167 | 	}
168 | 	free((char *) db);
169 | 	return (DBM *) NULL;
170 | }
171 | 
172 | void
173 | dbm_close(db)
174 | register DBM *db;
175 | {
176 | 	if (db == NULL)
177 | 		errno = EINVAL;
178 | 	else {
179 | 		(void) close(db->dirf);
180 | 		(void) close(db->pagf);
181 | 		free((char *) db);
182 | 	}
183 | }
184 | 
185 | datum
186 | dbm_fetch(db, key)
187 | register DBM *db;
188 | datum key;
189 | {
190 | 	if (db == NULL || bad(key))
191 | 		return errno = EINVAL, nullitem;
192 | 
193 | 	if (getpage(db, exhash(key)))
194 | 		return getpair(db->pagbuf, key);
195 | 
196 | 	return ioerr(db), nullitem;
197 | }
198 | 
199 | int
200 | dbm_delete(db, key)
201 | register DBM *db;
202 | datum key;
203 | {
204 | 	if (db == NULL || bad(key))
205 | 		return errno = EINVAL, -1;
206 | 	if (dbm_rdonly(db))
207 | 		return errno = EPERM, -1;
208 | 
209 | 	if (getpage(db, exhash(key))) {
210 | 		if (!delpair(db->pagbuf, key))
211 | 			return -1;
212 | /*
213 |  * update the page file
214 |  */
215 | 		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
216 | 		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
217 | 			return ioerr(db), -1;
218 | 
219 | 		return 0;
220 | 	}
221 | 
222 | 	return ioerr(db), -1;
223 | }
224 | 
225 | int
226 | dbm_store(db, key, val, flags)
227 | register DBM *db;
228 | datum key;
229 | datum val;
230 | int flags;
231 | {
232 | 	int need;
233 | 	register long hash;
234 | 
235 | 	if (db == NULL || bad(key))
236 | 		return errno = EINVAL, -1;
237 | 	if (dbm_rdonly(db))
238 | 		return errno = EPERM, -1;
239 | 
240 | 	need = key.dsize + val.dsize;
241 | /*
242 |  * is the pair too big (or too small) for this database ??
243 |  */
244 | 	if (need < 0 || need > PAIRMAX)
245 | 		return errno = EINVAL, -1;
246 | 
247 | 	if (getpage(db, (hash = exhash(key)))) {
248 | /*
249 |  * if we need to replace, delete the key/data pair
250 |  * first. If it is not there, ignore.
251 |  */
252 | 		if (flags == DBM_REPLACE)
253 | 			(void) delpair(db->pagbuf, key);
254 | #ifdef SEEDUPS
255 | 		else if (duppair(db->pagbuf, key))
256 | 			return 1;
257 | #endif
258 | /*
259 |  * if we do not have enough room, we have to split.
260 |  */
261 | 		if (!fitpair(db->pagbuf, need))
262 | 			if (!makroom(db, hash, need))
263 | 				return ioerr(db), -1;
264 | /*
265 |  * we have enough room or split is successful. insert the key,
266 |  * and update the page file.
267 |  */
268 | 		(void) putpair(db->pagbuf, key, val);
269 | 
270 | 		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
271 | 		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
272 | 			return ioerr(db), -1;
273 | 	/*
274 | 	 * success
275 | 	 */
276 | 		return 0;
277 | 	}
278 | 
279 | 	return ioerr(db), -1;
280 | }
281 | 
/*
 * makroom - make room by splitting the overfull page
 * this routine will attempt to make room for SPLTMAX times before
 * giving up.
 *
 * db:   handle; db->pagbuf holds the full page and db->hmask/db->curbit
 *       are as getpage left them for this hash.
 * hash: hash of the key being inserted.
 * need: number of bytes the incoming pair requires (as given to fitpair).
 *
 * returns 1 when db->pagbuf has room for the pair (db->pagbno may now
 * name the newly created page), 0 on a seek/write/setdbit failure or
 * when SPLTMAX splits were not enough.
 */
static int
makroom(db, hash, need)
register DBM *db;
long hash;
int need;
{
	long newp;
	char twin[PBLKSIZ];		/* receives the split-off page */
	char *pag = db->pagbuf;
	char *new = twin;
	register int smax = SPLTMAX;	/* splits still allowed */

	do {
/*
 * split the current page
 * NOTE(review): splpage is defined elsewhere; presumably it moves the
 * pairs whose hash has bit (hmask + 1) set from pag into new - confirm.
 */
		(void) splpage(pag, new, db->hmask + 1);
/*
 * address of the new page
 */
		newp = (hash & db->hmask) | (db->hmask + 1);

/*
 * write delay, read avoidance/cache shuffle:
 * select the page for incoming pair: if key is to go to the new page,
 * write out the previous one, and copy the new one over, thus making
 * it the current page. If not, simply write the new page, and we are
 * still looking at the page of interest. current page is not updated
 * here, as dbm_store will do so, after it inserts the incoming pair.
 */
		if (hash & (db->hmask + 1)) {
			if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
			    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
				return 0;
			db->pagbno = newp;
			(void) memcpy(pag, new, PBLKSIZ);
		}
		else if (lseek(db->pagf, OFF_PAG(newp), SEEK_SET) < 0
			 || write(db->pagf, new, PBLKSIZ) < 0)
			return 0;

/*
 * record the split in the directory: the bit at curbit is now set
 */
		if (!setdbit(db, db->curbit))
			return 0;
/*
 * see if we have enough room now
 */
		if (fitpair(pag, need))
			return 1;
/*
 * try again... update curbit and hmask as getpage would have
 * done. because of our update of the current page, we do not
 * need to read in anything. BUT we have to write the current
 * [deferred] page out, as the window of failure is too great.
 */
		db->curbit = 2 * db->curbit +
			((hash & (db->hmask + 1)) ? 2 : 1);
		db->hmask |= db->hmask + 1;

		if (lseek(db->pagf, OFF_PAG(db->pagbno), SEEK_SET) < 0
		    || write(db->pagf, db->pagbuf, PBLKSIZ) < 0)
			return 0;

	} while (--smax);
/*
 * if we are here, this is real bad news. After SPLTMAX splits,
 * we still cannot fit the key. say goodnight.
 */
#ifdef BADMESS
	/* 44 is the byte length of the message, written to descriptor 2 */
	(void) write(2, "sdbm: cannot insert after SPLTMAX attempts.\n", 44);
#endif
	return 0;

}
360 | 
361 | /*
362 |  * the following two routines will break if
363 |  * deletions aren't taken into account. (ndbm bug)
364 |  */
365 | datum
366 | dbm_firstkey(db)
367 | register DBM *db;
368 | {
369 | 	if (db == NULL)
370 | 		return errno = EINVAL, nullitem;
371 | /*
372 |  * start at page 0
373 |  */
374 | 	if (lseek(db->pagf, OFF_PAG(0), SEEK_SET) < 0
375 | 	    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
376 | 		return ioerr(db), nullitem;
377 | 	db->pagbno = 0;
378 | 	db->blkptr = 0;
379 | 	db->keyptr = 0;
380 | 
381 | 	return getnext(db);
382 | }
383 | 
384 | datum
385 | dbm_nextkey(db)
386 | register DBM *db;
387 | {
388 | 	if (db == NULL)
389 | 		return errno = EINVAL, nullitem;
390 | 	return getnext(db);
391 | }
392 | 
393 | /*
394 |  * all important binary trie traversal
395 |  */
396 | static int
397 | getpage(db, hash)
398 | register DBM *db;
399 | register long hash;
400 | {
401 | 	register int hbit;
402 | 	register long dbit;
403 | 	register long pagb;
404 | 
405 | 	dbit = 0;
406 | 	hbit = 0;
407 | 	while (dbit < db->maxbno && getdbit(db, dbit))
408 | 		dbit = 2 * dbit + ((hash & (1 << hbit++)) ? 2 : 1);
409 | 
410 | 	debug(("dbit: %d...", dbit));
411 | 
412 | 	db->curbit = dbit;
413 | 	db->hmask = masks[hbit];
414 | 
415 | 	pagb = hash & db->hmask;
416 | /*
417 |  * see if the block we need is already in memory.
418 |  * note: this lookaside cache has about 10% hit rate.
419 |  */
420 | 	if (pagb != db->pagbno) { 
421 | /*
422 |  * note: here, we assume a "hole" is read as 0s.
423 |  * if not, must zero pagbuf first.
424 |  */
425 | 		if (lseek(db->pagf, OFF_PAG(pagb), SEEK_SET) < 0
426 | 		    || read(db->pagf, db->pagbuf, PBLKSIZ) < 0)
427 | 			return 0;
428 | 		if (!chkpage(db->pagbuf))
429 | 			return 0;
430 | 		db->pagbno = pagb;
431 | 
432 | 		debug(("pag read: %d\n", pagb));
433 | 	}
434 | 	return 1;
435 | }
436 | 
437 | static int
438 | getdbit(db, dbit)
439 | register DBM *db;
440 | register long dbit;
441 | {
442 | 	register long c;
443 | 	register long dirb;
444 | 
445 | 	c = dbit / BYTESIZ;
446 | 	dirb = c / DBLKSIZ;
447 | 
448 | 	if (dirb != db->dirbno) {
449 | 		if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
450 | 		    || read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
451 | 			return 0;
452 | 		db->dirbno = dirb;
453 | 
454 | 		debug(("dir read: %d\n", dirb));
455 | 	}
456 | 
457 | 	return db->dirbuf[c % DBLKSIZ] & (1 << dbit % BYTESIZ);
458 | }
459 | 
460 | static int
461 | setdbit(db, dbit)
462 | register DBM *db;
463 | register long dbit;
464 | {
465 | 	register long c;
466 | 	register long dirb;
467 | 
468 | 	c = dbit / BYTESIZ;
469 | 	dirb = c / DBLKSIZ;
470 | 
471 | 	if (dirb != db->dirbno) {
472 | 		if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
473 | 		    || read(db->dirf, db->dirbuf, DBLKSIZ) < 0)
474 | 			return 0;
475 | 		db->dirbno = dirb;
476 | 
477 | 		debug(("dir read: %d\n", dirb));
478 | 	}
479 | 
480 | 	db->dirbuf[c % DBLKSIZ] |= (1 << dbit % BYTESIZ);
481 | 
482 | 	if (dbit >= db->maxbno)
483 | 		db->maxbno += DBLKSIZ * BYTESIZ;
484 | 
485 | 	if (lseek(db->dirf, OFF_DIR(dirb), SEEK_SET) < 0
486 | 	    || write(db->dirf, db->dirbuf, DBLKSIZ) < 0)
487 | 		return 0;
488 | 
489 | 	return 1;
490 | }
491 | 
/*
 * getnext - get the next key in the page, and if done with
 * the page, try the next page in sequence
 *
 * db->keyptr is the number of the key last returned from db->pagbuf
 * (0 = none yet) and db->blkptr the page-file block being scanned.
 *
 * returns the next key, or nullitem with DBM_IOERR set once a seek,
 * a read or chkpage fails. note that read() returning 0 at end of
 * file leaves through the same exit, so the error flag is also set
 * when the scan simply runs out of pages.
 */
static datum
getnext(db)
register DBM *db;
{
	datum key;

	for (;;) {
		db->keyptr++;
		/* getnkey is defined elsewhere; a NULL dptr is taken to mean
		   "no such key on this page" */
		key = getnkey(db->pagbuf, db->keyptr);
		if (key.dptr != NULL)
			return key;
/*
 * we either run out, or there is nothing on this page..
 * try the next one... If we lost our position on the
 * file, we will have to seek.
 */
		db->keyptr = 0;
		/* compares against the block just finished, then advances
		   blkptr; a mismatch means pagbuf was reloaded by some other
		   call, so the file offset cannot be trusted */
		if (db->pagbno != db->blkptr++)
			if (lseek(db->pagf, OFF_PAG(db->blkptr), SEEK_SET) < 0)
				break;
		/* tagged before the read: pagbuf is about to hold blkptr */
		db->pagbno = db->blkptr;
		if (read(db->pagf, db->pagbuf, PBLKSIZ) <= 0)
			break;
		if (!chkpage(db->pagbuf))
			break;
	}

	return ioerr(db), nullitem;
}
525 | 


--------------------------------------------------------------------------------
/sdbm.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * sdbm - ndbm work-alike hashed database library
 3 |  * based on Per-Ake Larson's Dynamic Hashing algorithms. BIT 18 (1978).
 4 |  * author: oz@nexus.yorku.ca
 5 |  * status: public domain. 
 6 |  */
 7 | #define DBLKSIZ 4096
 8 | #define PBLKSIZ 1024
 9 | #define PAIRMAX 1008			/* arbitrary on PBLKSIZ-N */
10 | #define SPLTMAX	10			/* maximum allowed splits */
11 | 					/* for a single insertion */
12 | #define DIRFEXT	".dir"
13 | #define PAGFEXT	".pag"
14 | 
15 | typedef struct {
16 | 	int dirf;		       /* directory file descriptor */
17 | 	int pagf;		       /* page file descriptor */
18 | 	int flags;		       /* status/error flags, see below */
19 | 	long maxbno;		       /* size of dirfile in bits */
20 | 	long curbit;		       /* current bit number */
21 | 	long hmask;		       /* current hash mask */
22 | 	long blkptr;		       /* current block for nextkey */
23 | 	int keyptr;		       /* current key for nextkey */
24 | 	long blkno;		       /* current page to read/write */
25 | 	long pagbno;		       /* current page in pagbuf */
26 | 	char pagbuf[PBLKSIZ];	       /* page file block buffer */
27 | 	long dirbno;		       /* current block in dirbuf */
28 | 	char dirbuf[DBLKSIZ];	       /* directory file block buffer */
29 | } DBM;
30 | 
31 | #define DBM_RDONLY	0x1	       /* data base open read-only */
32 | #define DBM_IOERR	0x2	       /* data base I/O error */
33 | 
34 | /*
35 |  * utility macros
36 |  */
37 | #define dbm_rdonly(db)		((db)->flags & DBM_RDONLY)
38 | #define dbm_error(db)		((db)->flags & DBM_IOERR)
39 | 
40 | #define dbm_clearerr(db)	((db)->flags &= ~DBM_IOERR)  /* ouch */
41 | 
42 | #define dbm_dirfno(db)	((db)->dirf)
43 | #define dbm_pagfno(db)	((db)->pagf)
44 | 
45 | typedef struct {
46 | 	char *dptr;
47 | 	int dsize;
48 | } datum;
49 | 
50 | extern datum nullitem;
51 | 
52 | #ifdef __STDC__
53 | #define proto(p) p
54 | #else
55 | #define proto(p) ()
56 | #endif
57 | 
58 | /*
59 |  * flags to dbm_store
60 |  */
61 | #define DBM_INSERT	0
62 | #define DBM_REPLACE	1
63 | 
64 | /*
65 |  * ndbm interface
66 |  */
67 | extern DBM *dbm_open proto((char *, int, int));
68 | extern void dbm_close proto((DBM *));
69 | extern datum dbm_fetch proto((DBM *, datum));
70 | extern int dbm_delete proto((DBM *, datum));
71 | extern int dbm_store proto((DBM *, datum, datum, int));
72 | extern datum dbm_firstkey proto((DBM *));
73 | extern datum dbm_nextkey proto((DBM *));
74 | 
75 | /*
76 |  * other
77 |  */
78 | extern DBM *dbm_prep proto((char *, char *, int, int));
79 | extern long dbm_hash proto((char *, int));
80 | 


--------------------------------------------------------------------------------
/tune.h:
--------------------------------------------------------------------------------
 1 | /*
 2 |  * sdbm - ndbm work-alike hashed database library
 3 |  * tuning and portability constructs [not nearly enough]
 4 |  * author: oz@nexus.yorku.ca
 5 |  */
 6 | 
/* number of bits in a byte; converts directory bit numbers to byte offsets */
#define BYTESIZ		8

/* System V: SEEK_SET comes from <unistd.h> */
#ifdef SVID
#include <unistd.h>
#endif

/*
 * 4.2BSD: map the seek constant and the mem* routines onto their
 * BSD counterparts. note the swapped source/destination order of bcopy.
 */
#ifdef BSD42
#define SEEK_SET	L_SET
#define	memset(s,c,n)	bzero(s, n)		/* only when c is zero */
#define	memcpy(s1,s2,n)	bcopy(s2, s1, n)
#define	memcmp(s1,s2,n)	bcmp(s1,s2,n)
#endif

/*
 * important tuning parms (hah)
 */

#define SEEDUPS			/* always detect duplicates */
#define BADMESS			/* generate a message for worst case:
				   cannot make room after SPLTMAX splits */
/*
 * misc
 *
 * debug takes ONE parenthesized argument list, e.g.
 * debug(("n: %d\n", n)), and compiles to nothing unless DEBUG is defined.
 */
#ifdef DEBUG
#define debug(x)	printf x
#else
#define debug(x)
#endif
35 | 


--------------------------------------------------------------------------------
/util.c:
--------------------------------------------------------------------------------
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#ifdef SDBM
#include "sdbm.h"
#else
#include "ndbm.h"
#endif
 7 | 
 8 | void
 9 | oops(s1, s2)
10 | register char *s1;
11 | register char *s2;
12 | {
13 | 	extern int errno, sys_nerr;
14 | 	extern char *sys_errlist[];
15 | 	extern char *progname;
16 | 
17 | 	if (progname)
18 | 		fprintf(stderr, "%s: ", progname);
19 | 	fprintf(stderr, s1, s2);
20 | 	if (errno > 0 && errno < sys_nerr)
21 | 		fprintf(stderr, " (%s)", sys_errlist[errno]);
22 | 	fprintf(stderr, "\n");
23 | 	exit(1);
24 | }
25 | 
26 | int
27 | okpage(pag)
28 | char *pag;
29 | {
30 | 	register unsigned n;
31 | 	register off;
32 | 	register short *ino = (short *) pag;
33 | 
34 | 	if ((n = ino[0]) > PBLKSIZ / sizeof(short))
35 | 		return 0;
36 | 
37 | 	if (!n)
38 | 		return 1;
39 | 
40 | 	off = PBLKSIZ;
41 | 	for (ino++; n; ino += 2) {
42 | 		if (ino[0] > off || ino[1] > off ||
43 | 		    ino[1] > ino[0])
44 | 			return 0;
45 | 		off = ino[1];
46 | 		n -= 2;
47 | 	}
48 | 
49 | 	return 1;
50 | }
51 | 


--------------------------------------------------------------------------------