├── .gitignore ├── Makefile ├── README.txt ├── db-bench.c ├── db-data.py ├── db-del.c ├── db-export.c ├── db-get.c ├── db-import.c ├── db-iter.c ├── db-put.c ├── db-server.c ├── db-stat.c ├── db.c ├── db.h ├── hash.c └── hash.h /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.dSYM 3 | 4 | db-get 5 | db-put 6 | db-stat 7 | db-del 8 | db-iter 9 | db-export 10 | db-import 11 | db-bench 12 | db-server 13 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CC = cc 2 | 3 | CFLAGS = -Wall -Werror -Wno-long-long -ansi -pedantic -g 4 | 5 | SRC = hash.c db.c 6 | OBJ = $(SRC:.c=.o) 7 | 8 | UNAME := $(shell uname) 9 | ifeq ($(UNAME), Linux) 10 | CFLAGS += -D_BSD_SOURCE 11 | CFLAGS += -D_GNU_SOURCE 12 | CFLAGS += -D_POSIX_SOURCE 13 | CFLAGS += -D_XOPEN_SOURCE 14 | CFLAGS += -D_POSIX_C_SOURCE=200809L 15 | CFLAGS += -DLINUX 16 | endif 17 | 18 | all: db-put db-get db-del db-iter db-stat db-export db-import db-bench db-server 19 | 20 | db-put: db-put.c $(OBJ) 21 | $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ 22 | 23 | db-get: db-get.c $(OBJ) 24 | $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ 25 | 26 | db-del: db-del.c $(OBJ) 27 | $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ 28 | 29 | db-iter: db-iter.c $(OBJ) 30 | $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ 31 | 32 | db-stat: db-stat.c $(OBJ) 33 | $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ 34 | 35 | db-export: db-export.c $(OBJ) 36 | $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ 37 | 38 | db-import: db-import.c $(OBJ) 39 | $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ 40 | 41 | db-bench: db-bench.c $(OBJ) 42 | $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ 43 | 44 | db-server: db-server.c $(OBJ) 45 | $(CC) $(CFLAGS) $(LDFLAGS) $^ -o $@ 46 | 47 | .c.o: 48 | $(CC) $(CFLAGS) -c $< -o $@ 49 | 50 | clean: 51 | rm -rf db-put db-get db-del db-iter db-stat db-export db-import db-bench db-server *.o *.dSYM 52 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | A New DBM in Pure C 2 | 3 | 4 | Demo: 5 | ===== 6 | db_t db; 7 | db_option_t option; 8 | 9 | option.table = 256; /* table number,keep this small if data not too much */ 10 | option.bucket = 256; /* initialize bucket number in per table,will incrase when key add */ 11 | option.rdonly = 0; 12 | if (db_open(&db, /* data file */ "foo.db", /* index file */ "foo.db", &option) != DB_OK) { 13 | fprintf(stderr, "open db failed\n"); 14 | return 0; 15 | } 16 | 17 | if (db_put(&db, "hi", strlen(...), "hello,world", strlen(...)) != DB_OK) { 18 | fprintf(stderr, "NOT OK\n"); 19 | } 20 | 21 | if ((len = db_get(&db, "hi", strlen(...), val, sizeof(val))) == 0) { 22 | fprintf(stderr, "NOT FOUND\n"); 23 | } 24 | 25 | db_close(&db); 26 | 27 | 28 | Limited: 29 | ======== 30 | 31 | In 32 bit platform database file size is limited 4GiB* 32 | 33 | Key/Value length is 32 bit unsigned int 34 | 35 | *Depends Your Operation System,Mostly can't get 4GiB map 36 | 37 | 38 | Design: 39 | ======= 40 | 41 | +----------+ 42 | | | 43 | | header | 44 | | | 45 | +----------+ 46 | | table[0] |---------+ 47 | +----------+ | 48 | | table[1] |------------+ 49 | +----------+ | | 50 | | table[2] | | | 51 | +----------+ | | 52 | | . | | | 53 | | . | | | 54 | | . | | | 55 | +----------+ | | 56 | | table[N] | | | 57 | +----------+ | | 58 | +-----| bucket[0]|<--------| | 59 | | +----------+ | 60 | | | bucket[1]| | 61 | | +----------+ | 62 | +--------| bucket[2]| | 63 | | | +----------+ | 64 | | | | . | | 65 | | | | . |<-----------+ 66 | | | | . | 67 | | | +----------+ 68 | | | | bucket[N]| 69 | | | +----------+ 70 | | +---->| klen | 71 | | +----------+ 72 | | | vlen | 73 | | +----------+ 74 | | | . | 75 | | | klen | 76 | | | bytes | 77 | | | . | 78 | | +----------+ 79 | | | . | 80 | | | vlen | 81 | | | bytes | 82 | | | . | 83 | | +----------+ 84 | +------->| klen | 85 | +----------+ 86 | | vlen | 87 | +----------+ 88 | | . | 89 | | klen | 90 | | bytes | 91 | | . | 92 | +----------+ 93 | | . | 94 | | vlen | 95 | | bytes | 96 | | . | 97 | +----------+ 98 | | . | 99 | | . | 100 | | . | 101 | +----------+ 102 | 103 | 104 | Goal: 105 | ===== 106 | 107 | Keep it simple, stupid 108 | 109 | Next Release will has Dynamic Hash Implementation* 110 | And Mmap Maybe not required. 111 | 112 | *Litwin, Witold (1980), "Linear hashing: A new tool for file and table addressing" 113 | 114 | 115 | FAQ: 116 | ==== 117 | 118 | Q: Do you use `mmap'? What if I don't want use `mmap'? 119 | A: Yes.Just use others,there is a lot of key/value database you can choose. 120 | 121 | Q: I tried this library,It's waste to much disk space and memory! 122 | A: I'll write compaction function later,will reduce disk space in high update application,you can use db-export and db-import to a new database file.The future compaction function will do same thing.Memory is control by the kernel,Sorry. 123 | 124 | Q: Compression? 125 | A: Maybe. 126 | 127 | Q: Encryption? 128 | A: Maybe. 129 | 130 | Q: I use this in my Web Server,I have a problem! 131 | A: Please contact the author. 132 | 133 | Q: I use this in my Mobile Phone,I have a problem! 134 | A: Please contact the author. 135 | 136 | Q: I want do X,Will be OK? 137 | A: Just try.If don't,Please contact the author. 138 | 139 | Q: I have a problem! 140 | A: Please contact the author. 141 | 142 | 143 | License: 144 | ======== 145 | 146 | Public Domain License 147 | -------------------------------------------------------------------------------- /db-bench.c: -------------------------------------------------------------------------------- 1 | #include "db.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int 10 | main(int argc, char *argv[]) 11 | { 12 | int i; 13 | int loop; 14 | 15 | db_t db; 16 | db_option_t option; 17 | 18 | uint8_t key[1024]; 19 | uint8_t val[1024]; 20 | 21 | uint32_t klen, vlen; 22 | 23 | struct timeval tv; 24 | uint64_t start, end; 25 | uint64_t size = 0; 26 | 27 | if (argc != 4) { 28 | fprintf(stderr, "usage: %s [datafile] [indexfile] [loop]\n", argv[0]); 29 | return 0; 30 | } 31 | 32 | option.table = 256; 33 | option.bucket = 256; 34 | option.rdonly = 0; 35 | if (db_open(&db, argv[1], argv[2], &option) != DB_OK) { 36 | fprintf(stderr, "open db %s failed\n", argv[1]); 37 | return 0; 38 | } 39 | 40 | loop = atoi(argv[3]); 41 | 42 | memset(val, 0, sizeof(val)); 43 | 44 | vlen = sizeof(vlen); 45 | gettimeofday(&tv, NULL); 46 | start = tv.tv_sec * 1000 + tv.tv_usec / 1000; 47 | for (i = 0; i < loop; i++) { 48 | klen = sprintf((char *)key, "%016d", i); 49 | vlen = sprintf((char *)val, "%0128d", i); 50 | if (db_put(&db, key, klen, val, vlen) != DB_OK) { 51 | printf("db_put error: %s\n", key); 52 | break; 53 | } 54 | size += klen + vlen; 55 | } 56 | gettimeofday(&tv, NULL); 57 | end = tv.tv_sec * 1000 + tv.tv_usec / 1000; 58 | printf("write: %6.3f MB/s\n", size / 1024.0/1024.0 /(end - start) * 1000); 59 | printf("write: %6.3f Keys/s\n", (float)loop /(end - start) * 1000); 60 | 61 | memset(val, 0, sizeof(val)); 62 | 63 | size = 0; 64 | start = end; 65 | for (i = 0; i < loop; i++) { 66 | klen = sprintf((char *)key, "%016d", i); 67 | vlen = db_get(&db, key, klen, val, sizeof(val)); 68 | size += klen + vlen; 69 | if (vlen == 0) { 70 | printf("db_get error: %s\n", key); 71 | break; 72 | } 73 | } 74 | 75 | gettimeofday(&tv, NULL); 76 | end = tv.tv_sec * 1000 + tv.tv_usec / 1000; 77 | printf("read: %6.3f MB/s\n", size / 1024.0/1024.0 /(end - start) * 1000); 78 | printf("read: %6.3f Keys/s\n", (float)loop /(end - start) * 1000); 79 | 80 | db_close(&db); 81 | 82 | return 0; 83 | } 84 | 85 | -------------------------------------------------------------------------------- /db-data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import sys 4 | import struct 5 | 6 | def main(): 7 | with open(sys.argv[1], 'rb') as file: 8 | data = file.read(40) 9 | magic, version, head, tail, toff, tlen = struct.unpack('IIQQQQ', data) 10 | if toff != 0 and tlen != 0: 11 | file.seek(toff + tlen * 24, 0) # ignore table if has 12 | else: 13 | file.seek(head, 0) 14 | 15 | db = {} 16 | off = 0 17 | while True: 18 | data = file.read(8) 19 | if len(data) != 8: 20 | break 21 | klen, vlen = struct.unpack('II', data) 22 | if klen == 0 or vlen == 0: # not data or deleted data 23 | file.seek(klen + vlen, 1) 24 | continue 25 | key = file.read(klen) 26 | val = file.read(vlen) 27 | if len(key) != klen or len(val) != vlen: 28 | break; 29 | db[key] = val 30 | print '%s: %s' % (key, val) 31 | print len(db) 32 | print tail - head 33 | 34 | if __name__ == '__main__': 35 | main() 36 | -------------------------------------------------------------------------------- /db-del.c: -------------------------------------------------------------------------------- 1 | #include "db.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int 10 | main(int argc, char *argv[]) 11 | { 12 | db_t db; 13 | db_option_t option; 14 | 15 | if (argc != 4) { 16 | fprintf(stderr, "usage: %s [datafile] [indexfile] [key]\n", argv[0]); 17 | return 0; 18 | } 19 | 20 | option.table = 256; 21 | option.bucket = 256; 22 | option.rdonly = 0; 23 | if (db_open(&db, argv[1], argv[2], &option) != DB_OK) { 24 | fprintf(stderr, "open db %s failed\n", argv[1]); 25 | return 0; 26 | } 27 | 28 | if (db_del(&db, argv[3], strlen(argv[3])) == DB_OK) { 29 | fprintf(stderr, "OK\n"); 30 | } else { 31 | fprintf(stderr, "NOT OK\n"); 32 | } 33 | 34 | db_close(&db); 35 | 36 | return 0; 37 | } 38 | 39 | -------------------------------------------------------------------------------- /db-export.c: -------------------------------------------------------------------------------- 1 | #include "db.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int 10 | main(int argc, char *argv[]) 11 | { 12 | int i; 13 | db_t db; 14 | db_iter_t iter; 15 | db_option_t option; 16 | 17 | uint32_t item; 18 | uint32_t klen; 19 | uint32_t vlen; 20 | 21 | char key[1024]; 22 | char val[1024]; 23 | 24 | FILE *fp; 25 | 26 | if (argc != 4) { 27 | fprintf(stderr, "usage: %s [datafile] [indexfile] [file]\n", argv[0]); 28 | return 0; 29 | } 30 | 31 | option.rdonly = 1; 32 | if (db_open(&db, argv[1], argv[2], &option) != DB_OK) { 33 | fprintf(stderr, "open db %s failed\n", argv[1]); 34 | return 0; 35 | } 36 | 37 | if (db_iter(&db, &iter, NULL, 0) != DB_OK) { 38 | fprintf(stderr, "iter db %s failed\n", argv[1]); 39 | return 0; 40 | } 41 | 42 | fp = fopen(argv[3], "w"); 43 | if (fp == NULL) { 44 | fprintf(stderr, "open file %s failed\n", argv[3]); 45 | return 0; 46 | } 47 | 48 | item = 0; 49 | klen = sizeof(key); 50 | vlen = sizeof(val); 51 | while (db_iter_next(&db, &iter, key, &klen, val, &vlen) == DB_OK) { 52 | for (i = 0; i < klen; i++) 53 | fprintf(fp, "%02x", key[i]); 54 | fprintf(fp, "\n"); 55 | for (i = 0; i < vlen; i++) 56 | fprintf(fp, "%02x", val[i]); 57 | fprintf(fp, "\n"); 58 | 59 | klen = sizeof(key); 60 | vlen = sizeof(val); 61 | item += 1; 62 | } 63 | 64 | printf("%u", item); 65 | 66 | fclose(fp); 67 | db_close(&db); 68 | 69 | return 0; 70 | } 71 | 72 | -------------------------------------------------------------------------------- /db-get.c: -------------------------------------------------------------------------------- 1 | #include "db.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int 10 | main(int argc, char *argv[]) 11 | { 12 | db_t db; 13 | db_option_t option; 14 | 15 | char val[1024]; 16 | uint32_t vlen; 17 | 18 | if (argc != 4) { 19 | fprintf(stderr, "usage: %s [databfile] [indexfile] [key]\n", argv[0]); 20 | return 0; 21 | } 22 | 23 | option.rdonly = 1; 24 | if (db_open(&db, argv[1], argv[2], &option) != DB_OK) { 25 | fprintf(stderr, "open db %s failed\n", argv[1]); 26 | return 0; 27 | } 28 | 29 | if ((vlen = db_get(&db, argv[3], strlen(argv[3]), val, sizeof(val))) != 0) { 30 | fwrite(val, sizeof(char), vlen, stdout); 31 | } else { 32 | fprintf(stderr, "NOT FOUND\n"); 33 | } 34 | 35 | db_close(&db); 36 | 37 | return 0; 38 | } 39 | 40 | -------------------------------------------------------------------------------- /db-import.c: -------------------------------------------------------------------------------- 1 | #include "db.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int 10 | main(int argc, char *argv[]) 11 | { 12 | int i; 13 | db_t db; 14 | db_option_t option; 15 | 16 | uint32_t item; 17 | uint32_t klen; 18 | uint32_t vlen; 19 | 20 | char key[1024]; 21 | char val[1024]; 22 | 23 | FILE *fp; 24 | 25 | if (argc != 4) { 26 | fprintf(stderr, "usage: %s [datafile] [indexfile] [file]\n", argv[0]); 27 | return 0; 28 | } 29 | 30 | option.table = 256; 31 | option.bucket = 256; 32 | option.rdonly = 0; 33 | if (db_open(&db, argv[1], argv[2], &option) != DB_OK) { 34 | fprintf(stderr, "open db %s failed\n", argv[1]); 35 | return 0; 36 | } 37 | 38 | fp = fopen(argv[3], "r"); 39 | if (fp == NULL) { 40 | fprintf(stderr, "open file %s failed\n", argv[3]); 41 | return 0; 42 | } 43 | 44 | item = 0; 45 | klen = 0; 46 | vlen = 0; 47 | while (!feof(fp)) { 48 | char *line = NULL; 49 | size_t linecap = 0; 50 | ssize_t linelen; 51 | 52 | memset(key, 0, sizeof(key)); 53 | if ((linelen = getline(&line, &linecap, fp)) > 0) { 54 | linelen -= linelen % 2; 55 | for (i = 0; i < linelen; i+=2) { 56 | char str[3]; 57 | str[2] = '\0'; 58 | memcpy(str, line + i, 2); 59 | key[klen++] = strtol(str, NULL, 16); 60 | } 61 | } 62 | line = NULL; 63 | linecap = 0; 64 | memset(val, 0, sizeof(val)); 65 | if ((linelen = getline(&line, &linecap, fp)) > 0) { 66 | linelen -= linelen % 2; 67 | for (i = 0; i < linelen; i+=2) { 68 | char str[3]; 69 | str[2] = '\0'; 70 | memcpy(str, line + i, 2); 71 | val[vlen++] = strtol(str, NULL, 16); 72 | } 73 | } 74 | if (klen) { 75 | if (db_put(&db, key, klen, val, vlen) == DB_OK) { 76 | item += 1; 77 | } 78 | } 79 | klen = 0; 80 | vlen = 0; 81 | } 82 | 83 | printf("%u", item); 84 | 85 | fclose(fp); 86 | db_close(&db); 87 | 88 | return 0; 89 | } 90 | 91 | -------------------------------------------------------------------------------- /db-iter.c: -------------------------------------------------------------------------------- 1 | #include "db.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int 10 | main(int argc, char *argv[]) 11 | { 12 | db_t db; 13 | db_iter_t iter; 14 | db_option_t option; 15 | 16 | char key[1024]; 17 | char val[1024]; 18 | uint32_t klen, vlen; 19 | uint64_t len; 20 | 21 | if (argc != 3) { 22 | fprintf(stderr, "usage: %s [datafile] [indexfile]\n", argv[0]); 23 | return 0; 24 | } 25 | 26 | option.rdonly = 1; 27 | if (db_open(&db, argv[1], argv[2], &option) != DB_OK) { 28 | fprintf(stderr, "open db %s failed\n", argv[1]); 29 | return 0; 30 | } 31 | 32 | if (db_iter(&db, &iter, NULL, 0) != DB_OK) { 33 | fprintf(stderr, "iter db %s failed\n", argv[1]); 34 | return 0; 35 | } 36 | 37 | len = 0; 38 | klen = sizeof(key); 39 | vlen = sizeof(val); 40 | while (db_iter_next(&db, &iter, key, &klen, val, &vlen) == DB_OK) { 41 | char buf[1024]; 42 | if (klen + vlen + 3 > sizeof(buf)) { 43 | if (klen > 510) 44 | klen = 510; 45 | if (vlen > 510) 46 | vlen = 510; 47 | } 48 | memcpy(buf, key, klen); 49 | memcpy(buf + klen, ": ", 2); 50 | memcpy(buf + klen + 2, val, vlen); 51 | memcpy(buf + klen + 2 + vlen, "\n", 1); 52 | fwrite(buf, sizeof(char), klen + vlen + 3, stdout); 53 | klen = sizeof(key); 54 | vlen = sizeof(val); 55 | len++; 56 | } 57 | 58 | printf("len: %llu\n", (long long int)len); 59 | db_close(&db); 60 | 61 | return 0; 62 | } 63 | 64 | -------------------------------------------------------------------------------- /db-put.c: -------------------------------------------------------------------------------- 1 | #include "db.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int 10 | main(int argc, char *argv[]) 11 | { 12 | db_t db; 13 | db_option_t option; 14 | 15 | char *val; 16 | size_t vlen = 0; 17 | 18 | if (argc != 4 && argc != 5) { 19 | fprintf(stderr, "usage: %s [datafile] [indexfile] [key] [val]\n", argv[0]); 20 | fprintf(stderr, "or : %s [datafile] [indexfile] [key] < [file]\n", argv[0]); 21 | return 0; 22 | } 23 | 24 | option.table = 256; 25 | option.bucket = 256; 26 | option.rdonly = 0; 27 | if (db_open(&db, argv[1], argv[2], &option) != DB_OK) { 28 | fprintf(stderr, "open db %s failed\n", argv[1]); 29 | return 0; 30 | } 31 | 32 | if (argc == 4) { 33 | int n = 0; 34 | vlen = 4096; 35 | val = malloc(vlen); 36 | while ((val[n++] = getchar()) != EOF) { 37 | if (n >= vlen) { 38 | vlen *= 2; 39 | val = realloc(val, vlen); 40 | } 41 | } 42 | vlen = n - 1; 43 | } else { 44 | vlen = strlen(argv[4]); 45 | val = malloc(vlen); 46 | memcpy(val, argv[4], vlen); 47 | } 48 | 49 | if (db_put(&db, argv[3], strlen(argv[3]), val, vlen) == DB_OK) { 50 | fprintf(stderr, "OK\n"); 51 | } else { 52 | fprintf(stderr, "NOT OK\n"); 53 | } 54 | 55 | db_close(&db); 56 | 57 | return 0; 58 | } 59 | 60 | -------------------------------------------------------------------------------- /db-server.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | #include 15 | 16 | #include "db.h" 17 | 18 | #define PORT "11211" 19 | 20 | typedef socklen_t sl_t; 21 | typedef struct sockaddr sa_t; 22 | typedef struct sockaddr_in si_t; 23 | typedef struct sockaddr_in6 s6_t; 24 | typedef struct sockaddr_storage ss_t; 25 | 26 | typedef struct sbuf { 27 | int max; 28 | int off; 29 | int len; 30 | char *buf; 31 | } sbuf_t; 32 | 33 | typedef struct conn { 34 | sbuf_t in; 35 | sbuf_t out; 36 | } conn_t; 37 | 38 | conn_t conn[FD_SETSIZE]; /* FD_SETSIZE not a big number anyway */ 39 | 40 | #define INBUF_LEN (1 << 26) /* 64MB for read request */ 41 | #define OUTBUF_LEN (1 << 26) /* 64MB for send response */ 42 | 43 | #define VALBUF_LEN (1 << 26) /* 64MB for read database */ 44 | 45 | char *inbuf; 46 | char *outbuf; 47 | 48 | char *valbuf; 49 | 50 | enum {HANDLE_CLOSE, HANDLE_FINISH, HANDLE_NEEDMOREIN, HANDLE_NEEDMOREOUT}; 51 | 52 | #define ARGC_MAX 6 53 | #define ARGV_MAX 1024 /* also max key size */ 54 | 55 | ssize_t 56 | sbuf_send(const int fd, sbuf_t *buf, ssize_t *snd) 57 | { 58 | ssize_t len; 59 | 60 | assert(buf->len <= buf->max); 61 | assert(buf->off < buf->len); 62 | 63 | *snd = 0; 64 | do { 65 | len = send(fd, buf->buf + buf->off, buf->len - buf->off, 0); 66 | if (len > 0) { 67 | *snd += len; 68 | buf->off += len; 69 | } 70 | } while (buf->off > 0 && buf->off < buf->len && len > 0); 71 | 72 | if (len == -1 && errno == EWOULDBLOCK) 73 | len = 1; /* don't error when recv block */ 74 | 75 | return len; /* err */ 76 | } 77 | 78 | ssize_t 79 | sbuf_recv(const int fd, sbuf_t *buf, ssize_t *rcv) 80 | { 81 | ssize_t len; 82 | 83 | assert(buf->len < buf->max); 84 | 85 | *rcv = 0; 86 | do { 87 | len = recv(fd, buf->buf + buf->len, buf->max - buf->len, 0); 88 | if (len > 0) { 89 | *rcv += len; 90 | buf->len += len; 91 | } 92 | } while (buf->len > 0 && buf->len < buf->max && len > 0); 93 | 94 | if (len == -1 && errno == EWOULDBLOCK) 95 | len = 1; /* don't error when recv block */ 96 | 97 | return len; /* err */ 98 | } 99 | 100 | void 101 | sbuf_allocate(sbuf_t *buf, char *data, const ssize_t size) 102 | { 103 | buf->max = size; 104 | buf->len = 0; 105 | buf->off = 0; 106 | buf->buf = data; 107 | } 108 | 109 | void 110 | sbuf_release(sbuf_t *buf) 111 | { 112 | buf->max = 0; 113 | buf->len = 0; 114 | buf->off = 0; 115 | buf->buf = NULL; 116 | } 117 | 118 | int 119 | keylen(const char *key, const int maxlen) 120 | { 121 | char *p = (char *)memchr(key, ' ', maxlen); 122 | if (p == NULL) 123 | p = (char *)memchr(key, '\r', maxlen); 124 | if (p == NULL) 125 | return 0; 126 | return p - key; 127 | } 128 | 129 | int 130 | argparse(const char *buf, const ssize_t buflen, 131 | int *argc, char argv[][ARGV_MAX], const int max) 132 | { 133 | int i; 134 | 135 | int p; 136 | int c; 137 | 138 | int arg; 139 | int len; 140 | 141 | assert(buflen > 0); 142 | 143 | p = 0; 144 | arg = 0; 145 | len = 0; 146 | for (i = 0; i < buflen; i++) { 147 | c = buf[i]; 148 | 149 | if (len + 1 > ARGV_MAX) /* 1 for '\0' */ 150 | goto err; 151 | 152 | switch (c) { 153 | case '\n': 154 | if (p != '\r') 155 | goto err; 156 | goto out; 157 | case '\r': 158 | case ' ': 159 | memcpy(argv[arg], buf + i - len, len); 160 | argv[arg][len] = '\0'; 161 | if (arg == max) 162 | goto out; 163 | 164 | len = 0; 165 | arg += 1;; 166 | break; 167 | default: 168 | len++; 169 | } 170 | 171 | p = c; 172 | } 173 | 174 | return 0; 175 | out: 176 | *argc = arg; 177 | return (i + 1); 178 | err: 179 | return -1; 180 | } 181 | 182 | int 183 | handle(const int fd, db_t *db, sbuf_t *in, sbuf_t *out) 184 | { 185 | ssize_t err; 186 | ssize_t len; 187 | 188 | char *key; 189 | uint32_t klen; 190 | uint32_t vlen; 191 | 192 | int argc; 193 | int arglen; 194 | char argv[ARGC_MAX][ARGV_MAX]; 195 | 196 | if ((err = sbuf_recv(fd, in, &len)) <= 0 && len == 0) { 197 | if (err == -1) { 198 | fprintf(stdout, "db-server: socket %d recv %s\n", fd, strerror(errno)); 199 | } 200 | return HANDLE_CLOSE; 201 | } 202 | 203 | arglen = argparse(in->buf, in->len, &argc, argv, ARGC_MAX); 204 | if (argc < 1 || arglen == -1) { 205 | fprintf(stderr, "db-server: socket %d malformed request\n", fd); 206 | 207 | return HANDLE_CLOSE; 208 | } 209 | 210 | if (strcmp(argv[0], "set") == 0 && argc >= 5) { 211 | key = argv[1]; 212 | klen = strlen(key); 213 | 214 | vlen = atoi(argv[4]); 215 | if (vlen + arglen > in->max) { 216 | fprintf(stderr, "db-server: socket %d too large value\n", fd); 217 | 218 | return HANDLE_CLOSE; 219 | } 220 | 221 | if (vlen + arglen > in->len) { 222 | return HANDLE_NEEDMOREIN; 223 | } 224 | 225 | if (db_put(db, key, klen, in->buf + arglen, vlen) == DB_OK) { 226 | out->len = sprintf(out->buf, "STORED\r\n"); 227 | } else { 228 | out->len = sprintf(out->buf, "ERROR\r\n"); 229 | } 230 | } else if (strcmp(argv[0], "get") == 0 && argc >= 2) { 231 | key = argv[1]; 232 | klen = strlen(key); 233 | 234 | if ((vlen = db_get(db, key, klen, valbuf, VALBUF_LEN)) != 0) { 235 | if (vlen > VALBUF_LEN) 236 | return HANDLE_CLOSE; 237 | 238 | out->len = snprintf(out->buf, out->max, 239 | "VALUE %.*s %d %d\r\n%.*s\r\nEND\r\n", 240 | klen, key, 0, vlen, vlen, valbuf); 241 | } else { 242 | out->len = sprintf(out->buf, "END\r\n"); 243 | } 244 | } else if (strcmp(argv[0], "delete") == 0 && argc >= 2) { 245 | key = argv[1]; 246 | klen = strlen(key); 247 | 248 | if ((db_del(db, key, klen)) != 0) { 249 | out->len = snprintf(out->buf, out->max, "DELETED\r\n"); 250 | } else { 251 | out->len = snprintf(out->buf, out->max, "NOT_FOUND\r\n"); 252 | } 253 | } else { 254 | return HANDLE_CLOSE; 255 | } 256 | 257 | if ((err = sbuf_send(fd, out, &len)) <= 0 && len == 0) { 258 | if (err == -1) { 259 | fprintf(stdout, "db-server: socket %d send %s\n", fd, strerror(errno)); 260 | } 261 | return HANDLE_CLOSE; 262 | } 263 | 264 | if (out->off < out->len) { 265 | return HANDLE_NEEDMOREOUT; 266 | } 267 | 268 | return HANDLE_FINISH; 269 | } 270 | 271 | int 272 | handle_write(const int fd, fd_set *readfds, fd_set *writefds) 273 | { 274 | sbuf_t *in; 275 | sbuf_t *out; 276 | 277 | ssize_t err; 278 | ssize_t len; 279 | 280 | in = &conn[fd].in; 281 | out = &conn[fd].out; 282 | 283 | if (out->buf == NULL || out->len == 0 || out->off >= out->len) { 284 | FD_CLR(fd, writefds); 285 | 286 | return 0; 287 | } 288 | 289 | if ((err = sbuf_send(fd, out, &len)) <= 0 && len == 0) { 290 | FD_CLR(fd, writefds); 291 | 292 | return 0; 293 | } 294 | if (out->off == out->len) { 295 | inbuf = in->buf; 296 | outbuf = out->buf; 297 | 298 | sbuf_release(in); 299 | sbuf_release(out); 300 | } 301 | return 0; 302 | } 303 | 304 | int 305 | handle_read(const int fd, db_t *db, fd_set *readfds, fd_set *writefds) 306 | { 307 | sbuf_t *in; 308 | sbuf_t *out; 309 | 310 | ssize_t err; 311 | 312 | in = &conn[fd].in; 313 | out = &conn[fd].out; 314 | 315 | if (in->buf == NULL) { 316 | if (inbuf == NULL) { 317 | inbuf = malloc(INBUF_LEN); 318 | } 319 | sbuf_allocate(in, inbuf, INBUF_LEN); 320 | 321 | inbuf = NULL; 322 | } 323 | if (out->buf == NULL) { 324 | if (outbuf == NULL) { 325 | outbuf = malloc(OUTBUF_LEN); 326 | } 327 | sbuf_allocate(out, outbuf, OUTBUF_LEN); 328 | 329 | outbuf = NULL; 330 | } 331 | 332 | err = handle(fd, db, in, out); 333 | 334 | if (err == HANDLE_FINISH) { 335 | if (inbuf == NULL) { 336 | inbuf = in->buf; 337 | in->buf = NULL; 338 | } 339 | if (outbuf == NULL) { 340 | outbuf = out->buf; 341 | out->buf = NULL; 342 | } 343 | 344 | free(in->buf); 345 | free(out->buf); 346 | 347 | sbuf_release(in); 348 | sbuf_release(out); 349 | 350 | return 0; 351 | } 352 | 353 | if (err == HANDLE_NEEDMOREIN) { 354 | return 0; /* do nothing */ 355 | } 356 | 357 | if (err == HANDLE_NEEDMOREOUT) { 358 | FD_SET(fd, writefds); /* writer buf full need write event */ 359 | 360 | return 0; /* do nothing */ 361 | } 362 | 363 | if (err == HANDLE_CLOSE) { 364 | fprintf(stdout, "db-server: socket %d close\n", fd); 365 | 366 | if (inbuf == NULL) { 367 | inbuf = in->buf; 368 | in->buf = NULL; 369 | } 370 | if (outbuf == NULL) { 371 | outbuf = out->buf; 372 | out->buf = NULL; 373 | } 374 | 375 | free(in->buf); 376 | free(out->buf); 377 | 378 | sbuf_release(in); 379 | sbuf_release(out); 380 | 381 | close(fd); 382 | FD_CLR(fd, readfds); 383 | FD_CLR(fd, writefds); 384 | 385 | return -1; 386 | } 387 | 388 | return 0; 389 | } 390 | 391 | int 392 | handle_accept(const int fd, fd_set *readfds, fd_set *writefds) 393 | { 394 | ss_t addr; 395 | sl_t addrlen; 396 | char addrstr[INET6_ADDRSTRLEN]; 397 | int acceptfd; 398 | 399 | void *in_addr; 400 | 401 | addrlen = sizeof(addr); 402 | acceptfd = accept(fd, (sa_t *)&addr, &addrlen); 403 | 404 | if (acceptfd == -1) { 405 | fprintf(stdout, "db-server: accept: %s\n", strerror(errno)); 406 | 407 | return -1; 408 | } 409 | 410 | if (acceptfd >= FD_SETSIZE) { 411 | fprintf(stdout, "db-server: socket %d closed,can't take more fd\n", fd); 412 | 413 | close(acceptfd); 414 | 415 | return -1; 416 | } 417 | 418 | 419 | if (fcntl(acceptfd, F_SETFL, O_NONBLOCK) == -1) { 420 | fprintf(stdout, "db-server: socket %d fcntl NONBLOCK: %s\n", fd, strerror(errno)); 421 | 422 | close(acceptfd); 423 | 424 | return -1; 425 | } 426 | 427 | if (addr.ss_family == AF_INET) { 428 | in_addr = &((si_t *)&addr)->sin_addr; 429 | } else if (addr.ss_family == AF_INET6) { 430 | in_addr = &((s6_t *)&addr)->sin6_addr; 431 | } else { 432 | in_addr = NULL; 433 | } 434 | if (inet_ntop(addr.ss_family, in_addr, addrstr, sizeof(addrstr)) == NULL) { 435 | fprintf(stdout, "db-server: socket %d unknown address family\n", fd); 436 | } 437 | printf("db-server: new connection from %s on socket %d\n", addrstr, acceptfd); 438 | 439 | FD_SET(acceptfd, readfds); 440 | 441 | return acceptfd; 442 | } 443 | 444 | int 445 | main(int argc, char *argv[]) 446 | { 447 | int err; 448 | int nfds; 449 | int socketfd; 450 | 451 | db_t db; 452 | db_option_t option; 453 | 454 | fd_set readfds; 455 | fd_set writefds; 456 | 457 | char *dbfilename; 458 | char *idxfilename; 459 | 460 | struct addrinfo hints, *ai, *p; 461 | 462 | if (argc == 2) { 463 | dbfilename = argv[1]; 464 | idxfilename = NULL; 465 | } else if (argc != 3) { 466 | dbfilename = argv[1]; 467 | idxfilename = argv[2]; 468 | } else { 469 | fprintf(stderr, "usage: %s dbfile [indexfile]\n", argv[0]); 470 | 471 | return 0; 472 | } 473 | 474 | memset(&hints, 0, sizeof(hints)); 475 | 476 | hints.ai_family = AF_UNSPEC; 477 | hints.ai_socktype = SOCK_STREAM; 478 | hints.ai_flags = AI_PASSIVE; 479 | 480 | if ((err = getaddrinfo(NULL, PORT, &hints, &ai)) != 0) { 481 | fprintf(stderr, "db-server: getaddrinfo: %s\n", gai_strerror(err)); 482 | 483 | exit(1); 484 | } 485 | 486 | for (p = ai; p != NULL; p = p->ai_next) { 487 | int optval = 1; 488 | 489 | socketfd = socket(p->ai_family, p->ai_socktype, p->ai_protocol); 490 | if (socketfd == -1) { 491 | fprintf(stderr, "db-server: socket: %s\n", gai_strerror(err)); 492 | 493 | continue; 494 | } 495 | if (fcntl(socketfd, F_SETFL, O_NONBLOCK) == -1) { 496 | fprintf(stderr, "db-server: fcntl NONBLOCK: %s\n", strerror(errno)); 497 | 498 | continue; 499 | } 500 | 501 | if (setsockopt(socketfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) == -1) { 502 | fprintf(stderr, "db-server: setsockopt REUSEADDR: %s\n", strerror(errno)); 503 | 504 | continue; 505 | } 506 | 507 | if (bind(socketfd, p->ai_addr, p->ai_addrlen) < 0) { 508 | fprintf(stderr, "db-server: bind: %s\n", strerror(errno)); 509 | 510 | close(socketfd); 511 | 512 | continue; 513 | } 514 | break; 515 | } 516 | 517 | if (p == NULL) { 518 | fprintf(stderr, "db-server: failed to bind: %s\n", PORT); 519 | exit(1); 520 | } 521 | 522 | freeaddrinfo(ai); 523 | 524 | option.table = 256; 525 | option.bucket = 256; 526 | option.rdonly = 0; 527 | if (db_open(&db, dbfilename, idxfilename, &option) != DB_OK) { 528 | fprintf(stderr, "db-server: open db %s failed\n", dbfilename); 529 | 530 | exit(0); 531 | } 532 | 533 | if (listen(socketfd, 32) == -1) { 534 | fprintf(stderr, "db-server: listen: %s\n", strerror(errno)); 535 | 536 | exit(1); 537 | } 538 | 539 | FD_ZERO(&readfds); 540 | FD_ZERO(&writefds); 541 | 542 | nfds = socketfd; 543 | FD_SET(socketfd, &readfds); 544 | 545 | inbuf = malloc(INBUF_LEN); 546 | outbuf = malloc(OUTBUF_LEN); 547 | valbuf = malloc(VALBUF_LEN); 548 | 549 | for (;;) { 550 | int n; 551 | int fd; 552 | fd_set readfds_; 553 | fd_set writefds_; 554 | 555 | FD_ZERO(&readfds_); 556 | FD_ZERO(&writefds_); 557 | 558 | #ifdef LINUX 559 | memcpy(&readfds_, &readfds, sizeof(readfds)); 560 | memcpy(&writefds_, &writefds, sizeof(writefds)); 561 | #else 562 | FD_COPY(&readfds, &readfds_); 563 | FD_COPY(&writefds, &writefds_); 564 | #endif 565 | 566 | if ((n = select(nfds + 1, &readfds_, &writefds_, NULL, NULL)) == -1) { 567 | fprintf(stderr, "db-server: select: %s\n", strerror(errno)); 568 | 569 | exit(1); 570 | } 571 | 572 | for (fd = 0; fd <= nfds && n > 0; fd++) { 573 | if (FD_ISSET(fd, &writefds_)) { 574 | n--; 575 | handle_write(fd, &readfds, &writefds); 576 | } 577 | 578 | if (FD_ISSET(fd, &readfds_)) { 579 | n--; 580 | if (fd == socketfd) { 581 | int acceptfd; 582 | 583 | acceptfd = handle_accept(fd, &readfds, &writefds); 584 | if (acceptfd > nfds) { 585 | nfds = acceptfd; 586 | } 587 | } else { 588 | err = handle_read(fd, &db, &readfds, &writefds); 589 | if (err == -1 && fd >= nfds) { 590 | nfds = fd; 591 | while (FD_ISSET(nfds, &readfds) == 0 && 592 | FD_ISSET(nfds, &writefds) == 0) 593 | { 594 | nfds--; 595 | } 596 | } 597 | } 598 | } 599 | } 600 | } 601 | free(inbuf); 602 | free(outbuf); 603 | free(valbuf); 604 | 605 | return 0; 606 | } 607 | -------------------------------------------------------------------------------- /db-stat.c: -------------------------------------------------------------------------------- 1 | #include "db.h" 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | int 10 | main(int argc, char *argv[]) 11 | { 12 | db_t db; 13 | db_stat_t stat; 14 | db_option_t option; 15 | 16 | if (argc != 3) { 17 | fprintf(stderr, "usage: %s [datafile] [indexfile]\n", argv[0]); 18 | return 0; 19 | } 20 | 21 | option.rdonly = 1; 22 | if (db_open(&db, argv[1], argv[2], &option) != DB_OK) { 23 | fprintf(stderr, "open db %s failed\n", argv[1]); 24 | return 0; 25 | } 26 | 27 | if (db_stat(&db, &stat) != DB_OK) { 28 | fprintf(stderr, "db_stat error\n"); 29 | } 30 | 31 | printf("db_file_size: %llu\n", (long long int)stat.db_file_size); 32 | printf("db_table_max: %llu\n", (long long int)stat.db_table_max); 33 | printf("db_table_min: %llu\n", (long long int)stat.db_table_min); 34 | printf("db_table_total: %llu\n", (long long int)stat.db_table_total); 35 | printf("db_table_size: %llu\n", (long long int)stat.db_table_size); 36 | printf("db_bucket_total: %llu\n", (long long int)stat.db_bucket_total); 37 | printf("db_bucket_size: %llu\n", (long long int)stat.db_bucket_size); 38 | printf("db_data_size: %llu\n", (long long int)stat.db_data_size); 39 | 40 | db_close(&db); 41 | 42 | return 0; 43 | } 44 | 45 | -------------------------------------------------------------------------------- /db.c: -------------------------------------------------------------------------------- 1 | /* 2 | * db.c was written by WEI Zhicheng, and is placed in the public domain. 3 | * The author hereby disclaims copyright to this source code. 4 | */ 5 | 6 | #include "db.h" 7 | #include "hash.h" 8 | 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | 18 | #define DB_MAGIC 0x00004244 19 | #define DB_MAGIC_INDEX 0x58494244 20 | #define DB_MAGIC_DATA 0x54444244 21 | #define DB_VERSION 2 22 | 23 | #define PAGE_ALIGN(ptr,pgsz) \ 24 | ((char *)(ptr) - (((char *)(ptr) - (char *)NULL) & ((pgsz) - 1))) 25 | 26 | static int 27 | db_file_open(db_file_t *file, const char *filename, int rdonly) 28 | { 29 | if (rdonly) 30 | file->fd = open(filename, O_RDONLY, 0644); 31 | else 32 | file->fd = open(filename, O_RDWR | O_CREAT, 0644); 33 | 34 | if (file->fd == -1) 35 | return DB_SYS_ERROR; 36 | 37 | file->rdonly = rdonly; 38 | 39 | return DB_OK; 40 | } 41 | 42 | static size_t 43 | db_file_size(db_file_t *file) 44 | { 45 | struct stat stat; 46 | 47 | if (fstat(file->fd, &stat) == -1) 48 | return 0; 49 | return stat.st_size; 50 | } 51 | 52 | static int 53 | db_file_resize(db_file_t *file, size_t size) 54 | { 55 | assert(!file->rdonly); 56 | 57 | if (ftruncate(file->fd, size) == -1) 58 | return DB_SYS_ERROR; 59 | return DB_OK; 60 | } 61 | 62 | static int 63 | db_file_read(db_file_t *file, void *buf, off_t off, size_t len) 64 | { 65 | assert(off + len <= file->buflen); 66 | 67 | memcpy(buf, (uint8_t *)file->buf + off, len); 68 | return len; 69 | } 70 | 71 | static int 72 | db_file_write(db_file_t *file, const void *buf, off_t off, size_t len) 73 | { 74 | assert(!file->rdonly); 75 | assert(off + len <= file->buflen); 76 | 77 | memcpy((uint8_t *)file->buf + off, buf, len); 78 | return len; 79 | } 80 | 81 | static int 82 | db_file_compare(db_file_t *file, const void *buf, off_t off, size_t len) 83 | { 84 | assert(off + len <= file->buflen); 85 | 86 | return memcmp((uint8_t *)file->buf + off, buf, len); 87 | } 88 | 89 | static int 90 | db_file_sync(db_file_t *file, off_t off, size_t len) 91 | { 92 | void *ptr; 93 | 94 | assert(file); 95 | assert(!file->rdonly); 96 | 97 | ptr = (uint8_t *)file->buf + off; 98 | if (msync(PAGE_ALIGN(ptr, file->pgsz), len, MS_SYNC) == -1) { 99 | return DB_SYS_ERROR; 100 | } 101 | return DB_OK; 102 | } 103 | 104 | static int 105 | db_file_mmap(db_file_t *file) 106 | { 107 | int prot; 108 | int flags; 109 | 110 | assert(file); 111 | 112 | if (file->buf != NULL) { 113 | int error; 114 | if (!file->rdonly && 115 | (error = db_file_sync(file, 0, file->buflen)) != DB_OK) 116 | return error; 117 | if (munmap(file->buf, file->buflen) == -1) 118 | return DB_SYS_ERROR; 119 | } 120 | 121 | file->size = db_file_size(file); 122 | 123 | if (file->rdonly) { 124 | prot = PROT_READ; 125 | flags = MAP_PRIVATE; 126 | } else { 127 | prot = PROT_READ | PROT_WRITE; 128 | flags = MAP_SHARED; 129 | } 130 | file->buf = mmap(NULL, file->size, prot, flags, file->fd, 0); 131 | if (file->buf == MAP_FAILED) 132 | return DB_SYS_ERROR; 133 | file->buflen = file->size; 134 | 135 | file->header = file->buf; 136 | 137 | return DB_OK; 138 | } 139 | 140 | enum {DB_FILE_ADVISE_UNLIKELY = MADV_WILLNEED, 141 | DB_FILE_ADVISE_LIKELY = MADV_WILLNEED}; 142 | 143 | static int 144 | db_file_advise(db_file_t *file, off_t off, size_t len, int advise) 145 | { 146 | void *ptr; 147 | 148 | ptr = PAGE_ALIGN((uint8_t *)file->buf + off, file->pgsz); 149 | if (madvise(PAGE_ALIGN(ptr, file->pgsz), len, advise) == -1) 150 | return DB_SYS_ERROR; 151 | return DB_OK; 152 | } 153 | 154 | #define db_file_likely(file,off,len) \ 155 | db_file_advise((file), (off), (len), DB_FILE_ADVISE_LIKELY) 156 | 157 | #define db_file_unlikely(file,off,len) \ 158 | db_file_advise((file), (off), (len), DB_FILE_ADVISE_LIKELY) 159 | 160 | 161 | static uint64_t 162 | db_file_alloc(db_file_t *file, uint64_t len) 163 | { 164 | int error; 165 | uint64_t off; 166 | 167 | assert(!file->rdonly); 168 | 169 | if ((file->header->data_tail + len) > file->size) { 170 | 171 | /* Changeable,time and space tradeoff */ 172 | size_t newsize = (file->header->data_tail + len) * 2; 173 | 174 | if ((error = db_file_resize(file, newsize)) != DB_OK) { 175 | file->db->db_error = error; 176 | return 0; 177 | } 178 | if ((error = db_file_mmap(file)) != DB_OK) { 179 | file->db->db_error = error; 180 | return 0; 181 | } 182 | } 183 | 184 | off = file->header->data_tail; 185 | file->header->data_tail += len; 186 | return off; 187 | } 188 | 189 | static uint64_t 190 | db_file_calloc(db_file_t *file, uint64_t len) 191 | { 192 | uint64_t off; 193 | 194 | off = db_file_alloc(file, len); 195 | if (off != 0) 196 | memset((uint8_t *)file->buf + off, 0, len); 197 | 198 | return off; 199 | } 200 | 201 | static int 202 | db_file_init(db_file_t *file, size_t size) 203 | { 204 | int error; 205 | 206 | file->pgsz = sysconf(_SC_PAGESIZE); 207 | 208 | if ((size > db_file_size(file)) && !file->rdonly) 209 | if ((error = db_file_resize(file, size)) != DB_OK) 210 | return error; 211 | 212 | if ((error = db_file_mmap(file)) != DB_OK) 213 | return error; 214 | 215 | return DB_OK; 216 | } 217 | 218 | static int 219 | db_file_close(db_file_t *file) 220 | { 221 | if (!file->rdonly && msync(file->buf, file->buflen, MS_SYNC) == -1) 222 | return DB_SYS_ERROR; 223 | 224 | if (munmap(file->buf, file->buflen) == -1) 225 | return DB_SYS_ERROR; 226 | 227 | if (close(file->fd) == -1) 228 | return DB_SYS_ERROR; 229 | return DB_OK; 230 | } 231 | 232 | static int 233 | db_table_read(db_t *db, db_table_t *table, uint64_t off) 234 | { 235 | return db_file_read(db->db_index, table, 236 | db->db_index->header->table_off + off * sizeof(db_table_t), 237 | sizeof(db_table_t)); 238 | } 239 | 240 | static int 241 | db_table_write(db_t *db, db_table_t *table, uint64_t off) 242 | { 243 | return db_file_write(db->db_index, table, 244 | db->db_index->header->table_off + off * sizeof(db_table_t), 245 | sizeof(db_table_t)); 246 | } 247 | 248 | static int 249 | db_bucket_read(db_t *db, db_table_t *table, db_bucket_t *bucket, uint64_t off) 250 | { 251 | return db_file_read(db->db_index, bucket, 252 | table->bucket_off + off * sizeof(db_bucket_t), 253 | sizeof(db_bucket_t)); 254 | } 255 | 256 | static int 257 | db_bucket_write(db_t *db, db_table_t *table, db_bucket_t *bucket, uint64_t off) 258 | { 259 | return db_file_write(db->db_index, bucket, 260 | table->bucket_off + off * sizeof(db_bucket_t), 261 | sizeof(db_bucket_t)); 262 | } 263 | 264 | static int 265 | db_table_resize(db_t *db, uint64_t table_off, uint64_t bucket_per_table) 266 | { 267 | uint64_t i; 268 | uint64_t off; 269 | 270 | uint32_t klen; 271 | uint32_t vlen; 272 | 273 | db_table_t old_table; 274 | db_table_t new_table; 275 | 276 | vlen = bucket_per_table * sizeof(db_bucket_t); 277 | off = db_file_calloc(db->db_index, vlen + sizeof(klen) + sizeof(vlen)); 278 | 279 | if (off == 0) 280 | return DB_SYS_ERROR; 281 | 282 | /* make db-data can expert data when single file */ 283 | /* just writer klen = 0, vlen = table size */ 284 | klen = 0; 285 | off += db_file_write(db->db_index, &klen, off, sizeof(klen)); 286 | off += db_file_write(db->db_index, &vlen, off, sizeof(vlen)); 287 | 288 | db_table_read(db, &old_table, table_off); 289 | 290 | new_table.bucket_off = off; 291 | new_table.bucket_key = old_table.bucket_key; 292 | new_table.bucket_len = bucket_per_table; 293 | 294 | for (i = 0; i < old_table.bucket_len; i++) { 295 | uint64_t j; 296 | db_bucket_t bucket; 297 | 298 | db_bucket_read(db, &old_table, &bucket, i); 299 | 300 | if (bucket.hash == 0) 301 | continue; 302 | 303 | for (j = bucket.hash % bucket_per_table;; 304 | j = (j + 1) % bucket_per_table) 305 | { 306 | db_bucket_t new_bucket; 307 | 308 | db_bucket_read(db, &new_table, &new_bucket, j); 309 | if (new_bucket.hash == 0) { 310 | db_bucket_write(db, &new_table, &bucket, j); 311 | break; 312 | } 313 | } 314 | } 315 | 316 | db_table_write(db, &new_table, table_off); 317 | 318 | return DB_OK; 319 | } 320 | 321 | static int 322 | db_index_init(db_t *db, uint64_t table, uint64_t bucket) 323 | { 324 | uint64_t i; 325 | uint64_t table_off; 326 | assert(db && db->db_index->buf); 327 | 328 | db->db_index->header->magic = DB_MAGIC; 329 | db->db_index->header->version = DB_VERSION; 330 | 331 | db->db_index->header->data_head = sizeof(db_file_header_t); 332 | db->db_index->header->data_tail = db->db_index->buflen; 333 | 334 | table_off = db_file_calloc(db->db_index, table * sizeof(db_table_t)); 335 | if (table_off == 0) 336 | return DB_SYS_ERROR; 337 | 338 | db->db_index->header->table_off = table_off; 339 | db->db_index->header->table_len = table; 340 | 341 | for (i = 0; i < table; i++) { 342 | db_table_resize(db, i, bucket); 343 | } 344 | 345 | return DB_OK; 346 | } 347 | 348 | static int 349 | db_data_init(db_t *db) 350 | { 351 | assert(db && db->db_data->buf); 352 | 353 | db->db_data->header->magic = DB_MAGIC; 354 | db->db_data->header->version = DB_VERSION; 355 | 356 | db->db_data->header->data_head = sizeof(db_file_header_t); 357 | db->db_data->header->data_tail = db->db_data->buflen; 358 | 359 | return DB_OK; 360 | } 361 | 362 | int 363 | db_open(db_t *db, const char *data, const char *index, const db_option_t *option) 364 | { 365 | int init; 366 | int error; 367 | 368 | memset(db, 0, sizeof(struct db)); 369 | 370 | if (index != NULL && strcmp(index, data) == 0) 371 | index = NULL; 372 | 373 | db->db_data = &db->db_file_data; 374 | if ((error = db_file_open(db->db_data, data, option->rdonly)) != DB_OK) 375 | return error; 376 | 377 | if (index != NULL) { /* separate index and data file */ 378 | db->db_index = &db->db_file_index; 379 | if ((error = db_file_open(db->db_index, index, option->rdonly)) != DB_OK) 380 | return error; 381 | } else { 382 | db->db_index = &db->db_file_data; 383 | } 384 | 385 | init = db_file_size(db->db_index); 386 | error = db_file_init(db->db_index, sizeof(db_file_header_t)); 387 | if (error != DB_OK) 388 | return error; 389 | 390 | if (!init && !option->rdonly) { 391 | error = db_index_init(db, option->table, option->bucket); 392 | if (error != DB_OK) 393 | return error; 394 | } 395 | db->db_table_len = db->db_index->header->table_len; 396 | 397 | init = db_file_size(db->db_data); 398 | error = db_file_init(db->db_data, sizeof(db_file_header_t)); 399 | if (error != DB_OK) 400 | return error; 401 | 402 | if (!init) { 403 | if ((error = db_data_init(db)) != DB_OK) 404 | return error; 405 | } 406 | 407 | if (db->db_index->header->version != DB_VERSION || 408 | db->db_data->header->version != DB_VERSION) 409 | { 410 | return DB_SYS_ERROR; 411 | } 412 | 413 | if (db->db_index->header->magic != DB_MAGIC && 414 | db->db_index->header->magic != DB_MAGIC_INDEX) 415 | { 416 | return DB_SYS_ERROR; 417 | } 418 | 419 | if (db->db_data->header->magic != DB_MAGIC && 420 | db->db_data->header->magic != DB_MAGIC_DATA) 421 | { 422 | return DB_SYS_ERROR; 423 | } 424 | 425 | db_file_likely(db->db_data, 0, sizeof(*db->db_data->header)); 426 | 427 | return DB_OK; 428 | } 429 | 430 | int 431 | db_put(db_t *db, const void *key, uint32_t klen, const void *val, uint32_t vlen) 432 | { 433 | uint64_t i; 434 | uint64_t len; 435 | uint64_t data; 436 | 437 | uint64_t hash; 438 | db_table_t table; 439 | db_bucket_t bucket; 440 | 441 | hash = db_hash(key, klen); 442 | db_table_read(db, &table, hash % db->db_index->header->table_len); 443 | 444 | if (((table.bucket_key + 1) * 2) > table.bucket_len) { 445 | if (db_table_resize(db, hash % db->db_table_len, 446 | table.bucket_len * 2) != DB_OK) 447 | { 448 | return DB_SYS_ERROR; 449 | } 450 | db_table_read(db, &table, hash % db->db_table_len); 451 | } 452 | 453 | len = sizeof(uint32_t) * 2 + klen + vlen; 454 | 455 | data = db_file_alloc(db->db_data, len); 456 | if (data == 0) 457 | return DB_SYS_ERROR; 458 | 459 | data += db_file_write(db->db_data, &klen, data, sizeof(uint32_t)); 460 | data += db_file_write(db->db_data, &vlen, data, sizeof(uint32_t)); 461 | data += db_file_write(db->db_data, key, data, klen); 462 | data += db_file_write(db->db_data, val, data, vlen); 463 | 464 | bucket.hash = hash; 465 | bucket.off = data - len; 466 | 467 | for (i = hash % table.bucket_len;; i = (i + 1) % table.bucket_len) { 468 | db_bucket_t db_bucket; 469 | 470 | db_bucket_read(db, &table, &db_bucket, i); 471 | if (db_bucket.hash != 0) { 472 | uint64_t koff; 473 | 474 | if (db_bucket.hash != bucket.hash) 475 | continue; 476 | if (db_file_compare(db->db_data, &klen, db_bucket.off, 477 | sizeof(klen)) != 0) 478 | { 479 | continue; 480 | } 481 | koff = db_bucket.off + sizeof(klen) + sizeof(vlen); 482 | if (db_file_compare(db->db_data, key, koff, klen) != 0) 483 | continue; 484 | } 485 | 486 | db_bucket_write(db, &table, &bucket, i); 487 | 488 | if (db_bucket.hash == 0) { 489 | table.bucket_key += 1; 490 | db_table_write(db, &table, hash % db->db_table_len); 491 | } 492 | return DB_OK; 493 | } 494 | 495 | return DB_SYS_ERROR; 496 | } 497 | 498 | uint32_t 499 | db_get(db_t *db, const void *key, uint32_t klen, void *val, uint32_t vlen) 500 | { 501 | uint64_t i; 502 | 503 | uint64_t hash; 504 | db_table_t table; 505 | db_bucket_t bucket; 506 | 507 | hash = db_hash(key, klen); 508 | db_table_read(db, &table, hash % db->db_table_len); 509 | 510 | for (i = hash % table.bucket_len;; i = (i + 1) % table.bucket_len) { 511 | uint64_t koff; 512 | 513 | db_bucket_read(db, &table, &bucket, i); 514 | 515 | if (bucket.hash == 0) 516 | break; 517 | 518 | if (bucket.hash != hash) 519 | continue; 520 | 521 | if (db_file_compare(db->db_data, &klen, bucket.off, 522 | sizeof(klen)) != 0) 523 | { 524 | continue; 525 | } 526 | 527 | koff = bucket.off + sizeof(klen) + sizeof(vlen); 528 | if (db_file_compare(db->db_data, key, koff, klen) == 0) { 529 | uint32_t len; 530 | 531 | db_file_read(db->db_data, &len, 532 | bucket.off + sizeof(klen), sizeof(len)); 533 | 534 | if (len < vlen) 535 | vlen = len; 536 | 537 | db_file_read(db->db_data, val, koff + klen, vlen); 538 | return len; 539 | } 540 | } 541 | 542 | return 0; 543 | } 544 | 545 | int 546 | db_del(db_t *db, const void *key, uint32_t klen) 547 | { 548 | return db_put(db, key, klen, NULL, 0); 549 | } 550 | 551 | int 552 | db_iter(db_t *db, db_iter_t *iter, const void *key, const uint32_t klen) 553 | { 554 | uint64_t i; 555 | uint64_t hash; 556 | db_table_t table; 557 | db_bucket_t bucket; 558 | 559 | if (key == NULL || klen == 0) { 560 | iter->table_off = 0; 561 | iter->bucket_off = 0; 562 | 563 | return DB_OK; 564 | } 565 | 566 | hash = db_hash(key, klen); 567 | db_table_read(db, &table, hash % db->db_table_len); 568 | 569 | for (i = hash % table.bucket_len;; i = (i + 1) % table.bucket_len) { 570 | uint64_t koff; 571 | 572 | db_bucket_read(db, &table, &bucket, i); 573 | 574 | if (bucket.hash == 0) 575 | break; 576 | 577 | if (bucket.hash != hash) 578 | continue; 579 | 580 | if (db_file_compare(db->db_data, &klen, bucket.off, 581 | sizeof(klen)) != 0) 582 | { 583 | continue; 584 | } 585 | 586 | koff = bucket.off + sizeof(klen) + sizeof(uint32_t); 587 | if (db_file_compare(db->db_data, key, koff, klen) == 0) { 588 | iter->table_off = hash % table.bucket_len; 589 | iter->bucket_off = i; 590 | 591 | return DB_OK; 592 | } 593 | } 594 | 595 | return DB_ERROR; 596 | } 597 | 598 | int 599 | db_iter_next(db_t *db, db_iter_t *iter, 600 | void *key, uint32_t *klen, void *val, uint32_t *vlen) 601 | { 602 | uint64_t i; 603 | uint64_t j; 604 | 605 | for (i = iter->table_off; i < db->db_table_len; i++) { 606 | db_table_t table; 607 | 608 | db_table_read(db, &table, i); 609 | for (j = iter->bucket_off; j < table.bucket_len; j++) { 610 | uint64_t off; 611 | uint32_t dbklen; 612 | uint32_t dbvlen; 613 | db_bucket_t bucket; 614 | 615 | db_bucket_read(db, &table, &bucket, j); 616 | if (bucket.hash == 0) 617 | continue; 618 | 619 | off = bucket.off; 620 | off += db_file_read(db->db_data, &dbklen, off, 621 | sizeof(dbklen)); 622 | off += db_file_read(db->db_data, &dbvlen, off, 623 | sizeof(dbvlen)); 624 | 625 | if (dbvlen == 0) 626 | continue; 627 | 628 | if (dbklen < *klen) 629 | *klen = dbklen; 630 | 631 | if (dbvlen < *vlen) 632 | *vlen = dbvlen; 633 | 634 | off += db_file_read(db->db_data, key, off, *klen); 635 | off += db_file_read(db->db_data, val, off, *vlen); 636 | 637 | *klen = dbklen; 638 | *vlen = dbvlen; 639 | 640 | iter->bucket_off = j + 1; 641 | return DB_OK; 642 | } 643 | iter->table_off += 1; 644 | iter->bucket_off = 0; 645 | } 646 | 647 | return DB_SYS_ERROR; 648 | } 649 | 650 | int 651 | db_stat(db_t *db, db_stat_t *stat) 652 | { 653 | int error; 654 | uint64_t i; 655 | uint32_t klen; 656 | uint32_t vlen; 657 | 658 | db_iter_t iter; 659 | 660 | memset(stat, 0, sizeof(db_stat_t)); 661 | 662 | stat->db_file_size = db_file_size(db->db_data); 663 | 664 | stat->db_table_min = UINT32_MAX; 665 | for (i = 0; i < db->db_table_len; i++) { 666 | db_table_t table; 667 | db_table_read(db, &table, i); 668 | 669 | if (table.bucket_key > stat->db_table_max) 670 | stat->db_table_max = table.bucket_key; 671 | 672 | if (table.bucket_key < stat->db_table_min) 673 | stat->db_table_min = table.bucket_key; 674 | stat->db_table_total += table.bucket_key; 675 | stat->db_bucket_total += table.bucket_len; 676 | } 677 | stat->db_table_size = stat->db_table_total * sizeof(db_table_t); 678 | stat->db_bucket_size = stat->db_bucket_total * sizeof(db_bucket_t); 679 | 680 | if ((error = db_iter(db, &iter, NULL, 0)) != DB_OK) 681 | return error; 682 | 683 | klen = 0; 684 | vlen = 0; 685 | while (db_iter_next(db, &iter, NULL, &klen, NULL, &vlen) == DB_OK) { 686 | stat->db_data_size += klen; 687 | stat->db_data_size += vlen; 688 | klen = 0; 689 | vlen = 0; 690 | } 691 | 692 | return DB_OK; 693 | } 694 | 695 | int 696 | db_close(db_t *db) 697 | { 698 | return db_file_close(db->db_data); 699 | } 700 | -------------------------------------------------------------------------------- /db.h: -------------------------------------------------------------------------------- 1 | #ifndef __DB_H__ 2 | #define __DB_H__ 3 | 4 | #include 5 | #include 6 | 7 | enum {DB_SYS_ERROR = -1, DB_ERROR = 0, DB_OK = 1}; 8 | 9 | typedef struct db_table { 10 | uint64_t bucket_off; /* offset in file */ 11 | uint64_t bucket_key; /* key in use */ 12 | uint64_t bucket_len; /* buckets in table */ 13 | } db_table_t; 14 | 15 | typedef struct db_bucket { 16 | uint64_t hash; /* key hash */ 17 | uint64_t off; /* offset in file */ 18 | } db_bucket_t; 19 | 20 | typedef struct db_iter { 21 | uint64_t table_off; 22 | uint64_t bucket_off; 23 | } db_iter_t; 24 | 25 | typedef struct db_stat { 26 | uint64_t db_file_size; 27 | 28 | uint64_t db_table_max; 29 | uint64_t db_table_min; 30 | 31 | uint64_t db_table_total; 32 | uint64_t db_table_size; 33 | 34 | uint64_t db_bucket_total; 35 | uint64_t db_bucket_size; 36 | 37 | uint64_t db_data_size; 38 | } db_stat_t; 39 | 40 | /* disk format */ 41 | typedef struct db_file_header { 42 | uint32_t magic; 43 | uint32_t version; 44 | uint64_t data_head; 45 | uint64_t data_tail; 46 | uint64_t table_off; 47 | uint64_t table_len; 48 | } db_file_header_t; 49 | 50 | typedef struct db_file { 51 | struct db *db; 52 | 53 | void *buf; 54 | uint64_t buflen; 55 | 56 | int fd; 57 | int pgsz; 58 | uint64_t size; 59 | int rdonly; 60 | 61 | db_file_header_t *header; 62 | } db_file_t; 63 | 64 | 65 | typedef struct db { 66 | int db_mode; 67 | int db_error; 68 | 69 | db_file_t *db_index; 70 | db_file_t *db_data; 71 | 72 | db_file_t db_file_index; 73 | db_file_t db_file_data; 74 | 75 | uint64_t db_table_len; 76 | } db_t; 77 | 78 | typedef struct db_option { 79 | uint64_t table; 80 | uint64_t bucket; 81 | uint64_t rdonly; 82 | } db_option_t; 83 | 84 | /* 85 | * if index is NULL or same data 86 | * is the single file mode (mixin data and index) 87 | */ 88 | int 89 | db_open(db_t *db, const char *data, const char *index, const db_option_t *option); 90 | 91 | int 92 | db_put(db_t *db, const void *key, uint32_t klen, const void *val, uint32_t vlen); 93 | 94 | /* 95 | * if value size bigger than vlen, val will fill in value 0 ~ vlen 96 | * return value length 97 | */ 98 | uint32_t 99 | db_get(db_t *db, const void *key, uint32_t klen, void *val, uint32_t vlen); 100 | 101 | int 102 | db_del(db_t *db, const void *key, uint32_t klen); 103 | 104 | int 105 | db_iter(db_t *db, db_iter_t *iter, const void *key, const uint32_t klen); 106 | 107 | /* 108 | * klen is pointer of key buffer length 109 | * vlen is pointer of val buffer length 110 | * 111 | * when function finish: 112 | * klen will set db's key length 113 | * vlen will set db's val length 114 | */ 115 | int 116 | db_iter_next(db_t *db, db_iter_t *iter, 117 | void *key, uint32_t *klen, void *val, uint32_t *vlen); 118 | 119 | int 120 | db_stat(db_t *db, db_stat_t *stat); 121 | 122 | int 123 | db_close(db_t *db); 124 | 125 | 126 | #endif /* __DB_H__ */ 127 | -------------------------------------------------------------------------------- /hash.c: -------------------------------------------------------------------------------- 1 | #include "hash.h" 2 | 3 | static uint64_t 4 | murmur3_hash64(const void *key, size_t len, uint64_t seed) 5 | { 6 | const uint64_t m = UINT64_C(0xc6a4a7935bd1e995); 7 | const int r = 47; 8 | 9 | uint64_t h = seed ^ (len * m); 10 | 11 | const uint64_t * data = (const uint64_t *)key; 12 | const uint64_t * end = data + (len/8); 13 | const unsigned char * data2; 14 | 15 | while(data != end) { 16 | uint64_t k = *data++; 17 | 18 | k *= m; 19 | k ^= k >> r; 20 | k *= m; 21 | 22 | h ^= k; 23 | h *= m; 24 | } 25 | 26 | data2 = (const unsigned char*)data; 27 | 28 | switch(len & 7) { 29 | case 7: h ^= (uint64_t)data2[6] << 48; 30 | case 6: h ^= (uint64_t)data2[5] << 40; 31 | case 5: h ^= (uint64_t)data2[4] << 32; 32 | case 4: h ^= (uint64_t)data2[3] << 24; 33 | case 3: h ^= (uint64_t)data2[2] << 16; 34 | case 2: h ^= (uint64_t)data2[1] << 8; 35 | case 1: h ^= (uint64_t)data2[0]; 36 | h *= m; 37 | }; 38 | 39 | h ^= h >> r; 40 | h *= m; 41 | h ^= h >> r; 42 | 43 | return h; 44 | } 45 | 46 | uint64_t 47 | db_hash(const void *key, size_t len) 48 | { 49 | return murmur3_hash64(key, len, 0); 50 | } 51 | 52 | -------------------------------------------------------------------------------- /hash.h: -------------------------------------------------------------------------------- 1 | #ifndef __DB_HASH_H__ 2 | #define __DB_HASH_H__ 3 | 4 | #include 5 | #include 6 | 7 | uint64_t 8 | db_hash(const void *key, size_t len); 9 | 10 | #endif /* __DB_HASH_H__ */ 11 | 12 | --------------------------------------------------------------------------------