├── test └── test.cdb ├── go.mod ├── hash.go ├── .travis.yml ├── hash_test.go ├── util.go ├── go.sum ├── LICENSE.txt ├── iterator_test.go ├── iterator.go ├── README.md ├── cdb_test.go ├── cdb.go ├── writer_test.go └── writer.go /test/test.cdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/colinmarc/cdb/HEAD/test/test.cdb -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/colinmarc/cdb 2 | 3 | require ( 4 | github.com/Pallinder/go-randomdata v1.1.0 5 | github.com/stretchr/testify v1.3.0 6 | ) 7 | -------------------------------------------------------------------------------- /hash.go: -------------------------------------------------------------------------------- 1 | package cdb 2 | 3 | const start uint32 = 5381 4 | 5 | func cdbHash(data []byte) uint32 { 6 | v := start 7 | for _, b := range data { 8 | v = ((v << 5) + v) ^ uint32(b) 9 | } 10 | 11 | return v 12 | } 13 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - 1.3 4 | - 1.4 5 | - tip 6 | install: 7 | - go get github.com/stretchr/testify/assert 8 | - go get github.com/stretchr/testify/require 9 | - go get github.com/Pallinder/go-randomdata 10 | -------------------------------------------------------------------------------- /hash_test.go: -------------------------------------------------------------------------------- 1 | package cdb 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestHash(t *testing.T) { 10 | assert.EqualValues(t, 776976811, cdbHash([]byte("foo bar baz"))) 11 | assert.EqualValues(t, 3538394712, cdbHash([]byte("The quick brown fox jumped over the lazy dog"))) 12 | } 13 | -------------------------------------------------------------------------------- /util.go: -------------------------------------------------------------------------------- 1 | package cdb 2 | 3 | import ( 4 | "encoding/binary" 5 | "io" 6 | ) 7 | 8 | func readTuple(r io.ReaderAt, offset uint32) (uint32, uint32, error) { 9 | tuple := make([]byte, 8) 10 | _, err := r.ReadAt(tuple, int64(offset)) 11 | if err != nil { 12 | return 0, 0, err 13 | } 14 | 15 | first := binary.LittleEndian.Uint32(tuple[:4]) 16 | second := binary.LittleEndian.Uint32(tuple[4:]) 17 | return first, second, nil 18 | } 19 | 20 | func writeTuple(w io.Writer, first, second uint32) error { 21 | tuple := make([]byte, 8) 22 | binary.LittleEndian.PutUint32(tuple[:4], first) 23 | binary.LittleEndian.PutUint32(tuple[4:], second) 24 | 25 | _, err := w.Write(tuple) 26 | return err 27 | } 28 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/Pallinder/go-randomdata v1.1.0 h1:gUubB1IEUliFmzjqjhf+bgkg1o6uoFIkRsP3VrhEcx8= 2 | github.com/Pallinder/go-randomdata v1.1.0/go.mod h1:yHmJgulpD2Nfrm0cR9tI/+oAgRqCQQixsA8HyRZfV9Y= 3 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 4 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 5 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 6 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 7 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 8 | github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= 9 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 10 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Colin Marc (colinmarc@gmail.com) 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /iterator_test.go: -------------------------------------------------------------------------------- 1 | package cdb_test 2 | 3 | import ( 4 | "log" 5 | "math/rand" 6 | "testing" 7 | "time" 8 | 9 | "github.com/colinmarc/cdb" 10 | "github.com/stretchr/testify/assert" 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | func TestIterator(t *testing.T) { 15 | db, err := cdb.Open("./test/test.cdb") 16 | require.NoError(t, err) 17 | require.NotNil(t, db) 18 | 19 | n := 0 20 | iter := db.Iter() 21 | for iter.Next() { 22 | assert.Equal(t, string(expectedRecords[n][0]), string(iter.Key())) 23 | assert.Equal(t, string(expectedRecords[n][1]), string(iter.Value())) 24 | require.NoError(t, iter.Err()) 25 | n++ 26 | } 27 | 28 | assert.Equal(t, len(expectedRecords)-1, n) 29 | 30 | require.NoError(t, iter.Err()) 31 | } 32 | 33 | func BenchmarkIterator(b *testing.B) { 34 | db, _ := cdb.Open("./test/test.cdb") 35 | iter := db.Iter() 36 | b.ResetTimer() 37 | 38 | rand.Seed(time.Now().UnixNano()) 39 | for i := 0; i < b.N; i++ { 40 | for iter.Next() { 41 | } 42 | } 43 | } 44 | 45 | func ExampleIterator() { 46 | db, err := cdb.Open("./test/test.cdb") 47 | if err != nil { 48 | log.Fatal(err) 49 | } 50 | 51 | // Create an iterator for the database. 52 | iter := db.Iter() 53 | for iter.Next() { 54 | // Do something with iter.Key()/iter.Value() 55 | } 56 | 57 | if err := iter.Err(); err != nil { 58 | log.Fatal(err) 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /iterator.go: -------------------------------------------------------------------------------- 1 | package cdb 2 | 3 | // Iterator represents a sequential iterator over a CDB database. 4 | type Iterator struct { 5 | db *CDB 6 | pos uint32 7 | endPos uint32 8 | err error 9 | key []byte 10 | value []byte 11 | } 12 | 13 | // Iter creates an Iterator that can be used to iterate the database. 14 | func (cdb *CDB) Iter() *Iterator { 15 | return &Iterator{ 16 | db: cdb, 17 | pos: uint32(indexSize), 18 | endPos: cdb.index[0].offset, 19 | } 20 | } 21 | 22 | // Next reads the next key/value pair and advances the iterator one record. 23 | // It returns false when the scan stops, either by reaching the end of the 24 | // database or an error. After Next returns false, the Err method will return 25 | // any error that occurred while iterating. 26 | func (iter *Iterator) Next() bool { 27 | if iter.pos >= iter.endPos { 28 | return false 29 | } 30 | 31 | keyLength, valueLength, err := readTuple(iter.db.reader, iter.pos) 32 | if err != nil { 33 | iter.err = err 34 | return false 35 | } 36 | 37 | buf := make([]byte, keyLength+valueLength) 38 | _, err = iter.db.reader.ReadAt(buf, int64(iter.pos+8)) 39 | if err != nil { 40 | iter.err = err 41 | return false 42 | } 43 | 44 | // Update iterator state 45 | iter.key = buf[:keyLength] 46 | iter.value = buf[keyLength:] 47 | iter.pos += 8 + keyLength + valueLength 48 | 49 | return true 50 | } 51 | 52 | // Key returns the current key. 53 | func (iter *Iterator) Key() []byte { 54 | return iter.key 55 | } 56 | 57 | // Value returns the current value. 58 | func (iter *Iterator) Value() []byte { 59 | return iter.value 60 | } 61 | 62 | // Err returns the current error. 63 | func (iter *Iterator) Err() error { 64 | return iter.err 65 | } 66 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | CDB 2 | === 3 | 4 | [![GoDoc](https://godoc.org/github.com/colinmarc/cdb/web?status.svg)](https://godoc.org/github.com/colinmarc/cdb) [![build](https://travis-ci.org/colinmarc/cdb.svg?branch=master)](https://travis-ci.org/colinmarc/hdfs) 5 | 6 | This is a native Go implementation of [cdb][1], a constant key/value database 7 | with some very nice properties. From the [design doc][1]: 8 | 9 | > cdb is a fast, reliable, simple package for creating and reading constant databases. Its database structure provides several features: 10 | > - Fast lookups: A successful lookup in a large database normally takes just two disk accesses. An unsuccessful lookup takes only one. 11 | > - Low overhead: A database uses 2048 bytes, plus 24 bytes per record, plus the space for keys and data. 12 | > - No random limits: cdb can handle any database up to 4 gigabytes. There are no other restrictions; records don't even have to fit into memory. Databases are stored in a machine-independent format. 13 | 14 | [1]: http://cr.yp.to/cdb.html 15 | 16 | Usage 17 | ----- 18 | 19 | ```go 20 | writer, err := cdb.Create("/tmp/example.cdb") 21 | if err != nil { 22 | log.Fatal(err) 23 | } 24 | 25 | // Write some key/value pairs to the database. 26 | writer.Put([]byte("Alice"), []byte("Practice")) 27 | writer.Put([]byte("Bob"), []byte("Hope")) 28 | writer.Put([]byte("Charlie"), []byte("Horse")) 29 | 30 | // Freeze the database, and open it for reads. 31 | db, err := writer.Freeze() 32 | if err != nil { 33 | log.Fatal(err) 34 | } 35 | 36 | // Fetch a value. 37 | v, err := db.Get([]byte("Alice")) 38 | if err != nil { 39 | log.Fatal(err) 40 | } 41 | 42 | log.Println(string(v)) 43 | // => Practice 44 | 45 | // Iterate over the database 46 | iter := db.Iter() 47 | for iter.Next() { 48 | log.Printf("The key %s has a value of length %d\n", string(iter.Key()), len(iter.Value())) 49 | } 50 | 51 | if err := iter.Err(); err != nil { 52 | log.Fatal(err) 53 | } 54 | ``` 55 | -------------------------------------------------------------------------------- /cdb_test.go: -------------------------------------------------------------------------------- 1 | package cdb_test 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "math/rand" 7 | "os" 8 | "testing" 9 | "time" 10 | 11 | "github.com/colinmarc/cdb" 12 | "github.com/stretchr/testify/assert" 13 | "github.com/stretchr/testify/require" 14 | ) 15 | 16 | var expectedRecords = [][][]byte{ 17 | {[]byte("foo"), []byte("bar")}, 18 | {[]byte("baz"), []byte("quuuux")}, 19 | {[]byte("playwright"), []byte("wow")}, 20 | {[]byte("crystal"), []byte("CASTLES")}, 21 | {[]byte("CRYSTAL"), []byte("castles")}, 22 | {[]byte("snush"), []byte("collision!")}, // 'playwright' collides with 'snush' in cdbhash 23 | {[]byte("a"), []byte("a")}, 24 | {[]byte("empty_value"), []byte("")}, 25 | {[]byte(""), []byte("empty_key")}, 26 | {[]byte("not in the table"), nil}, 27 | } 28 | 29 | func TestGet(t *testing.T) { 30 | db, err := cdb.Open("./test/test.cdb") 31 | require.NoError(t, err) 32 | require.NotNil(t, db) 33 | 34 | records := append(append(expectedRecords, expectedRecords...), expectedRecords...) 35 | shuffle(records) 36 | 37 | for _, record := range records { 38 | msg := "while fetching " + string(record[0]) 39 | 40 | value, err := db.Get(record[0]) 41 | require.NoError(t, err, msg) 42 | assert.Equal(t, string(record[1]), string(value), msg) 43 | } 44 | } 45 | 46 | func TestClosesFile(t *testing.T) { 47 | f, err := os.Open("./test/test.cdb") 48 | require.NoError(t, err) 49 | 50 | db, err := cdb.New(f, nil) 51 | require.NoError(t, err) 52 | require.NotNil(t, db) 53 | 54 | err = db.Close() 55 | require.NoError(t, err) 56 | 57 | err = f.Close() 58 | assert.Error(t, err) 59 | } 60 | 61 | func BenchmarkGet(b *testing.B) { 62 | db, _ := cdb.Open("./test/test.cdb") 63 | b.ResetTimer() 64 | 65 | rand.Seed(time.Now().UnixNano()) 66 | for i := 0; i < b.N; i++ { 67 | record := expectedRecords[rand.Intn(len(expectedRecords))] 68 | db.Get(record[0]) 69 | } 70 | } 71 | 72 | func Example() { 73 | writer, err := cdb.Create("/tmp/example.cdb") 74 | if err != nil { 75 | log.Fatal(err) 76 | } 77 | 78 | // Write some key/value pairs to the database. 79 | writer.Put([]byte("Alice"), []byte("Practice")) 80 | writer.Put([]byte("Bob"), []byte("Hope")) 81 | writer.Put([]byte("Charlie"), []byte("Horse")) 82 | 83 | // Freeze the database, and open it for reads. 84 | db, err := writer.Freeze() 85 | if err != nil { 86 | log.Fatal(err) 87 | } 88 | 89 | // Fetch a value. 90 | v, err := db.Get([]byte("Alice")) 91 | if err != nil { 92 | log.Fatal(err) 93 | } 94 | 95 | fmt.Println(string(v)) 96 | // Output: Practice 97 | } 98 | 99 | func ExampleCDB() { 100 | db, err := cdb.Open("./test/test.cdb") 101 | if err != nil { 102 | log.Fatal(err) 103 | } 104 | 105 | // Fetch a value. 106 | v, err := db.Get([]byte("foo")) 107 | if err != nil { 108 | log.Fatal(err) 109 | } 110 | 111 | fmt.Println(string(v)) 112 | // Output: bar 113 | } 114 | 115 | func shuffle(a [][][]byte) { 116 | rand.Seed(time.Now().UnixNano()) 117 | for i := range a { 118 | j := rand.Intn(i + 1) 119 | a[i], a[j] = a[j], a[i] 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /cdb.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package cdb provides a native implementation of cdb, a constant key/value 3 | database with some very nice properties. 4 | 5 | For more information on cdb, see the original design doc at http://cr.yp.to/cdb.html. 6 | */ 7 | package cdb 8 | 9 | import ( 10 | "bytes" 11 | "encoding/binary" 12 | "io" 13 | "os" 14 | ) 15 | 16 | const indexSize = 256 * 8 17 | 18 | type index [256]table 19 | 20 | // CDB represents an open CDB database. It can only be used for reads; to 21 | // create a database, use Writer. 22 | type CDB struct { 23 | reader io.ReaderAt 24 | hash func([]byte) uint32 25 | index index 26 | } 27 | 28 | type table struct { 29 | offset uint32 30 | length uint32 31 | } 32 | 33 | // Open opens an existing CDB database at the given path. 34 | func Open(path string) (*CDB, error) { 35 | f, err := os.Open(path) 36 | if err != nil { 37 | return nil, err 38 | } 39 | 40 | return New(f, nil) 41 | } 42 | 43 | // New opens a new CDB instance for the given io.ReaderAt. It can only be used 44 | // for reads; to create a database, use Writer. The returned CDB instance is 45 | // thread-safe as long as reader is. 46 | // 47 | // If hash is nil, it will default to the CDB hash function. If a database 48 | // was created with a particular hash function, that same hash function must be 49 | // passed to New, or the database will return incorrect results. 50 | func New(reader io.ReaderAt, hash func([]byte) uint32) (*CDB, error) { 51 | if hash == nil { 52 | hash = cdbHash 53 | } 54 | 55 | cdb := &CDB{reader: reader, hash: hash} 56 | err := cdb.readIndex() 57 | if err != nil { 58 | return nil, err 59 | } 60 | 61 | return cdb, nil 62 | } 63 | 64 | // Get returns the value for a given key, or nil if it can't be found. 65 | func (cdb *CDB) Get(key []byte) ([]byte, error) { 66 | hash := cdb.hash(key) 67 | 68 | table := cdb.index[hash&0xff] 69 | if table.length == 0 { 70 | return nil, nil 71 | } 72 | 73 | // Probe the given hash table, starting at the given slot. 74 | startingSlot := (hash >> 8) % table.length 75 | slot := startingSlot 76 | 77 | for { 78 | slotOffset := table.offset + (8 * slot) 79 | slotHash, offset, err := readTuple(cdb.reader, slotOffset) 80 | if err != nil { 81 | return nil, err 82 | } 83 | 84 | // An empty slot means the key doesn't exist. 85 | if slotHash == 0 { 86 | break 87 | } else if slotHash == hash { 88 | value, err := cdb.getValueAt(offset, key) 89 | if err != nil { 90 | return nil, err 91 | } else if value != nil { 92 | return value, nil 93 | } 94 | } 95 | 96 | slot = (slot + 1) % table.length 97 | if slot == startingSlot { 98 | break 99 | } 100 | } 101 | 102 | return nil, nil 103 | } 104 | 105 | // Close closes the database to further reads. 106 | func (cdb *CDB) Close() error { 107 | if closer, ok := cdb.reader.(io.Closer); ok { 108 | return closer.Close() 109 | } else { 110 | return nil 111 | } 112 | } 113 | 114 | func (cdb *CDB) readIndex() error { 115 | buf := make([]byte, indexSize) 116 | _, err := cdb.reader.ReadAt(buf, 0) 117 | if err != nil { 118 | return err 119 | } 120 | 121 | for i := 0; i < 256; i++ { 122 | off := i * 8 123 | cdb.index[i] = table{ 124 | offset: binary.LittleEndian.Uint32(buf[off : off+4]), 125 | length: binary.LittleEndian.Uint32(buf[off+4 : off+8]), 126 | } 127 | } 128 | 129 | return nil 130 | } 131 | 132 | func (cdb *CDB) getValueAt(offset uint32, expectedKey []byte) ([]byte, error) { 133 | keyLength, valueLength, err := readTuple(cdb.reader, offset) 134 | if err != nil { 135 | return nil, err 136 | } 137 | 138 | // We can compare key lengths before reading the key at all. 139 | if int(keyLength) != len(expectedKey) { 140 | return nil, nil 141 | } 142 | 143 | buf := make([]byte, keyLength+valueLength) 144 | _, err = cdb.reader.ReadAt(buf, int64(offset+8)) 145 | if err != nil { 146 | return nil, err 147 | } 148 | 149 | // If they keys don't match, this isn't it. 150 | if bytes.Compare(buf[:keyLength], expectedKey) != 0 { 151 | return nil, nil 152 | } 153 | 154 | return buf[keyLength:], nil 155 | } 156 | -------------------------------------------------------------------------------- /writer_test.go: -------------------------------------------------------------------------------- 1 | package cdb_test 2 | 3 | import ( 4 | "hash/fnv" 5 | "io/ioutil" 6 | "log" 7 | "math/rand" 8 | "os" 9 | "reflect" 10 | "strconv" 11 | "testing" 12 | "testing/quick" 13 | "time" 14 | 15 | "github.com/Pallinder/go-randomdata" 16 | "github.com/colinmarc/cdb" 17 | "github.com/stretchr/testify/assert" 18 | "github.com/stretchr/testify/require" 19 | ) 20 | 21 | func fnvHash(data []byte) uint32 { 22 | h := fnv.New32a() 23 | h.Write(data) 24 | return h.Sum32() 25 | } 26 | 27 | func testWritesReadable(t *testing.T, writer *cdb.Writer) { 28 | expected := make([][][]byte, 0, 100) 29 | for i := 0; i < cap(expected); i++ { 30 | key := []byte(strconv.Itoa(i)) 31 | value := []byte(randomdata.SillyName()) 32 | err := writer.Put(key, value) 33 | require.NoError(t, err) 34 | 35 | expected = append(expected, [][]byte{key, value}) 36 | } 37 | 38 | db, err := writer.Freeze() 39 | require.NoError(t, err) 40 | 41 | for _, record := range expected { 42 | msg := "while fetching " + string(record[0]) 43 | val, err := db.Get(record[0]) 44 | require.Nil(t, err) 45 | assert.Equal(t, string(record[1]), string(val), msg) 46 | } 47 | } 48 | 49 | func TestWritesReadable(t *testing.T) { 50 | f, err := ioutil.TempFile("", "test-cdb") 51 | require.NoError(t, err) 52 | defer os.Remove(f.Name()) 53 | 54 | writer, err := cdb.NewWriter(f, nil) 55 | require.NoError(t, err) 56 | require.NotNil(t, writer) 57 | 58 | testWritesReadable(t, writer) 59 | } 60 | 61 | func TestWritesReadableFnv(t *testing.T) { 62 | f, err := ioutil.TempFile("", "test-cdb") 63 | require.NoError(t, err) 64 | defer os.Remove(f.Name()) 65 | 66 | writer, err := cdb.NewWriter(f, fnvHash) 67 | require.NoError(t, err) 68 | require.NotNil(t, writer) 69 | 70 | testWritesReadable(t, writer) 71 | } 72 | 73 | func testWritesRandom(t *testing.T, writer *cdb.Writer) { 74 | random := rand.New(rand.NewSource(time.Now().UnixNano())) 75 | records := make([][][]byte, 0, 1000) 76 | seenKeys := make(map[string]bool) 77 | stringType := reflect.TypeOf("") 78 | 79 | // Make sure we don't end up with duplicate keys, since that makes testing 80 | // hard. 81 | for len(records) < cap(records) { 82 | key, _ := quick.Value(stringType, random) 83 | if !seenKeys[key.String()] { 84 | value, _ := quick.Value(stringType, random) 85 | keyBytes := []byte(key.String()) 86 | valueBytes := []byte(value.String()) 87 | records = append(records, [][]byte{keyBytes, valueBytes}) 88 | seenKeys[key.String()] = true 89 | } 90 | } 91 | 92 | for _, record := range records { 93 | err := writer.Put(record[0], record[1]) 94 | require.NoError(t, err) 95 | } 96 | 97 | db, err := writer.Freeze() 98 | require.NoError(t, err) 99 | 100 | for _, record := range records { 101 | msg := "while fetching " + string(record[0]) 102 | val, err := db.Get(record[0]) 103 | require.Nil(t, err) 104 | assert.Equal(t, string(record[1]), string(val), msg) 105 | } 106 | } 107 | 108 | func TestWritesRandom(t *testing.T) { 109 | f, err := ioutil.TempFile("", "test-cdb") 110 | require.NoError(t, err) 111 | defer os.Remove(f.Name()) 112 | 113 | writer, err := cdb.NewWriter(f, nil) 114 | require.NoError(t, err) 115 | require.NotNil(t, writer) 116 | 117 | testWritesRandom(t, writer) 118 | } 119 | 120 | func TestWritesRandomFnv(t *testing.T) { 121 | f, err := ioutil.TempFile("", "test-cdb") 122 | require.NoError(t, err) 123 | defer os.Remove(f.Name()) 124 | 125 | writer, err := cdb.NewWriter(f, fnvHash) 126 | require.NoError(t, err) 127 | require.NotNil(t, writer) 128 | 129 | testWritesRandom(t, writer) 130 | } 131 | 132 | func benchmarkPut(b *testing.B, writer *cdb.Writer) { 133 | random := rand.New(rand.NewSource(time.Now().UnixNano())) 134 | stringType := reflect.TypeOf("") 135 | b.ResetTimer() 136 | for i := 0; i < b.N; i++ { 137 | key, _ := quick.Value(stringType, random) 138 | value, _ := quick.Value(stringType, random) 139 | keyBytes := []byte(key.String()) 140 | valueBytes := []byte(value.String()) 141 | 142 | writer.Put(keyBytes, valueBytes) 143 | } 144 | } 145 | 146 | func BenchmarkPut(b *testing.B) { 147 | f, err := ioutil.TempFile("", "test-cdb") 148 | require.NoError(b, err) 149 | defer func() { 150 | f.Close() 151 | os.Remove(f.Name()) 152 | }() 153 | 154 | writer, err := cdb.NewWriter(f, nil) 155 | require.NoError(b, err) 156 | 157 | benchmarkPut(b, writer) 158 | } 159 | 160 | func BenchmarkPutFnv(b *testing.B) { 161 | f, err := ioutil.TempFile("", "test-cdb") 162 | require.NoError(b, err) 163 | defer func() { 164 | f.Close() 165 | os.Remove(f.Name()) 166 | }() 167 | 168 | writer, err := cdb.NewWriter(f, fnvHash) 169 | require.NoError(b, err) 170 | 171 | benchmarkPut(b, writer) 172 | } 173 | 174 | func ExampleWriter() { 175 | writer, err := cdb.Create("/tmp/example.cdb") 176 | if err != nil { 177 | log.Fatal(err) 178 | } 179 | 180 | // Write some key/value pairs to the database. 181 | writer.Put([]byte("Alice"), []byte("Practice")) 182 | writer.Put([]byte("Bob"), []byte("Hope")) 183 | writer.Put([]byte("Charlie"), []byte("Horse")) 184 | 185 | // It's important to call Close or Freeze when you're finished writing 186 | // records. 187 | writer.Close() 188 | } 189 | -------------------------------------------------------------------------------- /writer.go: -------------------------------------------------------------------------------- 1 | package cdb 2 | 3 | import ( 4 | "bufio" 5 | "encoding/binary" 6 | "errors" 7 | "io" 8 | "math" 9 | "os" 10 | "sync" 11 | ) 12 | 13 | var ErrTooMuchData = errors.New("CDB files are limited to 4GB of data") 14 | 15 | // Writer provides an API for creating a CDB database record by record. 16 | // 17 | // Close or Freeze must be called to finalize the database, or the resulting 18 | // file will be invalid. 19 | type Writer struct { 20 | hash func([]byte) uint32 21 | writer io.WriteSeeker 22 | entries [256][]entry 23 | finalizeOnce sync.Once 24 | 25 | bufferedWriter *bufio.Writer 26 | bufferedOffset int64 27 | estimatedFooterSize int64 28 | } 29 | 30 | type entry struct { 31 | hash uint32 32 | offset uint32 33 | } 34 | 35 | // Create opens a CDB database at the given path. If the file exists, it will 36 | // be overwritten. The returned database is not safe for concurrent writes. 37 | func Create(path string) (*Writer, error) { 38 | f, err := os.Create(path) 39 | if err != nil { 40 | return nil, err 41 | } 42 | 43 | return NewWriter(f, nil) 44 | } 45 | 46 | // NewWriter opens a CDB database for the given io.WriteSeeker. 47 | // 48 | // If hash is nil, it will default to the CDB hash function. 49 | func NewWriter(writer io.WriteSeeker, hash func([]byte) uint32) (*Writer, error) { 50 | // Leave 256 * 8 bytes for the index at the head of the file. 51 | _, err := writer.Seek(0, os.SEEK_SET) 52 | if err != nil { 53 | return nil, err 54 | } 55 | 56 | _, err = writer.Write(make([]byte, indexSize)) 57 | if err != nil { 58 | return nil, err 59 | } 60 | 61 | if hash == nil { 62 | hash = cdbHash 63 | } 64 | 65 | return &Writer{ 66 | hash: hash, 67 | writer: writer, 68 | bufferedWriter: bufio.NewWriterSize(writer, 65536), 69 | bufferedOffset: indexSize, 70 | }, nil 71 | } 72 | 73 | // Put adds a key/value pair to the database. If the amount of data written 74 | // would exceed the limit, Put returns ErrTooMuchData. 75 | func (cdb *Writer) Put(key, value []byte) error { 76 | entrySize := int64(8 + len(key) + len(value)) 77 | if (cdb.bufferedOffset + entrySize + cdb.estimatedFooterSize + 16) > math.MaxUint32 { 78 | return ErrTooMuchData 79 | } 80 | 81 | // Record the entry in the hash table, to be written out at the end. 82 | hash := cdb.hash(key) 83 | table := hash & 0xff 84 | 85 | entry := entry{hash: hash, offset: uint32(cdb.bufferedOffset)} 86 | cdb.entries[table] = append(cdb.entries[table], entry) 87 | 88 | // Write the key length, then value length, then key, then value. 89 | err := writeTuple(cdb.bufferedWriter, uint32(len(key)), uint32(len(value))) 90 | if err != nil { 91 | return err 92 | } 93 | 94 | _, err = cdb.bufferedWriter.Write(key) 95 | if err != nil { 96 | return err 97 | } 98 | 99 | _, err = cdb.bufferedWriter.Write(value) 100 | if err != nil { 101 | return err 102 | } 103 | 104 | cdb.bufferedOffset += entrySize 105 | cdb.estimatedFooterSize += 16 106 | return nil 107 | } 108 | 109 | // Close finalizes the database, then closes it to further writes. 110 | // 111 | // Close or Freeze must be called to finalize the database, or the resulting 112 | // file will be invalid. 113 | func (cdb *Writer) Close() error { 114 | var err error 115 | cdb.finalizeOnce.Do(func() { 116 | _, err = cdb.finalize() 117 | }) 118 | 119 | if err != nil { 120 | return err 121 | } 122 | 123 | if closer, ok := cdb.writer.(io.Closer); ok { 124 | return closer.Close() 125 | } else { 126 | return nil 127 | } 128 | } 129 | 130 | // Freeze finalizes the database, then opens it for reads. If the stream cannot 131 | // be converted to a io.ReaderAt, Freeze will return os.ErrInvalid. 132 | // 133 | // Close or Freeze must be called to finalize the database, or the resulting 134 | // file will be invalid. 135 | func (cdb *Writer) Freeze() (*CDB, error) { 136 | var err error 137 | var index index 138 | cdb.finalizeOnce.Do(func() { 139 | index, err = cdb.finalize() 140 | }) 141 | 142 | if err != nil { 143 | return nil, err 144 | } 145 | 146 | if readerAt, ok := cdb.writer.(io.ReaderAt); ok { 147 | return &CDB{reader: readerAt, index: index, hash: cdb.hash}, nil 148 | } else { 149 | return nil, os.ErrInvalid 150 | } 151 | } 152 | 153 | func (cdb *Writer) finalize() (index, error) { 154 | var index index 155 | 156 | // Write the hashtables out, one by one, at the end of the file. 157 | for i := 0; i < 256; i++ { 158 | tableEntries := cdb.entries[i] 159 | tableSize := uint32(len(tableEntries) << 1) 160 | 161 | index[i] = table{ 162 | offset: uint32(cdb.bufferedOffset), 163 | length: tableSize, 164 | } 165 | 166 | sorted := make([]entry, tableSize) 167 | for _, entry := range tableEntries { 168 | slot := (entry.hash >> 8) % tableSize 169 | 170 | for { 171 | if sorted[slot].hash == 0 { 172 | sorted[slot] = entry 173 | break 174 | } 175 | 176 | slot = (slot + 1) % tableSize 177 | } 178 | } 179 | 180 | for _, entry := range sorted { 181 | err := writeTuple(cdb.bufferedWriter, entry.hash, entry.offset) 182 | if err != nil { 183 | return index, err 184 | } 185 | 186 | cdb.bufferedOffset += 8 187 | if cdb.bufferedOffset > math.MaxUint32 { 188 | return index, ErrTooMuchData 189 | } 190 | } 191 | } 192 | 193 | // We're done with the buffer. 194 | err := cdb.bufferedWriter.Flush() 195 | cdb.bufferedWriter = nil 196 | if err != nil { 197 | return index, err 198 | } 199 | 200 | // Seek to the beginning of the file and write out the index. 201 | _, err = cdb.writer.Seek(0, os.SEEK_SET) 202 | if err != nil { 203 | return index, err 204 | } 205 | 206 | buf := make([]byte, indexSize) 207 | for i, table := range index { 208 | off := i * 8 209 | binary.LittleEndian.PutUint32(buf[off:off+4], table.offset) 210 | binary.LittleEndian.PutUint32(buf[off+4:off+8], table.length) 211 | } 212 | 213 | _, err = cdb.writer.Write(buf) 214 | if err != nil { 215 | return index, err 216 | } 217 | 218 | return index, nil 219 | } 220 | --------------------------------------------------------------------------------