├── AUTHORS ├── CONTRIBUTORS ├── LICENSE ├── README ├── README.md ├── batch.go ├── batch_test.go ├── bloom ├── bloom.go └── bloom_test.go ├── cmd └── ldbdump │ └── main.go ├── compaction.go ├── compaction_test.go ├── crc └── crc.go ├── db ├── comparer.go ├── comparer_test.go ├── db.go ├── db_test.go ├── file.go ├── file_lock_generic.go ├── file_lock_test.go ├── file_lock_unix.go ├── file_lock_windows.go └── options.go ├── filenames.go ├── filenames_test.go ├── ikey.go ├── ikey_test.go ├── leveldb.go ├── leveldb_test.go ├── memdb ├── memdb.go └── memdb_test.go ├── memfs ├── memfs.go └── memfs_test.go ├── record ├── record.go └── record_test.go ├── table ├── reader.go ├── table.go ├── table_test.go └── writer.go ├── table_cache.go ├── table_cache_test.go ├── testdata ├── db-stage-1 │ ├── 000003.log │ ├── CURRENT │ ├── LOCK │ ├── LOG │ └── MANIFEST-000002 ├── db-stage-2 │ ├── 000003.log │ ├── CURRENT │ ├── LOCK │ ├── LOG │ └── MANIFEST-000002 ├── db-stage-3 │ ├── 000005.sst │ ├── 000006.log │ ├── CURRENT │ ├── LOCK │ ├── LOG │ ├── LOG.old │ └── MANIFEST-000004 ├── db-stage-4 │ ├── 000005.sst │ ├── 000006.log │ ├── CURRENT │ ├── LOCK │ ├── LOG │ ├── LOG.old │ └── MANIFEST-000004 ├── h.bloom.no-compression.ldb ├── h.ldb ├── h.no-compression.ldb ├── h.txt ├── hamlet-act-1.txt ├── make-db.cc └── make-table.cc ├── version.go ├── version_edit.go ├── version_edit_test.go ├── version_set.go └── version_test.go /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the official list of LevelDB-Go authors for copyright purposes. 2 | # This file is distinct from the CONTRIBUTORS files. 3 | # See the latter for an explanation. 4 | 5 | # Names should be added to this file as 6 | # Name or Organization 7 | # The email address is not required for organizations. 8 | 9 | # Please keep the list sorted. 10 | 11 | ChaiShushan 12 | Christoph Hack 13 | Google Inc. 14 | Markus Sonderegger 15 | -------------------------------------------------------------------------------- /CONTRIBUTORS: -------------------------------------------------------------------------------- 1 | # This is the official list of people who can contribute 2 | # (and typically have contributed) code to the LevelDB-Go repository. 3 | # The AUTHORS file lists the copyright holders; this file 4 | # lists people. For example, Google employees are listed here 5 | # but not in AUTHORS, because Google holds the copyright. 6 | # 7 | # The submission process automatically checks to make sure 8 | # that people submitting code are listed in this file (by email address). 9 | # 10 | # Names should be added to this file only after verifying that 11 | # the individual or the individual's organization has agreed to 12 | # the appropriate Contributor License Agreement, found here: 13 | # 14 | # http://code.google.com/legal/individual-cla-v1.0.html 15 | # http://code.google.com/legal/corporate-cla-v1.0.html 16 | # 17 | # The agreement for individuals can be filled out on the web. 18 | # 19 | # When adding J Random Contributor's name to this file, 20 | # either J's name or J's organization's name should be 21 | # added to the AUTHORS file, depending on whether the 22 | # individual or corporate CLA was used. 23 | 24 | # Names should be added to this file like so: 25 | # Name 26 | 27 | # Please keep the list sorted. 
28 | 29 | Bill Thiede 30 | Brad Fitzpatrick 31 | ChaiShushan 32 | Christoph Hack 33 | Markus Sonderegger 34 | Matt Proud 35 | Mike Wiacek 36 | Nigel Tao 37 | Yves Junqueira 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011 The LevelDB-Go Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | * Redistributions in binary form must reproduce the above 10 | copyright notice, this list of conditions and the following disclaimer 11 | in the documentation and/or other materials provided with the 12 | distribution. 13 | * Neither the name of Google Inc. nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | # WARNING: This is an incomplete work-in-progress. 2 | 3 | ## It is not ready for production use. Some features aren't implemented yet. Documentation is missing. 4 | 5 | The LevelDB key-value database in the Go programming language. 6 | 7 | To download and install from source: 8 | $ go get github.com/golang/leveldb 9 | 10 | Unless otherwise noted, the LevelDB-Go source files are distributed 11 | under the BSD-style license found in the LICENSE file. 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WARNING: This is an incomplete work-in-progress. 2 | 3 | ## It is not ready for production use. Some features aren't implemented yet. Documentation is missing. 4 | 5 | The LevelDB key-value database in the Go programming language. 6 | 7 | To download and install from source: 8 | $ go get github.com/golang/leveldb 9 | 10 | Unless otherwise noted, the LevelDB-Go source files are distributed 11 | under the BSD-style license found in the LICENSE file. 12 | -------------------------------------------------------------------------------- /batch.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package leveldb 6 | 7 | import ( 8 | "encoding/binary" 9 | ) 10 | 11 | const batchHeaderLen = 12 12 | 13 | const invalidBatchCount = 1<<32 - 1 14 | 15 | // Batch is a sequence of Sets and/or Deletes that are applied atomically. 16 | type Batch struct { 17 | // Data is the wire format of a batch's log entry: 18 | // - 8 bytes for a sequence number of the first batch element, 19 | // or zeroes if the batch has not yet been applied, 20 | // - 4 bytes for the count: the number of elements in the batch, 21 | // or "\xff\xff\xff\xff" if the batch is invalid, 22 | // - count elements, being: 23 | // - one byte for the kind: delete (0) or set (1), 24 | // - the varint-string user key, 25 | // - the varint-string value (if kind == set). 26 | // The sequence number and count are stored in little-endian order. 27 | data []byte 28 | } 29 | 30 | // Set adds an action to the batch that sets the key to map to the value. 31 | func (b *Batch) Set(key, value []byte) { 32 | if len(b.data) == 0 { 33 | b.init(len(key) + len(value) + 2*binary.MaxVarintLen64 + batchHeaderLen) 34 | } 35 | if b.increment() { 36 | b.data = append(b.data, byte(internalKeyKindSet)) 37 | b.appendStr(key) 38 | b.appendStr(value) 39 | } 40 | } 41 | 42 | // Delete adds an action to the batch that deletes the entry for key. 43 | func (b *Batch) Delete(key []byte) { 44 | if len(b.data) == 0 { 45 | b.init(len(key) + binary.MaxVarintLen64 + batchHeaderLen) 46 | } 47 | if b.increment() { 48 | b.data = append(b.data, byte(internalKeyKindDelete)) 49 | b.appendStr(key) 50 | } 51 | } 52 | 53 | func (b *Batch) init(cap int) { 54 | n := 256 55 | for n < cap { 56 | n *= 2 57 | } 58 | b.data = make([]byte, batchHeaderLen, n) 59 | } 60 | 61 | // seqNumData returns the 8 byte little-endian sequence number. Zero means that 62 | // the batch has not yet been applied. 63 | func (b *Batch) seqNumData() []byte { 64 | return b.data[:8] 65 | } 66 | 67 | // countData returns the 4 byte little-endian count data. "\xff\xff\xff\xff" 68 | // means that the batch is invalid. 69 | func (b *Batch) countData() []byte { 70 | return b.data[8:12] 71 | } 72 | 73 | func (b *Batch) increment() (ok bool) { 74 | p := b.countData() 75 | for i := range p { 76 | p[i]++ 77 | if p[i] != 0x00 { 78 | return true 79 | } 80 | } 81 | // The countData was "\xff\xff\xff\xff". Leave it as it was. 82 | p[0] = 0xff 83 | p[1] = 0xff 84 | p[2] = 0xff 85 | p[3] = 0xff 86 | return false 87 | } 88 | 89 | func (b *Batch) appendStr(s []byte) { 90 | var buf [binary.MaxVarintLen64]byte 91 | n := binary.PutUvarint(buf[:], uint64(len(s))) 92 | b.data = append(b.data, buf[:n]...) 93 | b.data = append(b.data, s...) 94 | } 95 | 96 | func (b *Batch) setSeqNum(seqNum uint64) { 97 | binary.LittleEndian.PutUint64(b.seqNumData(), seqNum) 98 | } 99 | 100 | func (b *Batch) seqNum() uint64 { 101 | return binary.LittleEndian.Uint64(b.seqNumData()) 102 | } 103 | 104 | func (b *Batch) count() uint32 { 105 | return binary.LittleEndian.Uint32(b.countData()) 106 | } 107 | 108 | func (b *Batch) iter() batchIter { 109 | return b.data[batchHeaderLen:] 110 | } 111 | 112 | type batchIter []byte 113 | 114 | // next returns the next operation in this batch. 115 | // The final return value is false if the batch is corrupt. 
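// For example (a sketch that follows the wire format documented on the Batch
// type above), a batch holding Set("roses", "red") and then Delete("violets")
// yields two iterations, decoded from:
//
//	8 zero bytes            sequence number (zero until the batch is applied)
//	"\x02\x00\x00\x00"      count: 2 elements, little-endian
//	"\x01\x05roses\x03red"  kind set (1), varint-string key, varint-string value
//	"\x00\x07violets"       kind delete (0), varint-string key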
116 | func (t *batchIter) next() (kind internalKeyKind, ukey []byte, value []byte, ok bool) { 117 | p := *t 118 | if len(p) == 0 { 119 | return 0, nil, nil, false 120 | } 121 | kind, *t = internalKeyKind(p[0]), p[1:] 122 | if kind > internalKeyKindMax { 123 | return 0, nil, nil, false 124 | } 125 | ukey, ok = t.nextStr() 126 | if !ok { 127 | return 0, nil, nil, false 128 | } 129 | if kind != internalKeyKindDelete { 130 | value, ok = t.nextStr() 131 | if !ok { 132 | return 0, nil, nil, false 133 | } 134 | } 135 | return kind, ukey, value, true 136 | } 137 | 138 | func (t *batchIter) nextStr() (s []byte, ok bool) { 139 | p := *t 140 | u, numBytes := binary.Uvarint(p) 141 | if numBytes <= 0 { 142 | return nil, false 143 | } 144 | p = p[numBytes:] 145 | if u > uint64(len(p)) { 146 | return nil, false 147 | } 148 | s, *t = p[:u], p[u:] 149 | return s, true 150 | } 151 | -------------------------------------------------------------------------------- /batch_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package leveldb 6 | 7 | import ( 8 | "encoding/binary" 9 | "strings" 10 | "testing" 11 | ) 12 | 13 | func TestBatch(t *testing.T) { 14 | testCases := []struct { 15 | kind internalKeyKind 16 | key, value string 17 | }{ 18 | {internalKeyKindSet, "roses", "red"}, 19 | {internalKeyKindSet, "violets", "blue"}, 20 | {internalKeyKindDelete, "roses", ""}, 21 | {internalKeyKindSet, "", ""}, 22 | {internalKeyKindSet, "", "non-empty"}, 23 | {internalKeyKindDelete, "", ""}, 24 | {internalKeyKindSet, "grass", "green"}, 25 | {internalKeyKindSet, "grass", "greener"}, 26 | {internalKeyKindSet, "eleventy", strings.Repeat("!!11!", 100)}, 27 | {internalKeyKindDelete, "nosuchkey", ""}, 28 | {internalKeyKindSet, "binarydata", "\x00"}, 29 | {internalKeyKindSet, "binarydata", "\xff"}, 30 | } 31 | var b Batch 32 | for _, tc := range testCases { 33 | if tc.kind == internalKeyKindDelete { 34 | b.Delete([]byte(tc.key)) 35 | } else { 36 | b.Set([]byte(tc.key), []byte(tc.value)) 37 | } 38 | } 39 | iter := b.iter() 40 | for _, tc := range testCases { 41 | kind, k, v, ok := iter.next() 42 | if !ok { 43 | t.Fatalf("next returned !ok: test case = %q", tc) 44 | } 45 | key, value := string(k), string(v) 46 | if kind != tc.kind || key != tc.key || value != tc.value { 47 | t.Errorf("got (%d, %q, %q), want (%d, %q, %q)", 48 | kind, key, value, tc.kind, tc.key, tc.value) 49 | } 50 | } 51 | if len(iter) != 0 { 52 | t.Errorf("iterator was not exhausted: remaining bytes = %q", iter) 53 | } 54 | } 55 | 56 | func TestBatchIncrement(t *testing.T) { 57 | testCases := []uint32{ 58 | 0x00000000, 59 | 0x00000001, 60 | 0x00000002, 61 | 0x0000007f, 62 | 0x00000080, 63 | 0x000000fe, 64 | 0x000000ff, 65 | 0x00000100, 66 | 0x00000101, 67 | 0x000001ff, 68 | 0x00000200, 69 | 0x00000fff, 70 | 0x00001234, 71 | 0x0000fffe, 72 | 0x0000ffff, 73 | 0x00010000, 74 | 0x00010001, 75 | 0x000100fe, 76 | 0x000100ff, 77 | 0x00020100, 78 | 0x03fffffe, 79 | 0x03ffffff, 80 | 0x04000000, 81 | 0x04000001, 82 | 0x7fffffff, 83 | 0xfffffffe, 84 | 0xffffffff, 85 | } 86 | for _, tc := range testCases { 87 | var buf [12]byte 88 | binary.LittleEndian.PutUint32(buf[8:12], tc) 89 | b := Batch{buf[:]} 90 | b.increment() 91 | got := binary.LittleEndian.Uint32(buf[8:12]) 92 | want := tc + 1 93 | if tc == 0xffffffff { 94 | want = tc 95 | } 96 | if got != 
want { 97 | t.Errorf("input=%d: got %d, want %d", tc, got, want) 98 | } 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /bloom/bloom.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package bloom implements Bloom filters. 6 | package bloom // import "github.com/golang/leveldb/bloom" 7 | 8 | // Filter is an encoded set of []byte keys. 9 | type Filter []byte 10 | 11 | // MayContain returns whether the filter may contain the given key. False positives 12 | // are possible, where it returns true for keys not in the original set. 13 | func (f Filter) MayContain(key []byte) bool { 14 | if len(f) < 2 { 15 | return false 16 | } 17 | k := f[len(f)-1] 18 | if k > 30 { 19 | // This is reserved for potentially new encodings for short Bloom filters. 20 | // Consider it a match. 21 | return true 22 | } 23 | nBits := uint32(8 * (len(f) - 1)) 24 | h := hash(key) 25 | delta := h>>17 | h<<15 26 | for j := uint8(0); j < k; j++ { 27 | bitPos := h % nBits 28 | if f[bitPos/8]&(1<<(bitPos%8)) == 0 { 29 | return false 30 | } 31 | h += delta 32 | } 33 | return true 34 | } 35 | 36 | // NewFilter returns a new Bloom filter that encodes a set of []byte keys with 37 | // the given number of bits per key, approximately. 38 | // 39 | // A good bitsPerKey value is 10, which yields a filter with ~ 1% false 40 | // positive rate. 41 | func NewFilter(buf []byte, keys [][]byte, bitsPerKey int) Filter { 42 | return Filter(appendFilter(buf, keys, bitsPerKey)) 43 | } 44 | 45 | func appendFilter(buf []byte, keys [][]byte, bitsPerKey int) []byte { 46 | if bitsPerKey < 0 { 47 | bitsPerKey = 0 48 | } 49 | // 0.69 is approximately ln(2). 50 | k := uint32(float64(bitsPerKey) * 0.69) 51 | if k < 1 { 52 | k = 1 53 | } 54 | if k > 30 { 55 | k = 30 56 | } 57 | 58 | nBits := len(keys) * int(bitsPerKey) 59 | // For small len(keys), we can see a very high false positive rate. Fix it 60 | // by enforcing a minimum bloom filter length. 61 | if nBits < 64 { 62 | nBits = 64 63 | } 64 | nBytes := (nBits + 7) / 8 65 | nBits = nBytes * 8 66 | buf, filter := extend(buf, nBytes+1) 67 | 68 | for _, key := range keys { 69 | h := hash(key) 70 | delta := h>>17 | h<<15 71 | for j := uint32(0); j < k; j++ { 72 | bitPos := h % uint32(nBits) 73 | filter[bitPos/8] |= 1 << (bitPos % 8) 74 | h += delta 75 | } 76 | } 77 | filter[nBytes] = uint8(k) 78 | 79 | return buf 80 | } 81 | 82 | // extend appends n zero bytes to b. It returns the overall slice (of length 83 | // n+len(originalB)) and the slice of n trailing zeroes. 84 | func extend(b []byte, n int) (overall, trailer []byte) { 85 | want := n + len(b) 86 | if want <= cap(b) { 87 | overall = b[:want] 88 | trailer = overall[len(b):] 89 | for i := range trailer { 90 | trailer[i] = 0 91 | } 92 | } else { 93 | // Grow the capacity exponentially, with a 1KiB minimum. 94 | c := 1024 95 | for c < want { 96 | c += c / 4 97 | } 98 | overall = make([]byte, want, c) 99 | trailer = overall[len(b):] 100 | copy(overall, b) 101 | } 102 | return overall, trailer 103 | } 104 | 105 | // hash implements a hashing algorithm similar to the Murmur hash.
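// The seed and multiplier constants below come from the C++ implementation's
// hash.cc, so, for instance, hash(nil) is just the seed 0xbc9f1d34 and
// hash([]byte("g")) is 0xd04a8bda, as cross-checked by TestHash below.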
106 | func hash(b []byte) uint32 { 107 | const ( 108 | seed = 0xbc9f1d34 109 | m = 0xc6a4a793 110 | ) 111 | h := uint32(seed) ^ uint32(len(b)*m) 112 | for ; len(b) >= 4; b = b[4:] { 113 | h += uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 114 | h *= m 115 | h ^= h >> 16 116 | } 117 | switch len(b) { 118 | case 3: 119 | h += uint32(b[2]) << 16 120 | fallthrough 121 | case 2: 122 | h += uint32(b[1]) << 8 123 | fallthrough 124 | case 1: 125 | h += uint32(b[0]) 126 | h *= m 127 | h ^= h >> 24 128 | } 129 | return h 130 | } 131 | 132 | // FilterPolicy implements the db.FilterPolicy interface from the leveldb/db 133 | // package. 134 | // 135 | // The integer value is the approximate number of bits used per key. A good 136 | // value is 10, which yields a filter with ~ 1% false positive rate. 137 | // 138 | // It is valid to use the other API in this package (leveldb/bloom) without 139 | // using this type or the leveldb/db package. 140 | type FilterPolicy int 141 | 142 | // Name implements the db.FilterPolicy interface. 143 | func (p FilterPolicy) Name() string { 144 | // This string looks arbitrary, but its value is written to LevelDB .ldb 145 | // files, and should be this exact value to be compatible with those files 146 | // and with the C++ LevelDB code. 147 | return "leveldb.BuiltinBloomFilter2" 148 | } 149 | 150 | // AppendFilter implements the db.FilterPolicy interface. 151 | func (p FilterPolicy) AppendFilter(dst []byte, keys [][]byte) []byte { 152 | return appendFilter(dst, keys, int(p)) 153 | } 154 | 155 | // MayContain implements the db.FilterPolicy interface. 156 | func (p FilterPolicy) MayContain(filter, key []byte) bool { 157 | return Filter(filter).MayContain(key) 158 | } 159 | -------------------------------------------------------------------------------- /bloom/bloom_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package bloom 6 | 7 | import ( 8 | "testing" 9 | ) 10 | 11 | func (f Filter) String() string { 12 | s := make([]byte, 8*len(f)) 13 | for i, x := range f { 14 | for j := 0; j < 8; j++ { 15 | if x&(1<> 0) 67 | b[1] = uint8(uint32(i) >> 8) 68 | b[2] = uint8(uint32(i) >> 16) 69 | b[3] = uint8(uint32(i) >> 24) 70 | return b 71 | } 72 | 73 | nMediocreFilters, nGoodFilters := 0, 0 74 | loop: 75 | for length := 1; length <= 10000; length = nextLength(length) { 76 | keys := make([][]byte, 0, length) 77 | for i := 0; i < length; i++ { 78 | keys = append(keys, le32(i)) 79 | } 80 | f := NewFilter(nil, keys, 10) 81 | 82 | if len(f) > (length*10/8)+40 { 83 | t.Errorf("length=%d: len(f)=%d is too large", length, len(f)) 84 | continue 85 | } 86 | 87 | // All added keys must match. 88 | for _, key := range keys { 89 | if !f.MayContain(key) { 90 | t.Errorf("length=%d: did not contain key %q", length, key) 91 | continue loop 92 | } 93 | } 94 | 95 | // Check false positive rate. 
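// (A rough sanity check on the thresholds below, assuming the standard
// Bloom filter model: with 10 bits per key, appendFilter picks
// k = int(10*0.69) = 6 probes, for a theoretical false positive rate of
// about (1-e^(-0.6))^6 ≈ 0.8%, comfortably under both the 1.25% mediocre
// mark and the 2% hard failure mark.)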
96 | nFalsePositive := 0 97 | for i := 0; i < 10000; i++ { 98 | if f.MayContain(le32(1e9 + i)) { 99 | nFalsePositive++ 100 | } 101 | } 102 | if nFalsePositive > 0.02*10000 { 103 | t.Errorf("length=%d: %d false positives in 10000", length, nFalsePositive) 104 | continue 105 | } 106 | if nFalsePositive > 0.0125*10000 { 107 | nMediocreFilters++ 108 | } else { 109 | nGoodFilters++ 110 | } 111 | } 112 | 113 | if nMediocreFilters > nGoodFilters/5 { 114 | t.Errorf("%d mediocre filters but only %d good filters", nMediocreFilters, nGoodFilters) 115 | } 116 | } 117 | 118 | func TestHash(t *testing.T) { 119 | // The magic want numbers come from running the C++ leveldb code in hash.cc. 120 | testCases := []struct { 121 | s string 122 | want uint32 123 | }{ 124 | {"", 0xbc9f1d34}, 125 | {"g", 0xd04a8bda}, 126 | {"go", 0x3e0b0745}, 127 | {"gop", 0x0c326610}, 128 | {"goph", 0x8c9d6390}, 129 | {"gophe", 0x9bfd4b0a}, 130 | {"gopher", 0xa78edc7c}, 131 | {"I had a dream it would end this way.", 0xe14a9db9}, 132 | } 133 | for _, tc := range testCases { 134 | if got := hash([]byte(tc.s)); got != tc.want { 135 | t.Errorf("s=%q: got 0x%08x, want 0x%08x", tc.s, got, tc.want) 136 | } 137 | } 138 | } 139 | -------------------------------------------------------------------------------- /cmd/ldbdump/main.go: -------------------------------------------------------------------------------- 1 | // The ldbdump program dumps the contents of LevelDB tables (.ldb files), 2 | // formerly known as sorted string tables (.sst files). 3 | package main 4 | 5 | import ( 6 | "bytes" 7 | "flag" 8 | "fmt" 9 | "os" 10 | 11 | "github.com/golang/leveldb/db" 12 | "github.com/golang/leveldb/table" 13 | ) 14 | 15 | var ( 16 | verifyChecksums = flag.Bool("c", false, "Verify checksums.") 17 | truncate = flag.Bool("t", false, "Truncate long keys and values.") 18 | 19 | kBuf, vBuf bytes.Buffer 20 | ) 21 | 22 | func main() { 23 | flag.Parse() 24 | bad := false 25 | for i, arg := range flag.Args() { 26 | if i != 0 { 27 | fmt.Println() 28 | } 29 | fmt.Printf("filename: %q\n", arg) 30 | if err := dump(arg); err != nil { 31 | fmt.Printf("error: %q\n", err) 32 | bad = true 33 | } 34 | } 35 | if bad { 36 | os.Exit(1) 37 | } 38 | } 39 | 40 | func dump(filename string) error { 41 | f, err := os.Open(filename) 42 | if err != nil { 43 | return err 44 | } 45 | // No need to "defer f.Close()", as closing r will close f. 46 | r := table.NewReader(f, &db.Options{ 47 | VerifyChecksums: *verifyChecksums, 48 | }) 49 | defer r.Close() 50 | 51 | t := r.Find(nil, nil) 52 | for t.Next() { 53 | k, v := t.Key(), t.Value() 54 | if *truncate { 55 | k = trunc(&kBuf, k) 56 | v = trunc(&vBuf, v) 57 | } 58 | fmt.Printf("%q: %q,\n", k, v) 59 | } 60 | return t.Close() 61 | } 62 | 63 | func trunc(dst *bytes.Buffer, b []byte) []byte { 64 | if len(b) < 64 { 65 | return b 66 | } 67 | dst.Reset() 68 | fmt.Fprintf(dst, "%s...(%d bytes)...%s", b[:20], len(b)-40, b[len(b)-20:]) 69 | return dst.Bytes() 70 | } 71 | -------------------------------------------------------------------------------- /compaction.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package leveldb 6 | 7 | import ( 8 | "fmt" 9 | 10 | "github.com/golang/leveldb/db" 11 | "github.com/golang/leveldb/table" 12 | ) 13 | 14 | const ( 15 | targetFileSize = 2 * 1024 * 1024 16 | 17 | // maxGrandparentOverlapBytes is the maximum bytes of overlap with 18 | // level+2 before we stop building a single file in a level to level+1 19 | // compaction. 20 | maxGrandparentOverlapBytes = 10 * targetFileSize 21 | 22 | // expandedCompactionByteSizeLimit is the maximum number of bytes in 23 | // all compacted files. We avoid expanding the lower level file set of 24 | // a compaction if it would make the total compaction cover more than 25 | // this many bytes. 26 | expandedCompactionByteSizeLimit = 25 * targetFileSize 27 | ) 28 | 29 | // compaction is a table compaction from one level to the next, starting from a 30 | // given version. 31 | type compaction struct { 32 | version *version 33 | 34 | // level is the level that is being compacted. Inputs from level and 35 | // level+1 will be merged to produce a set of level+1 files. 36 | level int 37 | 38 | // inputs are the tables to be compacted. 39 | inputs [3][]fileMetadata 40 | } 41 | 42 | // pickCompaction picks the best compaction, if any, for vs' current version. 43 | func pickCompaction(vs *versionSet) (c *compaction) { 44 | cur := vs.currentVersion() 45 | 46 | // Pick a compaction based on size. If none exist, pick one based on seeks. 47 | if cur.compactionScore >= 1 { 48 | c = &compaction{ 49 | version: cur, 50 | level: cur.compactionLevel, 51 | } 52 | // TODO: Pick the first file that comes after the compaction pointer for c.level. 53 | c.inputs[0] = []fileMetadata{cur.files[c.level][0]} 54 | 55 | } else if false { 56 | // TODO: look for a compaction triggered by seeks. 57 | 58 | } else { 59 | return nil 60 | } 61 | 62 | // Files in level 0 may overlap each other, so pick up all overlapping ones. 63 | if c.level == 0 { 64 | smallest, largest := ikeyRange(vs.icmp, c.inputs[0], nil) 65 | c.inputs[0] = cur.overlaps(0, vs.ucmp, smallest.ukey(), largest.ukey()) 66 | if len(c.inputs[0]) == 0 { 67 | panic("leveldb: empty compaction") 68 | } 69 | } 70 | 71 | c.setupOtherInputs(vs) 72 | return c 73 | } 74 | 75 | // TODO: user initiated compactions. 76 | 77 | // setupOtherInputs fills in the rest of the compaction inputs, regardless of 78 | // whether the compaction was automatically scheduled or user initiated. 79 | func (c *compaction) setupOtherInputs(vs *versionSet) { 80 | smallest0, largest0 := ikeyRange(vs.icmp, c.inputs[0], nil) 81 | c.inputs[1] = c.version.overlaps(c.level+1, vs.ucmp, smallest0.ukey(), largest0.ukey()) 82 | smallest01, largest01 := ikeyRange(vs.icmp, c.inputs[0], c.inputs[1]) 83 | 84 | // Grow the inputs if it doesn't affect the number of level+1 files. 85 | if c.grow(vs, smallest01, largest01) { 86 | smallest01, largest01 = ikeyRange(vs.icmp, c.inputs[0], c.inputs[1]) 87 | } 88 | 89 | // Compute the set of level+2 files that overlap this compaction. 90 | if c.level+2 < numLevels { 91 | c.inputs[2] = c.version.overlaps(c.level+2, vs.ucmp, smallest01.ukey(), largest01.ukey()) 92 | } 93 | 94 | // TODO: update the compaction pointer for c.level. 95 | } 96 | 97 | // grow grows the number of inputs at c.level without changing the number of 98 | // c.level+1 files in the compaction, and returns whether the inputs grew. sm 99 | // and la are the smallest and largest internalKeys in all of the inputs.
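// (Expansion is bounded by expandedCompactionByteSizeLimit = 25 *
// targetFileSize = 50 MiB: grow backs off whenever the enlarged c.level
// inputs plus the c.level+1 inputs would exceed that many bytes.)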
100 | func (c *compaction) grow(vs *versionSet, sm, la internalKey) bool { 101 | if len(c.inputs[1]) == 0 { 102 | return false 103 | } 104 | grow0 := c.version.overlaps(c.level, vs.ucmp, sm.ukey(), la.ukey()) 105 | if len(grow0) <= len(c.inputs[0]) { 106 | return false 107 | } 108 | if totalSize(grow0)+totalSize(c.inputs[1]) >= expandedCompactionByteSizeLimit { 109 | return false 110 | } 111 | sm1, la1 := ikeyRange(vs.icmp, grow0, nil) 112 | grow1 := c.version.overlaps(c.level+1, vs.ucmp, sm1.ukey(), la1.ukey()) 113 | if len(grow1) != len(c.inputs[1]) { 114 | return false 115 | } 116 | c.inputs[0] = grow0 117 | c.inputs[1] = grow1 118 | return true 119 | } 120 | 121 | // isBaseLevelForUkey reports whether it is guaranteed that there are no 122 | // key/value pairs at c.level+2 or higher that have the user key ukey. 123 | func (c *compaction) isBaseLevelForUkey(userCmp db.Comparer, ukey []byte) bool { 124 | // TODO: this can be faster if ukey is always increasing between successive 125 | // isBaseLevelForUkey calls and we can keep some state in between calls. 126 | for level := c.level + 2; level < numLevels; level++ { 127 | for _, f := range c.version.files[level] { 128 | if userCmp.Compare(ukey, f.largest.ukey()) <= 0 { 129 | if userCmp.Compare(ukey, f.smallest.ukey()) >= 0 { 130 | return false 131 | } 132 | // For levels above level 0, the files within a level are in 133 | // increasing ikey order, so we can break early. 134 | break 135 | } 136 | } 137 | } 138 | return true 139 | } 140 | 141 | // maybeScheduleCompaction schedules a compaction if necessary. 142 | // 143 | // d.mu must be held when calling this. 144 | func (d *DB) maybeScheduleCompaction() { 145 | if d.compacting || d.closed { 146 | return 147 | } 148 | // TODO: check for manual compactions. 149 | if d.imm == nil { 150 | v := d.versions.currentVersion() 151 | // TODO: check v.fileToCompact. 152 | if v.compactionScore < 1 { 153 | // There is no work to be done. 154 | return 155 | } 156 | } 157 | d.compacting = true 158 | go d.compact() 159 | } 160 | 161 | // compact runs one compaction and maybe schedules another call to compact. 162 | func (d *DB) compact() { 163 | d.mu.Lock() 164 | defer d.mu.Unlock() 165 | if err := d.compact1(); err != nil { 166 | // TODO: count consecutive compaction errors and backoff. 167 | } 168 | d.compacting = false 169 | // The previous compaction may have produced too many files in a 170 | // level, so reschedule another compaction if needed. 171 | d.maybeScheduleCompaction() 172 | d.compactionCond.Broadcast() 173 | } 174 | 175 | // compact1 runs one compaction. 176 | // 177 | // d.mu must be held when calling this, but the mutex may be dropped and 178 | // re-acquired during the course of this method. 179 | func (d *DB) compact1() error { 180 | if d.imm != nil { 181 | return d.compactMemTable() 182 | } 183 | 184 | // TODO: support manual compactions. 185 | 186 | c := pickCompaction(&d.versions) 187 | if c == nil { 188 | return nil 189 | } 190 | 191 | // Check for a trivial move of one table from one level to the next. 192 | // We avoid such a move if there is lots of overlapping grandparent data. 193 | // Otherwise, the move could create a parent file that will require 194 | // a very expensive merge later on.
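// (Concretely: with targetFileSize = 2 MiB, the move is only taken when the
// single input table drags along at most maxGrandparentOverlapBytes =
// 10 * 2 MiB = 20 MiB of overlapping grandparent data.)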
195 | if len(c.inputs[0]) == 1 && len(c.inputs[1]) == 0 && 196 | totalSize(c.inputs[2]) <= maxGrandparentOverlapBytes { 197 | 198 | meta := &c.inputs[0][0] 199 | return d.versions.logAndApply(d.dirname, &versionEdit{ 200 | deletedFiles: map[deletedFileEntry]bool{ 201 | deletedFileEntry{level: c.level, fileNum: meta.fileNum}: true, 202 | }, 203 | newFiles: []newFileEntry{ 204 | {level: c.level + 1, meta: *meta}, 205 | }, 206 | }) 207 | } 208 | 209 | ve, pendingOutputs, err := d.compactDiskTables(c) 210 | if err != nil { 211 | return err 212 | } 213 | err = d.versions.logAndApply(d.dirname, ve) 214 | for _, fileNum := range pendingOutputs { 215 | delete(d.pendingOutputs, fileNum) 216 | } 217 | if err != nil { 218 | return err 219 | } 220 | d.deleteObsoleteFiles() 221 | return nil 222 | } 223 | 224 | // compactMemTable runs a compaction that copies d.imm from memory to disk. 225 | // 226 | // d.mu must be held when calling this, but the mutex may be dropped and 227 | // re-acquired during the course of this method. 228 | func (d *DB) compactMemTable() error { 229 | meta, err := d.writeLevel0Table(d.opts.GetFileSystem(), d.imm) 230 | if err != nil { 231 | return err 232 | } 233 | err = d.versions.logAndApply(d.dirname, &versionEdit{ 234 | logNumber: d.logNumber, 235 | newFiles: []newFileEntry{ 236 | {level: 0, meta: meta}, 237 | }, 238 | }) 239 | delete(d.pendingOutputs, meta.fileNum) 240 | if err != nil { 241 | return err 242 | } 243 | d.imm = nil 244 | d.deleteObsoleteFiles() 245 | return nil 246 | } 247 | 248 | // compactDiskTables runs a compaction that produces new on-disk tables from 249 | // old on-disk tables. 250 | // 251 | // d.mu must be held when calling this, but the mutex may be dropped and 252 | // re-acquired during the course of this method. 253 | func (d *DB) compactDiskTables(c *compaction) (ve *versionEdit, pendingOutputs []uint64, retErr error) { 254 | defer func() { 255 | if retErr != nil { 256 | for _, fileNum := range pendingOutputs { 257 | delete(d.pendingOutputs, fileNum) 258 | } 259 | pendingOutputs = nil 260 | } 261 | }() 262 | 263 | // TODO: track snapshots. 264 | smallestSnapshot := d.versions.lastSequence 265 | 266 | // Release the d.mu lock while doing I/O. 267 | // Note the unusual order: Unlock and then Lock. 268 | d.mu.Unlock() 269 | defer d.mu.Lock() 270 | 271 | iter, err := compactionIterator(&d.tableCache, d.icmp, c) 272 | if err != nil { 273 | return nil, pendingOutputs, err 274 | } 275 | 276 | // TODO: output to more than one table, if it would otherwise be too large. 277 | var ( 278 | fileNum uint64 279 | filename string 280 | tw *table.Writer 281 | ) 282 | defer func() { 283 | if iter != nil { 284 | retErr = firstError(retErr, iter.Close()) 285 | } 286 | if tw != nil { 287 | retErr = firstError(retErr, tw.Close()) 288 | } 289 | if retErr != nil { 290 | d.opts.GetFileSystem().Remove(filename) 291 | } 292 | }() 293 | 294 | currentUkey := make([]byte, 0, 4096) 295 | hasCurrentUkey := false 296 | lastSeqNumForKey := internalKeySeqNumMax 297 | smallest, largest := internalKey(nil), internalKey(nil) 298 | for iter.Next() { 299 | // TODO: prioritize compacting d.imm. 300 | 301 | // TODO: support c.shouldStopBefore. 302 | 303 | ikey := internalKey(iter.Key()) 304 | if !ikey.valid() { 305 | // Do not hide invalid keys. 
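// Resetting the per-user-key state below means the invalid key falls
// through to the output untouched, and the next valid key is treated as
// a first occurrence.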
306 | currentUkey = currentUkey[:0] 307 | hasCurrentUkey = false 308 | lastSeqNumForKey = internalKeySeqNumMax 309 | 310 | } else { 311 | ukey := ikey.ukey() 312 | if !hasCurrentUkey || d.icmp.userCmp.Compare(currentUkey, ukey) != 0 { 313 | // This is the first occurrence of this user key. 314 | currentUkey = append(currentUkey[:0], ukey...) 315 | hasCurrentUkey = true 316 | lastSeqNumForKey = internalKeySeqNumMax 317 | } 318 | 319 | drop, ikeySeqNum := false, ikey.seqNum() 320 | if lastSeqNumForKey <= smallestSnapshot { 321 | drop = true // Rule (A) referenced below. 322 | 323 | } else if ikey.kind() == internalKeyKindDelete && 324 | ikeySeqNum <= smallestSnapshot && 325 | c.isBaseLevelForUkey(d.icmp.userCmp, ukey) { 326 | 327 | // For this user key: 328 | // (1) there is no data in higher levels 329 | // (2) data in lower levels will have larger sequence numbers 330 | // (3) data in layers that are being compacted here and have 331 | // smaller sequence numbers will be dropped in the next 332 | // few iterations of this loop (by rule (A) above). 333 | // Therefore this deletion marker is obsolete and can be dropped. 334 | drop = true 335 | } 336 | 337 | lastSeqNumForKey = ikeySeqNum 338 | if drop { 339 | continue 340 | } 341 | } 342 | 343 | if tw == nil { 344 | d.mu.Lock() 345 | fileNum = d.versions.nextFileNum() 346 | d.pendingOutputs[fileNum] = struct{}{} 347 | pendingOutputs = append(pendingOutputs, fileNum) 348 | d.mu.Unlock() 349 | 350 | filename = dbFilename(d.dirname, fileTypeTable, fileNum) 351 | file, err := d.opts.GetFileSystem().Create(filename) 352 | if err != nil { 353 | return nil, pendingOutputs, err 354 | } 355 | tw = table.NewWriter(file, &d.icmpOpts) 356 | 357 | smallest = make(internalKey, len(ikey)) 358 | copy(smallest, ikey) 359 | largest = make(internalKey, 0, 2*len(ikey)) 360 | } 361 | largest = append(largest[:0], ikey...) 362 | if err := tw.Set(ikey, iter.Value(), nil); err != nil { 363 | return nil, pendingOutputs, err 364 | } 365 | } 366 | 367 | ve = &versionEdit{ 368 | deletedFiles: map[deletedFileEntry]bool{}, 369 | newFiles: []newFileEntry{ 370 | { 371 | level: c.level + 1, 372 | meta: fileMetadata{ 373 | fileNum: fileNum, 374 | size: 1, 375 | smallest: smallest, 376 | largest: largest, 377 | }, 378 | }, 379 | }, 380 | } 381 | for i := 0; i < 2; i++ { 382 | for _, f := range c.inputs[i] { 383 | ve.deletedFiles[deletedFileEntry{ 384 | level: c.level + i, 385 | fileNum: f.fileNum, 386 | }] = true 387 | } 388 | } 389 | return ve, pendingOutputs, nil 390 | } 391 | 392 | // compactionIterator returns an iterator over all the tables in a compaction. 
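// Tables within a non-zero level are disjoint and sorted, so each such
// level is walked with a single cheap concatenating iterator; level-0
// tables may overlap one another, so each contributes its own iterator,
// and NewMergingIterator combines everything.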
393 | func compactionIterator(tc *tableCache, icmp db.Comparer, c *compaction) (cIter db.Iterator, retErr error) { 394 | iters := make([]db.Iterator, 0, len(c.inputs[0])+1) 395 | defer func() { 396 | if retErr != nil { 397 | for _, iter := range iters { 398 | if iter != nil { 399 | iter.Close() 400 | } 401 | } 402 | } 403 | }() 404 | 405 | if c.level != 0 { 406 | iter, err := newConcatenatingIterator(tc, c.inputs[0]) 407 | if err != nil { 408 | return nil, err 409 | } 410 | iters = append(iters, iter) 411 | } else { 412 | for _, f := range c.inputs[0] { 413 | iter, err := tc.find(f.fileNum, nil) 414 | if err != nil { 415 | return nil, fmt.Errorf("leveldb: could not open table %d: %v", f.fileNum, err) 416 | } 417 | iters = append(iters, iter) 418 | } 419 | } 420 | 421 | iter, err := newConcatenatingIterator(tc, c.inputs[1]) 422 | if err != nil { 423 | return nil, err 424 | } 425 | iters = append(iters, iter) 426 | return db.NewMergingIterator(icmp, iters...), nil 427 | } 428 | 429 | // newConcatenatingIterator returns a concatenating iterator over all of the 430 | // input tables. 431 | func newConcatenatingIterator(tc *tableCache, inputs []fileMetadata) (cIter db.Iterator, retErr error) { 432 | iters := make([]db.Iterator, len(inputs)) 433 | defer func() { 434 | if retErr != nil { 435 | for _, iter := range iters { 436 | if iter != nil { 437 | iter.Close() 438 | } 439 | } 440 | } 441 | }() 442 | 443 | for i, f := range inputs { 444 | iter, err := tc.find(f.fileNum, nil) 445 | if err != nil { 446 | return nil, fmt.Errorf("leveldb: could not open table %d: %v", f.fileNum, err) 447 | } 448 | iters[i] = iter 449 | } 450 | return db.NewConcatenatingIterator(iters...), nil 451 | } 452 | -------------------------------------------------------------------------------- /crc/crc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2011 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package crc implements the checksum algorithm used throughout leveldb. 6 | // 7 | // The algorithm is CRC-32 with Castagnoli's polynomial, followed by a bit 8 | // rotation and an additional delta. The additional processing is to lessen the 9 | // probability of arbitrary key/value data coincidentally containing bytes that 10 | // look like a checksum. 11 | // 12 | // To calculate the uint32 checksum of some data: 13 | // var u uint32 = crc.New(data).Value() 14 | // In leveldb, the uint32 value is then stored in little-endian format. 15 | package crc // import "github.com/golang/leveldb/crc" 16 | 17 | import ( 18 | "hash/crc32" 19 | ) 20 | 21 | var table = crc32.MakeTable(crc32.Castagnoli) 22 | 23 | type CRC uint32 24 | 25 | func New(b []byte) CRC { 26 | return CRC(0).Update(b) 27 | } 28 | 29 | func (c CRC) Update(b []byte) CRC { 30 | return CRC(crc32.Update(uint32(c), table, b)) 31 | } 32 | 33 | func (c CRC) Value() uint32 { 34 | return uint32(c>>15|c<<17) + 0xa282ead8 35 | } 36 | -------------------------------------------------------------------------------- /db/comparer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2011 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package db 6 | 7 | import ( 8 | "bytes" 9 | ) 10 | 11 | // Comparer defines a total ordering over the space of []byte keys: a 'less 12 | // than' relationship. 13 | type Comparer interface { 14 | // Compare returns -1, 0, or +1 depending on whether a is 'less than', 15 | // 'equal to' or 'greater than' b. The two arguments can only be 'equal' 16 | // if their contents are exactly equal. Furthermore, the empty slice 17 | // must be 'less than' any non-empty slice. 18 | Compare(a, b []byte) int 19 | 20 | // Name returns the name of the comparer. 21 | // 22 | // The Level-DB on-disk format stores the comparer name, and opening a 23 | // database with a different comparer from the one it was created with 24 | // will result in an error. 25 | Name() string 26 | 27 | // AppendSeparator appends a sequence of bytes x to dst such that 28 | // a <= x && x < b, where 'less than' is consistent with Compare. 29 | // It returns the enlarged slice, like the built-in append function. 30 | // 31 | // Precondition: either a is 'less than' b, or b is an empty slice. 32 | // In the latter case, empty means 'positive infinity', and appending any 33 | // x such that a <= x will be valid. 34 | // 35 | // An implementation may simply be "return append(dst, a...)" but appending 36 | // fewer bytes will result in smaller tables. 37 | // 38 | // For example, if dst, a and b are the []byte equivalents of the strings 39 | // "aqua", "black" and "blue", then the result may be "aquablb". 40 | // Similarly, if the arguments were "aqua", "green" and "", then the result 41 | // may be "aquah". 42 | AppendSeparator(dst, a, b []byte) []byte 43 | } 44 | 45 | // DefaultComparer is the default implementation of the Comparer interface. 46 | // It uses the natural ordering, consistent with bytes.Compare. 47 | var DefaultComparer Comparer = defCmp{} 48 | 49 | type defCmp struct{} 50 | 51 | func (defCmp) Compare(a, b []byte) int { 52 | return bytes.Compare(a, b) 53 | } 54 | 55 | func (defCmp) Name() string { 56 | // This string is part of the C++ Level-DB implementation's default file format, 57 | // and should not be changed. 58 | return "leveldb.BytewiseComparator" 59 | } 60 | 61 | func (defCmp) AppendSeparator(dst, a, b []byte) []byte { 62 | i, n := SharedPrefixLen(a, b), len(dst) 63 | dst = append(dst, a...) 64 | if len(b) > 0 { 65 | if i == len(a) { 66 | return dst 67 | } 68 | if i == len(b) { 69 | panic("a < b is a precondition, but b is a prefix of a") 70 | } 71 | if a[i] == 0xff || a[i]+1 >= b[i] { 72 | // This isn't optimal, but it matches the C++ Level-DB implementation, and 73 | // it's good enough. For example, if a is "1357" and b is "2", then the 74 | // optimal (i.e. shortest) result is appending "14", but we append "1357". 75 | return dst 76 | } 77 | } 78 | i += n 79 | for ; i < len(dst); i++ { 80 | if dst[i] != 0xff { 81 | dst[i]++ 82 | return dst[:i+1] 83 | } 84 | } 85 | return dst 86 | } 87 | 88 | // SharedPrefixLen returns the largest i such that a[:i] equals b[:i]. 89 | // This function can be useful in implementing the Comparer interface. 90 | func SharedPrefixLen(a, b []byte) int { 91 | i, n := 0, len(a) 92 | if n > len(b) { 93 | n = len(b) 94 | } 95 | for i < n && a[i] == b[i] { 96 | i++ 97 | } 98 | return i 99 | } 100 | -------------------------------------------------------------------------------- /db/comparer_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2011 The LevelDB-Go Authors. All rights reserved. 
2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package db 6 | 7 | import ( 8 | "testing" 9 | ) 10 | 11 | func TestDefCmp(t *testing.T) { 12 | testCases := []struct { 13 | a, b, want string 14 | }{ 15 | // Examples from the doc comments. 16 | {"black", "blue", "blb"}, 17 | {"green", "", "h"}, 18 | // Non-empty b values. The C++ Level-DB code calls these separators. 19 | {"", "2", ""}, 20 | {"1", "2", "1"}, 21 | {"1", "29", "1"}, 22 | {"13", "19", "14"}, 23 | {"13", "99", "2"}, 24 | {"135", "19", "14"}, 25 | {"1357", "19", "14"}, 26 | {"1357", "2", "1357"}, 27 | {"13\xff", "14", "13\xff"}, 28 | {"13\xff", "19", "14"}, 29 | {"1\xff\xff", "19", "1\xff\xff"}, 30 | {"1\xff\xff", "2", "1\xff\xff"}, 31 | {"1\xff\xff", "9", "2"}, 32 | // Empty b values. The C++ Level-DB code calls these successors. 33 | {"", "", ""}, 34 | {"1", "", "2"}, 35 | {"11", "", "2"}, 36 | {"11\xff", "", "2"}, 37 | {"1\xff", "", "2"}, 38 | {"1\xff\xff", "", "2"}, 39 | {"\xff", "", "\xff"}, 40 | {"\xff\xff", "", "\xff\xff"}, 41 | {"\xff\xff\xff", "", "\xff\xff\xff"}, 42 | } 43 | for _, tc := range testCases { 44 | const s = "pqrs" 45 | got := string(DefaultComparer.AppendSeparator([]byte(s), []byte(tc.a), []byte(tc.b))) 46 | if got != s+tc.want { 47 | t.Errorf("a, b = %q, %q: got %q, want %q", tc.a, tc.b, got, s+tc.want) 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /db/db.go: -------------------------------------------------------------------------------- 1 | // Copyright 2011 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package db defines the interfaces for a key/value store. 6 | // 7 | // A DB's basic operations (Get, Set, Delete) should be self-explanatory. Get 8 | // and Delete will return ErrNotFound if the requested key is not in the store. 9 | // Callers are free to ignore this error. 10 | // 11 | // A DB also allows for iterating over the key/value pairs in key order. If d 12 | // is a DB, the code below prints all key/value pairs whose keys are 'greater 13 | // than or equal to' k: 14 | // 15 | // iter := d.Find(k, readOptions) 16 | // for iter.Next() { 17 | // fmt.Printf("key=%q value=%q\n", iter.Key(), iter.Value()) 18 | // } 19 | // return iter.Close() 20 | // 21 | // Other leveldb packages provide implementations of these interfaces. The 22 | // Options struct in this package holds the optional parameters for these 23 | // implementations, including a Comparer to define a 'less than' relationship 24 | // over keys. It is always valid to pass a nil *Options, which means to use 25 | // the default parameter values. Any zero field of a non-nil *Options also 26 | // means to use the default value for that parameter. Thus, the code below 27 | // uses a custom Comparer, but the default values for every other parameter: 28 | // 29 | // db := memdb.New(&db.Options{ 30 | // Comparer: myComparer, 31 | // }) 32 | package db // import "github.com/golang/leveldb/db" 33 | 34 | import ( 35 | "errors" 36 | ) 37 | 38 | // ErrNotFound means that a get or delete call did not find the requested key. 39 | var ErrNotFound = errors.New("leveldb/db: not found") 40 | 41 | // Iterator iterates over a DB's key/value pairs in key order. 42 | // 43 | // An iterator must be closed after use, but it is not necessary to read an 44 | // iterator until exhaustion. 
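// For example (a sketch), it is fine to read just the first pair and stop
// early, as long as Close is still called:
//
//	iter := d.Find(k, nil)
//	if iter.Next() {
//		v = append([]byte(nil), iter.Value()...) // copy: only valid until Next
//	}
//	return iter.Close()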
45 | // 46 | // An iterator is not necessarily goroutine-safe, but it is safe to use 47 | // multiple iterators concurrently, with each in a dedicated goroutine. 48 | // 49 | // It is also safe to use an iterator concurrently with modifying its 50 | // underlying DB, if that DB permits modification. However, the resultant 51 | // key/value pairs are not guaranteed to be a consistent snapshot of that DB 52 | // at a particular point in time. 53 | type Iterator interface { 54 | // Next moves the iterator to the next key/value pair. 55 | // It returns whether the iterator is exhausted. 56 | Next() bool 57 | 58 | // Key returns the key of the current key/value pair, or nil if done. 59 | // The caller should not modify the contents of the returned slice, and 60 | // its contents may change on the next call to Next. 61 | Key() []byte 62 | 63 | // Value returns the value of the current key/value pair, or nil if done. 64 | // The caller should not modify the contents of the returned slice, and 65 | // its contents may change on the next call to Next. 66 | Value() []byte 67 | 68 | // Close closes the iterator and returns any accumulated error. Exhausting 69 | // all the key/value pairs in a table is not considered to be an error. 70 | // It is valid to call Close multiple times. Other methods should not be 71 | // called after the iterator has been closed. 72 | Close() error 73 | } 74 | 75 | // DB is a key/value store. 76 | // 77 | // It is safe to call Get and Find from concurrent goroutines. It is not 78 | // necessarily safe to do so for Set and Delete. 79 | // 80 | // Some implementations may impose additional restrictions. For example: 81 | // - Set calls may need to be in increasing key order. 82 | // - a DB may be read-only or write-only. 83 | type DB interface { 84 | // Get gets the value for the given key. It returns ErrNotFound if the DB 85 | // does not contain the key. 86 | // 87 | // The caller should not modify the contents of the returned slice, but 88 | // it is safe to modify the contents of the argument after Get returns. 89 | Get(key []byte, o *ReadOptions) (value []byte, err error) 90 | 91 | // Set sets the value for the given key. It overwrites any previous value 92 | // for that key; a DB is not a multi-map. 93 | // 94 | // It is safe to modify the contents of the arguments after Set returns. 95 | Set(key, value []byte, o *WriteOptions) error 96 | 97 | // Delete deletes the value for the given key. It returns ErrNotFound if 98 | // the DB does not contain the key. 99 | // 100 | // It is safe to modify the contents of the arguments after Delete returns. 101 | Delete(key []byte, o *WriteOptions) error 102 | 103 | // Find returns an iterator positioned before the first key/value pair 104 | // whose key is 'greater than or equal to' the given key. There may be no 105 | // such pair, in which case the iterator will return false on Next. 106 | // 107 | // Any error encountered will be implicitly returned via the iterator. An 108 | // error-iterator will yield no key/value pairs and closing that iterator 109 | // will return that error. 110 | // 111 | // It is safe to modify the contents of the argument after Find returns. 112 | Find(key []byte, o *ReadOptions) Iterator 113 | 114 | // Close closes the DB. It may or may not close any underlying io.Reader 115 | // or io.Writer, depending on how the DB was created. 116 | // 117 | // It is not safe to close a DB until all outstanding iterators are closed. 118 | // It is valid to call Close multiple times. 
Other methods should not be 119 | // called after the DB has been closed. 120 | Close() error 121 | } 122 | 123 | // NewConcatenatingIterator returns an iterator that concatenates its input. 124 | // Walking the resultant iterator will walk each input iterator in turn, 125 | // exhausting each input before moving on to the next. 126 | // 127 | // The sequence of the combined inputs' keys are assumed to be in strictly 128 | // increasing order: iters[i]'s last key is less than iters[i+1]'s first key. 129 | // 130 | // None of the iters may be nil. 131 | func NewConcatenatingIterator(iters ...Iterator) Iterator { 132 | if len(iters) == 1 { 133 | return iters[0] 134 | } 135 | return &concatenatingIter{ 136 | iters: iters, 137 | } 138 | } 139 | 140 | type concatenatingIter struct { 141 | iters []Iterator 142 | err error 143 | } 144 | 145 | func (c *concatenatingIter) Next() bool { 146 | if c.err != nil { 147 | return false 148 | } 149 | for len(c.iters) > 0 { 150 | if c.iters[0].Next() { 151 | return true 152 | } 153 | c.err = c.iters[0].Close() 154 | if c.err != nil { 155 | return false 156 | } 157 | c.iters = c.iters[1:] 158 | } 159 | return false 160 | } 161 | 162 | func (c *concatenatingIter) Key() []byte { 163 | if len(c.iters) == 0 || c.err != nil { 164 | return nil 165 | } 166 | return c.iters[0].Key() 167 | } 168 | 169 | func (c *concatenatingIter) Value() []byte { 170 | if len(c.iters) == 0 || c.err != nil { 171 | return nil 172 | } 173 | return c.iters[0].Value() 174 | } 175 | 176 | func (c *concatenatingIter) Close() error { 177 | for _, t := range c.iters { 178 | err := t.Close() 179 | if c.err == nil { 180 | c.err = err 181 | } 182 | } 183 | c.iters = nil 184 | return c.err 185 | } 186 | 187 | // NewMergingIterator returns an iterator that merges its input. Walking the 188 | // resultant iterator will return all key/value pairs of all input iterators 189 | // in strictly increasing key order, as defined by cmp. 190 | // 191 | // The input's key ranges may overlap, but there are assumed to be no duplicate 192 | // keys: if iters[i] contains a key k then iters[j] will not contain that key k. 193 | // 194 | // None of the iters may be nil. 195 | func NewMergingIterator(cmp Comparer, iters ...Iterator) Iterator { 196 | if len(iters) == 1 { 197 | return iters[0] 198 | } 199 | return &mergingIter{ 200 | iters: iters, 201 | cmp: cmp, 202 | keys: make([][]byte, len(iters)), 203 | index: -1, 204 | } 205 | } 206 | 207 | type mergingIter struct { 208 | // iters are the input iterators. An element is set to nil when that 209 | // input iterator is done. 210 | iters []Iterator 211 | err error 212 | cmp Comparer 213 | // keys[i] is the current key for iters[i]. 214 | keys [][]byte 215 | // index is: 216 | // - -2 if the mergingIter is done, 217 | // - -1 if the mergingIter has not yet started, 218 | // - otherwise, the index (in iters and in keys) of the smallest key. 219 | index int 220 | } 221 | 222 | // close records that the i'th input iterator is done. 
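// Only the first error encountered is kept: a later Close failure never
// overwrites m.err, and the slot in m.iters and m.keys is nilled out so that
// Next's scan for the smallest key skips it.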
223 | func (m *mergingIter) close(i int) error { 224 | t := m.iters[i] 225 | if t == nil { 226 | return nil 227 | } 228 | err := t.Close() 229 | if m.err == nil { 230 | m.err = err 231 | } 232 | m.iters[i] = nil 233 | m.keys[i] = nil 234 | return err 235 | } 236 | 237 | func (m *mergingIter) Next() bool { 238 | if m.err != nil { 239 | return false 240 | } 241 | switch m.index { 242 | case -2: 243 | return false 244 | case -1: 245 | for i, t := range m.iters { 246 | if t.Next() { 247 | m.keys[i] = t.Key() 248 | } else if m.close(i) != nil { 249 | return false 250 | } 251 | } 252 | default: 253 | t := m.iters[m.index] 254 | if t.Next() { 255 | m.keys[m.index] = t.Key() 256 | } else if m.close(m.index) != nil { 257 | return false 258 | } 259 | } 260 | // Find the smallest key. We could maintain a heap instead of doing 261 | // a linear scan, but len(iters) is typically small. 262 | m.index = -2 263 | for i, t := range m.iters { 264 | if t == nil { 265 | continue 266 | } 267 | if m.index < 0 { 268 | m.index = i 269 | continue 270 | } 271 | if m.cmp.Compare(m.keys[i], m.keys[m.index]) < 0 { 272 | m.index = i 273 | } 274 | } 275 | return m.index >= 0 276 | } 277 | 278 | func (m *mergingIter) Key() []byte { 279 | if m.index < 0 || m.err != nil { 280 | return nil 281 | } 282 | return m.keys[m.index] 283 | } 284 | 285 | func (m *mergingIter) Value() []byte { 286 | if m.index < 0 || m.err != nil { 287 | return nil 288 | } 289 | return m.iters[m.index].Value() 290 | } 291 | 292 | func (m *mergingIter) Close() error { 293 | for i := range m.iters { 294 | m.close(i) 295 | } 296 | m.index = -2 297 | return m.err 298 | } 299 | -------------------------------------------------------------------------------- /db/db_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package db 6 | 7 | import ( 8 | "bytes" 9 | "errors" 10 | "fmt" 11 | "math/rand" 12 | "strings" 13 | "testing" 14 | ) 15 | 16 | var testKeyValuePairs = []string{ 17 | "10:ten", 18 | "11:eleven", 19 | "12:twelve", 20 | "13:thirteen", 21 | "14:fourteen", 22 | "15:fifteen", 23 | "16:sixteen", 24 | "17:seventeen", 25 | "18:eighteen", 26 | "19:nineteen", 27 | } 28 | 29 | type fakeIter struct { 30 | kvPairs []string 31 | index int 32 | closeErr error 33 | } 34 | 35 | func newFakeIterator(closeErr error, kvPairs ...string) *fakeIter { 36 | for _, kv := range kvPairs { 37 | if !strings.Contains(kv, ":") { 38 | panic(fmt.Sprintf(`key-value pair %q does not contain ":"`, kv)) 39 | } 40 | } 41 | return &fakeIter{ 42 | kvPairs: kvPairs, 43 | index: -1, 44 | closeErr: closeErr, 45 | } 46 | } 47 | 48 | func (f *fakeIter) Next() bool { 49 | f.index++ 50 | return f.index < len(f.kvPairs) 51 | } 52 | 53 | func (f *fakeIter) Key() []byte { 54 | kv := f.kvPairs[f.index] 55 | i := strings.Index(kv, ":") 56 | return []byte(kv[:i]) 57 | } 58 | 59 | func (f *fakeIter) Value() []byte { 60 | kv := f.kvPairs[f.index] 61 | i := strings.Index(kv, ":") 62 | return []byte(kv[i+1:]) 63 | } 64 | 65 | func (f *fakeIter) Close() error { 66 | return f.closeErr 67 | } 68 | 69 | // testIterator tests creating a combined iterator from a number of sub- 70 | // iterators. newFunc is a constructor function. 
splitFunc returns a random 71 | // split of the testKeyValuePairs slice such that walking a combined iterator 72 | // over those splits should recover the original key/value pairs in order. 73 | func testIterator(t *testing.T, newFunc func(...Iterator) Iterator, splitFunc func(r *rand.Rand) [][]string) { 74 | // Test pre-determined sub-iterators. The sub-iterators are designed 75 | // so that the combined key/value pair order is the same whether the 76 | // combined iterator is concatenating or merging. 77 | testCases := []struct { 78 | desc string 79 | iters []Iterator 80 | want string 81 | }{ 82 | { 83 | "one sub-iterator", 84 | []Iterator{ 85 | newFakeIterator(nil, "e:east", "w:west"), 86 | }, 87 | "<e:east><w:west>.", 88 | }, 89 | { 90 | "two sub-iterators", 91 | []Iterator{ 92 | newFakeIterator(nil, "a0:0"), 93 | newFakeIterator(nil, "b1:1", "b2:2"), 94 | }, 95 | "<a0:0><b1:1><b2:2>.", 96 | }, 97 | { 98 | "empty sub-iterators", 99 | []Iterator{ 100 | newFakeIterator(nil), 101 | newFakeIterator(nil), 102 | newFakeIterator(nil), 103 | }, 104 | ".", 105 | }, 106 | { 107 | "sub-iterator errors", 108 | []Iterator{ 109 | newFakeIterator(nil, "a0:0", "a1:1"), 110 | newFakeIterator(errors.New("the sky is falling!"), "b2:2", "b3:3", "b4:4"), 111 | newFakeIterator(errors.New("run for your lives!"), "c5:5", "c6:6"), 112 | }, 113 | "<a0:0><a1:1><b2:2><b3:3><b4:4>err=the sky is falling!", 114 | }, 115 | } 116 | for _, tc := range testCases { 117 | var b bytes.Buffer 118 | iter := newFunc(tc.iters...) 119 | for iter.Next() { 120 | fmt.Fprintf(&b, "<%s:%s>", iter.Key(), iter.Value()) 121 | } 122 | if err := iter.Close(); err != nil { 123 | fmt.Fprintf(&b, "err=%v", err) 124 | } else { 125 | b.WriteByte('.') 126 | } 127 | if got := b.String(); got != tc.want { 128 | t.Errorf("%s:\ngot %q\nwant %q", tc.desc, got, tc.want) 129 | } 130 | } 131 | 132 | // Test randomly generated sub-iterators. 133 | r := rand.New(rand.NewSource(0)) 134 | for i, nBad := 0, 0; i < 1000; i++ { 135 | bad := false 136 | 137 | splits := splitFunc(r) 138 | iters := make([]Iterator, len(splits)) 139 | for i, split := range splits { 140 | iters[i] = newFakeIterator(nil, split...) 141 | } 142 | iter := newFunc(iters...) 143 | 144 | j := 0 145 | for ; iter.Next() && j < len(testKeyValuePairs); j++ { 146 | got := string(iter.Key()) + ":" + string(iter.Value()) 147 | want := testKeyValuePairs[j] 148 | if got != want { 149 | bad = true 150 | t.Errorf("random splits: i=%d, j=%d: got %q, want %q", i, j, got, want) 151 | } 152 | } 153 | if iter.Next() { 154 | bad = true 155 | t.Errorf("random splits: i=%d, j=%d: iter was not exhausted", i, j) 156 | } 157 | if j != len(testKeyValuePairs) { 158 | bad = true 159 | t.Errorf("random splits: i=%d, j=%d: want j=%d", i, j, len(testKeyValuePairs)) 160 | } 161 | if err := iter.Close(); err != nil { 162 | bad = true 163 | t.Errorf("random splits: i=%d, j=%d: %v", i, j, err) 164 | } 165 | 166 | if bad { 167 | nBad++ 168 | if nBad == 10 { 169 | t.Fatal("random splits: too many errors; stopping") 170 | } 171 | } 172 | } 173 | } 174 | 175 | func TestConcatenatingIterator(t *testing.T) { 176 | testIterator(t, NewConcatenatingIterator, func(r *rand.Rand) [][]string { 177 | // Partition testKeyValuePairs into one or more splits. Each individual 178 | // split is in increasing order, and different splits may not overlap 179 | // in range. Some of the splits may be empty.
180 | splits, remainder := [][]string{}, testKeyValuePairs 181 | for r.Intn(4) != 0 { 182 | i := r.Intn(1 + len(remainder)) 183 | splits = append(splits, remainder[:i]) 184 | remainder = remainder[i:] 185 | } 186 | if len(remainder) > 0 { 187 | splits = append(splits, remainder) 188 | } 189 | return splits 190 | }) 191 | } 192 | 193 | func TestMergingIterator(t *testing.T) { 194 | newFunc := func(iters ...Iterator) Iterator { 195 | return NewMergingIterator(DefaultComparer, iters...) 196 | } 197 | testIterator(t, newFunc, func(r *rand.Rand) [][]string { 198 | // Shuffle testKeyValuePairs into one or more splits. Each individual 199 | // split is in increasing order, but different splits may overlap in 200 | // range. Some of the splits may be empty. 201 | splits := make([][]string, 1+r.Intn(2+len(testKeyValuePairs))) 202 | for _, kv := range testKeyValuePairs { 203 | j := r.Intn(len(splits)) 204 | splits[j] = append(splits[j], kv) 205 | } 206 | return splits 207 | }) 208 | } 209 | -------------------------------------------------------------------------------- /db/file.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package db 6 | 7 | import ( 8 | "io" 9 | "os" 10 | ) 11 | 12 | // File is a readable, writable sequence of bytes. 13 | // 14 | // Typically, it will be an *os.File, but test code may choose to substitute 15 | // memory-backed implementations. 16 | type File interface { 17 | io.Closer 18 | io.Reader 19 | io.ReaderAt 20 | io.Writer 21 | Stat() (os.FileInfo, error) 22 | Sync() error 23 | } 24 | 25 | // FileSystem is a namespace for files. 26 | // 27 | // The names are filepath names: they may be / separated or \ separated, 28 | // depending on the underlying operating system. 29 | type FileSystem interface { 30 | // Create creates the named file for writing, truncating it if it already 31 | // exists. 32 | Create(name string) (File, error) 33 | 34 | // Open opens the named file for reading. 35 | Open(name string) (File, error) 36 | 37 | // Remove removes the named file or directory. 38 | Remove(name string) error 39 | 40 | // Rename renames a file. It overwrites the file at newname if one exists, 41 | // the same as os.Rename. 42 | Rename(oldname, newname string) error 43 | 44 | // MkdirAll creates a directory and all necessary parents. The permission 45 | // bits perm have the same semantics as in os.MkdirAll. If the directory 46 | // already exists, MkdirAll does nothing and returns nil. 47 | MkdirAll(dir string, perm os.FileMode) error 48 | 49 | // Lock locks the given file, creating the file if necessary, and 50 | // truncating the file if it already exists. The lock is an exclusive lock 51 | // (a write lock), but locked files should neither be read from nor written 52 | // to. Such files should have zero size and only exist to co-ordinate 53 | // ownership across processes. 54 | // 55 | // A nil Closer is returned if an error occurred. Otherwise, close that 56 | // Closer to release the lock. 57 | // 58 | // On Linux and OSX, a lock has the same semantics as fcntl(2)'s advisory 59 | // locks. In particular, closing any other file descriptor for the same 60 | // file will release the lock prematurely. 61 | // 62 | // Attempting to lock a file that is already locked by the current process 63 | // has undefined behavior. 
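//
// A minimal usage sketch (hedged; "LOCK" is just an illustrative name,
// and real callers need fuller error handling):
//
//	closer, err := fs.Lock("LOCK")
//	if err != nil {
//		return err // e.g. another process already holds the lock
//	}
//	defer closer.Close() // closing the Closer releases the lock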
64 | // 65 | // Lock is not yet implemented on other operating systems, and calling it 66 | // will return an error. 67 | Lock(name string) (io.Closer, error) 68 | 69 | // List returns a listing of the given directory. The names returned are 70 | // relative to dir. 71 | List(dir string) ([]string, error) 72 | 73 | // Stat returns an os.FileInfo describing the named file. 74 | Stat(name string) (os.FileInfo, error) 75 | } 76 | 77 | // DefaultFileSystem is a FileSystem implementation backed by the underlying 78 | // operating system's file system. 79 | var DefaultFileSystem FileSystem = defFS{} 80 | 81 | type defFS struct{} 82 | 83 | func (defFS) Create(name string) (File, error) { 84 | return os.Create(name) 85 | } 86 | 87 | func (defFS) Open(name string) (File, error) { 88 | return os.Open(name) 89 | } 90 | 91 | func (defFS) Remove(name string) error { 92 | return os.Remove(name) 93 | } 94 | 95 | func (defFS) Rename(oldname, newname string) error { 96 | return os.Rename(oldname, newname) 97 | } 98 | 99 | func (defFS) MkdirAll(dir string, perm os.FileMode) error { 100 | return os.MkdirAll(dir, perm) 101 | } 102 | 103 | func (defFS) List(dir string) ([]string, error) { 104 | f, err := os.Open(dir) 105 | if err != nil { 106 | return nil, err 107 | } 108 | defer f.Close() 109 | return f.Readdirnames(-1) 110 | } 111 | 112 | func (defFS) Stat(name string) (os.FileInfo, error) { 113 | return os.Stat(name) 114 | } 115 | -------------------------------------------------------------------------------- /db/file_lock_generic.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // +build !darwin,!dragonfly,!freebsd,!linux,!netbsd,!openbsd,!solaris,!windows 6 | 7 | package db 8 | 9 | import ( 10 | "fmt" 11 | "io" 12 | "runtime" 13 | ) 14 | 15 | func (defFS) Lock(name string) (io.Closer, error) { 16 | return nil, fmt.Errorf("leveldb/db: file locking is not implemented on %s/%s", runtime.GOOS, runtime.GOARCH) 17 | } 18 | -------------------------------------------------------------------------------- /db/file_lock_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2014 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | package db_test 5 | 6 | import ( 7 | "bytes" 8 | "flag" 9 | "io/ioutil" 10 | "os" 11 | "os/exec" 12 | "testing" 13 | 14 | "github.com/golang/leveldb/db" 15 | ) 16 | 17 | var lockFilename = flag.String("lockfile", "", "File to lock. A non-empty value implies a child process.") 18 | 19 | func spawn(prog, filename string) ([]byte, error) { 20 | return exec.Command(prog, "-lockfile", filename, "-test.v", 21 | "-test.run=TestLock$").CombinedOutput() 22 | } 23 | 24 | // TestLock locks a file, spawns a second process that attempts to grab the 25 | // lock to verify it fails. 26 | // Then it closes the lock, and spawns a third copy to verify it can be 27 | // relocked. 
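// (The second and third copies are this same test binary re-executed with
// -lockfile set; see spawn above. Those child runs take the `child` branch
// below.)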
28 | func TestLock(t *testing.T) {
29 | child := *lockFilename != ""
30 | var filename string
31 | if child {
32 | filename = *lockFilename
33 | } else {
34 | f, err := ioutil.TempFile("", "golang-leveldb-db-testlock-")
35 | if err != nil {
36 | t.Fatal(err)
37 | }
38 | filename = f.Name()
39 | defer os.Remove(filename)
40 | }
41 |
42 | // Avoid truncating an existing, non-empty file.
43 | fi, err := os.Stat(filename)
44 | if err == nil && fi.Size() != 0 {
45 | t.Fatalf("The file %s is not empty", filename)
46 | }
47 |
48 | t.Logf("Locking %s\n", filename)
49 | lock, err := db.DefaultFileSystem.Lock(filename)
50 | if err != nil {
51 | t.Fatalf("Could not lock %s: %v", filename, err)
52 | }
53 |
54 | if !child {
55 | t.Logf("Spawning child, should fail to grab lock.")
56 | out, err := spawn(os.Args[0], filename)
57 | if err == nil {
58 | t.Fatalf("Attempt to grab open lock should have failed.\n%s", out)
59 | }
60 | if !bytes.Contains(out, []byte("Could not lock")) {
61 | t.Fatalf("Child failed with unexpected output: %s\n", out)
62 | }
63 | t.Logf("Child failed to grab lock as expected.")
64 | }
65 |
66 | t.Logf("Unlocking %s", filename)
67 | if err := lock.Close(); err != nil {
68 | t.Fatalf("Could not unlock %s: %v", filename, err)
69 | }
70 |
71 | if !child {
72 | t.Logf("Spawning child, should successfully grab lock.")
73 | if out, err := spawn(os.Args[0], filename); err != nil {
74 | t.Fatalf("Attempt to re-open lock should have succeeded: %v\n%s",
75 | err, out)
76 | }
77 | t.Logf("Child grabbed lock.")
78 | }
79 | }
80 | --------------------------------------------------------------------------------
/db/file_lock_unix.go:
--------------------------------------------------------------------------------
1 | // Copyright 2014 The LevelDB-Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 |
5 | // +build darwin dragonfly freebsd linux netbsd openbsd solaris
6 |
7 | package db
8 |
9 | import (
10 | "io"
11 | "os"
12 | "syscall"
13 | )
14 |
15 | // lockCloser hides all of an os.File's methods, except for Close.
16 | type lockCloser struct {
17 | f *os.File
18 | }
19 |
20 | func (l lockCloser) Close() error {
21 | return l.f.Close()
22 | }
23 |
24 | func (defFS) Lock(name string) (io.Closer, error) {
25 | f, err := os.Create(name)
26 | if err != nil {
27 | return nil, err
28 | }
29 | spec := syscall.Flock_t{
30 | Type: syscall.F_WRLCK,
31 | Whence: int16(os.SEEK_SET),
32 | Start: 0,
33 | Len: 0, // 0 means to lock the entire file.
34 | Pid: int32(os.Getpid()),
35 | }
36 | if err := syscall.FcntlFlock(f.Fd(), syscall.F_SETLK, &spec); err != nil {
37 | f.Close()
38 | return nil, err
39 | }
40 |
41 | return lockCloser{f}, nil
42 | }
43 | --------------------------------------------------------------------------------
/db/file_lock_windows.go:
--------------------------------------------------------------------------------
1 | // Copyright 2013 The LevelDB-Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 |
5 | package db
6 |
7 | import (
8 | "io"
9 | "syscall"
10 | )
11 |
12 | // lockCloser hides all of a syscall.Handle's methods, except for Close.
13 | type lockCloser struct { 14 | fd syscall.Handle 15 | } 16 | 17 | func (l lockCloser) Close() error { 18 | return syscall.Close(l.fd) 19 | } 20 | 21 | func (defFS) Lock(name string) (io.Closer, error) { 22 | p, err := syscall.UTF16PtrFromString(name) 23 | if err != nil { 24 | return nil, err 25 | } 26 | fd, err := syscall.CreateFile(p, 27 | syscall.GENERIC_READ|syscall.GENERIC_WRITE, 28 | 0, nil, syscall.CREATE_ALWAYS, 29 | syscall.FILE_ATTRIBUTE_NORMAL, 30 | 0, 31 | ) 32 | if err != nil { 33 | return nil, err 34 | } 35 | return lockCloser{fd: fd}, nil 36 | } 37 | -------------------------------------------------------------------------------- /db/options.go: -------------------------------------------------------------------------------- 1 | // Copyright 2011 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package db 6 | 7 | // Compression is the per-block compression algorithm to use. 8 | type Compression int 9 | 10 | const ( 11 | DefaultCompression Compression = iota 12 | NoCompression 13 | SnappyCompression 14 | nCompression 15 | ) 16 | 17 | // FilterPolicy is an algorithm for probabilistically encoding a set of keys. 18 | // The canonical implementation is a Bloom filter. 19 | // 20 | // Every FilterPolicy has a name. This names the algorithm itself, not any one 21 | // particular instance. Aspects specific to a particular instance, such as the 22 | // set of keys or any other parameters, will be encoded in the []byte filter 23 | // returned by NewFilter. 24 | // 25 | // The name may be written to files on disk, along with the filter data. To use 26 | // these filters, the FilterPolicy name at the time of writing must equal the 27 | // name at the time of reading. If they do not match, the filters will be 28 | // ignored, which will not affect correctness but may affect performance. 29 | type FilterPolicy interface { 30 | // Name names the filter policy. 31 | Name() string 32 | 33 | // AppendFilter appends to dst an encoded filter that holds a set of []byte 34 | // keys. 35 | AppendFilter(dst []byte, keys [][]byte) []byte 36 | 37 | // MayContain returns whether the encoded filter may contain given key. 38 | // False positives are possible, where it returns true for keys not in the 39 | // original set. 40 | MayContain(filter, key []byte) bool 41 | } 42 | 43 | // Options holds the optional parameters for leveldb's DB implementations. 44 | // These options apply to the DB at large; per-query options are defined by 45 | // the ReadOptions and WriteOptions types. 46 | // 47 | // Options are typically passed to a constructor function as a struct literal. 48 | // The GetXxx methods are used inside the DB implementations; they return the 49 | // default parameter value if the *Options receiver is nil or the field value 50 | // is zero. 51 | // 52 | // Read/Write options: 53 | // - Comparer 54 | // - FileSystem 55 | // - FilterPolicy 56 | // - MaxOpenFiles 57 | // Read options: 58 | // - VerifyChecksums 59 | // Write options: 60 | // - BlockRestartInterval 61 | // - BlockSize 62 | // - Compression 63 | // - ErrorIfDBExists 64 | // - WriteBufferSize 65 | type Options struct { 66 | // BlockRestartInterval is the number of keys between restart points 67 | // for delta encoding of keys. 68 | // 69 | // The default value is 16. 70 | BlockRestartInterval int 71 | 72 | // BlockSize is the minimum uncompressed size in bytes of each table block. 
73 | // 74 | // The default value is 4096. 75 | BlockSize int 76 | 77 | // Comparer defines a total ordering over the space of []byte keys: a 'less 78 | // than' relationship. The same comparison algorithm must be used for reads 79 | // and writes over the lifetime of the DB. 80 | // 81 | // The default value uses the same ordering as bytes.Compare. 82 | Comparer Comparer 83 | 84 | // Compression defines the per-block compression to use. 85 | // 86 | // The default value (DefaultCompression) uses snappy compression. 87 | Compression Compression 88 | 89 | // ErrorIfDBExists is whether it is an error if the database already exists. 90 | // 91 | // The default value is false. 92 | ErrorIfDBExists bool 93 | 94 | // FileSystem maps file names to byte storage. 95 | // 96 | // The default value uses the underlying operating system's file system. 97 | FileSystem FileSystem 98 | 99 | // FilterPolicy defines a filter algorithm (such as a Bloom filter) that 100 | // can reduce disk reads for Get calls. 101 | // 102 | // One such implementation is bloom.FilterPolicy(10) from the leveldb/bloom 103 | // package. 104 | // 105 | // The default value means to use no filter. 106 | FilterPolicy FilterPolicy 107 | 108 | // MaxOpenFiles is a soft limit on the number of open files that can be 109 | // used by the DB. 110 | // 111 | // The default value is 1000. 112 | MaxOpenFiles int 113 | 114 | // WriteBufferSize is the amount of data to build up in memory (backed by 115 | // an unsorted log on disk) before converting to a sorted on-disk file. 116 | // 117 | // Larger values increase performance, especially during bulk loads. Up to 118 | // two write buffers may be held in memory at the same time, so you may 119 | // wish to adjust this parameter to control memory usage. Also, a larger 120 | // write buffer will result in a longer recovery time the next time the 121 | // database is opened. 122 | // 123 | // The default value is 4MiB. 124 | WriteBufferSize int 125 | 126 | // VerifyChecksums is whether to verify the per-block checksums in a DB. 127 | // 128 | // The default value is false. 129 | VerifyChecksums bool 130 | } 131 | 132 | func (o *Options) GetBlockRestartInterval() int { 133 | if o == nil || o.BlockRestartInterval <= 0 { 134 | return 16 135 | } 136 | return o.BlockRestartInterval 137 | } 138 | 139 | func (o *Options) GetBlockSize() int { 140 | if o == nil || o.BlockSize <= 0 { 141 | return 4096 142 | } 143 | return o.BlockSize 144 | } 145 | 146 | func (o *Options) GetComparer() Comparer { 147 | if o == nil || o.Comparer == nil { 148 | return DefaultComparer 149 | } 150 | return o.Comparer 151 | } 152 | 153 | func (o *Options) GetCompression() Compression { 154 | if o == nil || o.Compression <= DefaultCompression || o.Compression >= nCompression { 155 | // Default to SnappyCompression. 
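// (Note that a nil *Options receiver is fine here, as in all of the
// GetXxx methods: for example, (*Options)(nil).GetCompression() returns
// SnappyCompression. The DB implementations rely on this.)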
156 | return SnappyCompression 157 | } 158 | return o.Compression 159 | } 160 | 161 | func (o *Options) GetErrorIfDBExists() bool { 162 | if o == nil { 163 | return false 164 | } 165 | return o.ErrorIfDBExists 166 | } 167 | 168 | func (o *Options) GetFileSystem() FileSystem { 169 | if o == nil || o.FileSystem == nil { 170 | return DefaultFileSystem 171 | } 172 | return o.FileSystem 173 | } 174 | 175 | func (o *Options) GetFilterPolicy() FilterPolicy { 176 | if o == nil { 177 | return nil 178 | } 179 | return o.FilterPolicy 180 | } 181 | 182 | func (o *Options) GetMaxOpenFiles() int { 183 | if o == nil || o.MaxOpenFiles == 0 { 184 | return 1000 185 | } 186 | return o.MaxOpenFiles 187 | } 188 | 189 | func (o *Options) GetWriteBufferSize() int { 190 | if o == nil || o.WriteBufferSize <= 0 { 191 | return 4 * 1024 * 1024 192 | } 193 | return o.WriteBufferSize 194 | } 195 | 196 | func (o *Options) GetVerifyChecksums() bool { 197 | if o == nil { 198 | return false 199 | } 200 | return o.VerifyChecksums 201 | } 202 | 203 | // ReadOptions hold the optional per-query parameters for Get and Find 204 | // operations. 205 | // 206 | // Like Options, a nil *ReadOptions is valid and means to use the default 207 | // values. 208 | type ReadOptions struct { 209 | // No fields so far. 210 | } 211 | 212 | // WriteOptions hold the optional per-query parameters for Set and Delete 213 | // operations. 214 | // 215 | // Like Options, a nil *WriteOptions is valid and means to use the default 216 | // values. 217 | type WriteOptions struct { 218 | // Sync is whether to sync underlying writes from the OS buffer cache 219 | // through to actual disk, if applicable. Setting Sync can result in 220 | // slower writes. 221 | // 222 | // If false, and the machine crashes, then some recent writes may be lost. 223 | // Note that if it is just the process that crashes (and the machine does 224 | // not) then no writes will be lost. 225 | // 226 | // In other words, Sync being false has the same semantics as a write 227 | // system call. Sync being true means write followed by fsync. 228 | // 229 | // The default value is false. 230 | Sync bool 231 | } 232 | 233 | func (o *WriteOptions) GetSync() bool { 234 | return o != nil && o.Sync 235 | } 236 | -------------------------------------------------------------------------------- /filenames.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package leveldb 6 | 7 | import ( 8 | "fmt" 9 | "os" 10 | "path/filepath" 11 | "strconv" 12 | "strings" 13 | 14 | "github.com/golang/leveldb/db" 15 | ) 16 | 17 | type fileType int 18 | 19 | const ( 20 | fileTypeLog fileType = iota 21 | fileTypeLock 22 | fileTypeTable 23 | fileTypeOldFashionedTable 24 | fileTypeManifest 25 | fileTypeCurrent 26 | ) 27 | 28 | func dbFilename(dirname string, fileType fileType, fileNum uint64) string { 29 | for len(dirname) > 0 && dirname[len(dirname)-1] == os.PathSeparator { 30 | dirname = dirname[:len(dirname)-1] 31 | } 32 | switch fileType { 33 | case fileTypeLog: 34 | return fmt.Sprintf("%s%c%06d.log", dirname, os.PathSeparator, fileNum) 35 | case fileTypeLock: 36 | return fmt.Sprintf("%s%cLOCK", dirname, os.PathSeparator) 37 | case fileTypeTable: 38 | return fmt.Sprintf("%s%c%06d.ldb", dirname, os.PathSeparator, fileNum) 39 | case fileTypeOldFashionedTable: 40 | return fmt.Sprintf("%s%c%06d.sst", dirname, os.PathSeparator, fileNum) 41 | case fileTypeManifest: 42 | return fmt.Sprintf("%s%cMANIFEST-%06d", dirname, os.PathSeparator, fileNum) 43 | case fileTypeCurrent: 44 | return fmt.Sprintf("%s%cCURRENT", dirname, os.PathSeparator) 45 | } 46 | panic("unreachable") 47 | } 48 | 49 | func parseDBFilename(filename string) (fileType fileType, fileNum uint64, ok bool) { 50 | filename = filepath.Base(filename) 51 | switch { 52 | case filename == "CURRENT": 53 | return fileTypeCurrent, 0, true 54 | case filename == "LOCK": 55 | return fileTypeLock, 0, true 56 | case strings.HasPrefix(filename, "MANIFEST-"): 57 | u, err := strconv.ParseUint(filename[len("MANIFEST-"):], 10, 64) 58 | if err != nil { 59 | break 60 | } 61 | return fileTypeManifest, u, true 62 | default: 63 | i := strings.IndexByte(filename, '.') 64 | if i < 0 { 65 | break 66 | } 67 | u, err := strconv.ParseUint(filename[:i], 10, 64) 68 | if err != nil { 69 | break 70 | } 71 | switch filename[i+1:] { 72 | case "ldb": 73 | return fileTypeTable, u, true 74 | case "log": 75 | return fileTypeLog, u, true 76 | case "sst": 77 | return fileTypeOldFashionedTable, u, true 78 | } 79 | } 80 | return 0, 0, false 81 | } 82 | 83 | func setCurrentFile(dirname string, fs db.FileSystem, fileNum uint64) error { 84 | newFilename := dbFilename(dirname, fileTypeCurrent, fileNum) 85 | oldFilename := fmt.Sprintf("%s.%06d.dbtmp", newFilename, fileNum) 86 | fs.Remove(oldFilename) 87 | f, err := fs.Create(oldFilename) 88 | if err != nil { 89 | return err 90 | } 91 | if _, err := fmt.Fprintf(f, "MANIFEST-%06d\n", fileNum); err != nil { 92 | return err 93 | } 94 | if err := f.Close(); err != nil { 95 | return err 96 | } 97 | return fs.Rename(oldFilename, newFilename) 98 | } 99 | -------------------------------------------------------------------------------- /filenames_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package leveldb 6 | 7 | import ( 8 | "path/filepath" 9 | "testing" 10 | ) 11 | 12 | func TestParseDBFilename(t *testing.T) { 13 | testCases := map[string]bool{ 14 | "000000.log": true, 15 | "000000.log.zip": false, 16 | "000000..log": false, 17 | "a000000.log": false, 18 | "abcdef.log": false, 19 | "000001ldb": false, 20 | "000001.ldb": true, 21 | "000002.sst": true, 22 | "CURRENT": true, 23 | "CURRaNT": false, 24 | "LOCK": true, 25 | "xLOCK": false, 26 | "x.LOCK": false, 27 | "MANIFEST": false, 28 | "MANIFEST123456": false, 29 | "MANIFEST-": false, 30 | "MANIFEST-123456": true, 31 | "MANIFEST-123456.doc": false, 32 | } 33 | for tc, want := range testCases { 34 | _, _, got := parseDBFilename(filepath.Join("foo", tc)) 35 | if got != want { 36 | t.Errorf("%q: got %v, want %v", tc, got, want) 37 | } 38 | } 39 | } 40 | 41 | func TestFilenameRoundTrip(t *testing.T) { 42 | testCases := map[fileType]bool{ 43 | // CURRENT and LOCK files aren't numbered. 44 | fileTypeCurrent: false, 45 | fileTypeLock: false, 46 | // The remaining file types are numbered. 47 | fileTypeLog: true, 48 | fileTypeManifest: true, 49 | fileTypeOldFashionedTable: true, 50 | fileTypeTable: true, 51 | } 52 | for fileType, numbered := range testCases { 53 | fileNums := []uint64{0} 54 | if numbered { 55 | fileNums = []uint64{0, 1, 2, 3, 10, 42, 99, 1001} 56 | } 57 | for _, fileNum := range fileNums { 58 | filename := dbFilename("foo", fileType, fileNum) 59 | gotFT, gotFN, gotOK := parseDBFilename(filename) 60 | if !gotOK { 61 | t.Errorf("could not parse %q", filename) 62 | continue 63 | } 64 | if gotFT != fileType || gotFN != fileNum { 65 | t.Errorf("filename=%q: got %v, %v, want %v, %v", filename, gotFT, gotFN, fileType, fileNum) 66 | continue 67 | } 68 | } 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /ikey.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package leveldb 6 | 7 | import ( 8 | "bytes" 9 | 10 | "github.com/golang/leveldb/db" 11 | ) 12 | 13 | // internalKey is a key used for the in-memory and on-disk partial DBs that 14 | // make up a leveldb DB. 15 | // 16 | // It consists of the user key (as given by the arbitrary code that uses 17 | // package leveldb) followed by an 8-byte trailer: 18 | // - 1 byte for the kind of internal key: delete or set, 19 | // - 7 bytes for a uint56 sequence number, in little-endian format. 20 | type internalKey []byte 21 | 22 | type internalKeyKind uint8 23 | 24 | const ( 25 | // These constants are part of the file format, and should not be changed. 26 | internalKeyKindDelete internalKeyKind = 0 27 | internalKeyKindSet internalKeyKind = 1 28 | 29 | // This maximum value isn't part of the file format. It's unlikely, 30 | // but future extensions may increase this value. 31 | // 32 | // When constructing an internal key to pass to DB.Find, internalKeyComparer 33 | // sorts decreasing by kind (after sorting increasing by user key and 34 | // decreasing by sequence number). Thus, use internalKeyKindMax, which sorts 35 | // 'less than or equal to' any other valid internalKeyKind, when searching 36 | // for any kind of internal key formed by a certain user key and seqNum. 37 | internalKeyKindMax internalKeyKind = 1 38 | ) 39 | 40 | // internalKeySeqNumMax is the largest valid sequence number. 
41 | const internalKeySeqNumMax = uint64(1<<56 - 1) 42 | 43 | // makeInternalKey makes an internalKey from a user key, a kind, and a sequence 44 | // number. The return value may be a slice of dst[:cap(dst)] if it is large 45 | // enough. Otherwise, it may be a slice of a newly allocated buffer. In any 46 | // case, all of dst[:cap(dst)] may be overwritten. 47 | func makeInternalKey(dst internalKey, ukey []byte, kind internalKeyKind, seqNum uint64) internalKey { 48 | if cap(dst) < len(ukey)+8 { 49 | n := 256 50 | for n < len(ukey)+8 { 51 | n *= 2 52 | } 53 | dst = make(internalKey, n) 54 | } 55 | ikey := dst[:len(ukey)+8] 56 | i := copy(ikey, ukey) 57 | ikey[i+0] = uint8(kind) 58 | ikey[i+1] = uint8(seqNum) 59 | ikey[i+2] = uint8(seqNum >> 8) 60 | ikey[i+3] = uint8(seqNum >> 16) 61 | ikey[i+4] = uint8(seqNum >> 24) 62 | ikey[i+5] = uint8(seqNum >> 32) 63 | ikey[i+6] = uint8(seqNum >> 40) 64 | ikey[i+7] = uint8(seqNum >> 48) 65 | return ikey 66 | } 67 | 68 | // valid returns whether k is a valid internal key. 69 | func (k internalKey) valid() bool { 70 | i := len(k) - 8 71 | return i >= 0 && internalKeyKind(k[i]) <= internalKeyKindMax 72 | } 73 | 74 | // ukey returns the user key portion of an internal key. 75 | // ukey may panic if k is not valid. 76 | func (k internalKey) ukey() []byte { 77 | return []byte(k[:len(k)-8]) 78 | } 79 | 80 | // kind returns the kind of an internal key. 81 | // kind may panic if k is not valid. 82 | func (k internalKey) kind() internalKeyKind { 83 | return internalKeyKind(k[len(k)-8]) 84 | } 85 | 86 | // seqNum returns the sequence number of an internal key. 87 | // seqNum may panic if k is not valid. 88 | func (k internalKey) seqNum() uint64 { 89 | i := len(k) - 7 90 | n := uint64(k[i+0]) 91 | n |= uint64(k[i+1]) << 8 92 | n |= uint64(k[i+2]) << 16 93 | n |= uint64(k[i+3]) << 24 94 | n |= uint64(k[i+4]) << 32 95 | n |= uint64(k[i+5]) << 40 96 | n |= uint64(k[i+6]) << 48 97 | return n 98 | } 99 | 100 | // clone returns an internalKey that has the same contents but is backed by a 101 | // different array. 102 | func (k internalKey) clone() internalKey { 103 | x := make(internalKey, len(k)) 104 | copy(x, k) 105 | return x 106 | } 107 | 108 | // internalKeyComparer is a db.Comparer that wraps another db.Comparer. 109 | // 110 | // It compares internal keys first by their user keys (as ordered by userCmp), 111 | // then by sequence number (decreasing), then by kind (decreasing). The last 112 | // step is only for completeness; for a given leveldb DB, no two internal keys 113 | // should have the same sequence number. 114 | // 115 | // This ordering is designed so that when iterating through an internal table 116 | // starting at (ukey0, seqNum0), one first encounters those entries with the 117 | // same user key and lower sequence number (i.e. sets or deletes from earlier 118 | // in time), followed by those entries with 'greater' user keys (where 119 | // 'greater' is defined by userCmp). Specifically, one does not encounter 120 | // entries with the same user key and higher sequence number (i.e. sets or 121 | // deletes for ukey0 from the 'future' relative to the particular snapshot 122 | // seqNum0 of the DB). 
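// A worked example of the ordering (hedged; keys are written here as
// ukey/seqNum/kind rather than as raw bytes):
//
//	"blue"/5/set < "blue"/3/set < "blue"/3/delete < "red"/9/set
//
// because "blue" sorts before "red" by user key, 5 before 3 by decreasing
// sequence number, and set (kind 1) before delete (kind 0) by decreasing
// kind.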
123 | type internalKeyComparer struct { 124 | userCmp db.Comparer 125 | } 126 | 127 | var _ db.Comparer = internalKeyComparer{} 128 | 129 | func (c internalKeyComparer) Compare(a, b []byte) int { 130 | ak, bk := internalKey(a), internalKey(b) 131 | if !ak.valid() { 132 | if bk.valid() { 133 | return -1 134 | } 135 | return bytes.Compare(a, b) 136 | } 137 | if !bk.valid() { 138 | return 1 139 | } 140 | if x := c.userCmp.Compare(ak.ukey(), bk.ukey()); x != 0 { 141 | return x 142 | } 143 | if an, bn := ak.seqNum(), bk.seqNum(); an < bn { 144 | return +1 145 | } else if an > bn { 146 | return -1 147 | } 148 | if ai, bi := ak.kind(), bk.kind(); ai < bi { 149 | return +1 150 | } else if ai > bi { 151 | return -1 152 | } 153 | return 0 154 | } 155 | 156 | func (c internalKeyComparer) Name() string { 157 | // This is the same name given by the C++ leveldb's InternalKeyComparator class. 158 | return "leveldb.InternalKeyComparator" 159 | } 160 | 161 | func (c internalKeyComparer) AppendSeparator(dst, a, b []byte) []byte { 162 | // TODO: this could be more sophisticated. 163 | return append(dst, a...) 164 | } 165 | -------------------------------------------------------------------------------- /ikey_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package leveldb 6 | 7 | import ( 8 | "testing" 9 | 10 | "github.com/golang/leveldb/db" 11 | ) 12 | 13 | func TestInternalKey(t *testing.T) { 14 | k := makeInternalKey(nil, []byte("foo"), 1, 0x08070605040302) 15 | if got, want := string(k), "foo\x01\x02\x03\x04\x05\x06\x07\x08"; got != want { 16 | t.Fatalf("k = %q want %q", got, want) 17 | } 18 | if !k.valid() { 19 | t.Fatalf("invalid key") 20 | } 21 | if got, want := string(k.ukey()), "foo"; got != want { 22 | t.Errorf("ukey = %q want %q", got, want) 23 | } 24 | if got, want := k.kind(), internalKeyKind(1); got != want { 25 | t.Errorf("kind = %d want %d", got, want) 26 | } 27 | if got, want := k.seqNum(), uint64(0x08070605040302); got != want { 28 | t.Errorf("seqNum = %d want %d", got, want) 29 | } 30 | } 31 | 32 | func TestInvalidInternalKey(t *testing.T) { 33 | testCases := []string{ 34 | "", 35 | "\x01\x02\x03\x04\x05\x06\x07", 36 | "foo", 37 | "foo\x08\x07\x06\x05\x04\x03\x02", 38 | "foo\x08\x07\x06\x05\x04\x03\x02\x01", 39 | } 40 | for _, tc := range testCases { 41 | if internalKey(tc).valid() { 42 | t.Errorf("%q is a valid key, want invalid", tc) 43 | } 44 | } 45 | } 46 | 47 | func TestInternalKeyComparer(t *testing.T) { 48 | // keys are some internal keys, in sorted order. 49 | keys := []string{ 50 | // The empty key is not a valid internal key, but it still must 51 | // sort lower than any other key. It is used as a zero value when 52 | // checking that a sequence of internal keys are in sorted order. 53 | "", 54 | // The next two keys are also invalid internal keys. They are 'less 55 | // than' any valid internal key, and 'greater than' the empty key. 56 | "A", 57 | "B", 58 | // The remaining test keys are all valid. 
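// Each valid entry below is a user key plus an 8-byte trailer: one
// kind byte, then a 7-byte little-endian sequence number. For example,
// "blue" + "\x01\x11\x00\x00\x00\x00\x00\x00" is user key "blue",
// kind set (1), seqNum 0x11.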
59 | "" + "\x01\xff\xff\xff\xff\xff\xff\xff",
60 | "" + "\x00\xff\xff\xff\xff\xff\xff\xff",
61 | "" + "\x01\x01\x00\x00\x00\x00\x00\x00",
62 | "" + "\x00\x01\x00\x00\x00\x00\x00\x00",
63 | "" + "\x01\x00\x00\x00\x00\x00\x00\x00",
64 | "" + "\x00\x00\x00\x00\x00\x00\x00\x00",
65 | "\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00",
66 | "\x00blue" + "\x01\x11\x00\x00\x00\x00\x00\x00",
67 | "bl\x00ue" + "\x01\x11\x00\x00\x00\x00\x00\x00",
68 | "blue" + "\x01\x11\x00\x00\x00\x00\x00\x00",
69 | "blue\x00" + "\x01\x11\x00\x00\x00\x00\x00\x00",
70 | "green" + "\x01\x11\x00\x00\x00\x00\x00\x00",
71 | "red" + "\x01\xff\xff\xff\xff\xff\xff\xff",
72 | "red" + "\x01\x72\x73\x74\x75\x76\x77\x78",
73 | "red" + "\x01\x00\x00\x00\x00\x00\x00\x11",
74 | "red" + "\x01\x00\x00\x00\x00\x00\x11\x00",
75 | "red" + "\x01\x00\x00\x00\x00\x11\x00\x00",
76 | "red" + "\x01\x00\x00\x00\x11\x00\x00\x00",
77 | "red" + "\x01\x00\x00\x11\x00\x00\x00\x00",
78 | "red" + "\x01\x00\x11\x00\x00\x00\x00\x00",
79 | "red" + "\x01\x11\x00\x00\x00\x00\x00\x00",
80 | "red" + "\x00\x11\x00\x00\x00\x00\x00\x00",
81 | "red" + "\x00\x00\x00\x00\x00\x00\x00\x00",
82 | "\xfe" + "\x01\xff\xff\xff\xff\xff\xff\xff",
83 | "\xfe" + "\x00\x00\x00\x00\x00\x00\x00\x00",
84 | "\xff" + "\x01\xff\xff\xff\xff\xff\xff\xff",
85 | "\xff" + "\x00\x00\x00\x00\x00\x00\x00\x00",
86 | "\xff\x40" + "\x01\xff\xff\xff\xff\xff\xff\xff",
87 | "\xff\x40" + "\x00\x00\x00\x00\x00\x00\x00\x00",
88 | "\xff\xff" + "\x01\xff\xff\xff\xff\xff\xff\xff",
89 | "\xff\xff" + "\x00\x00\x00\x00\x00\x00\x00\x00",
90 | }
91 | c := internalKeyComparer{db.DefaultComparer}
92 | for i := range keys {
93 | for j := range keys {
94 | got := c.Compare([]byte(keys[i]), []byte(keys[j]))
95 | want := 0
96 | if i < j {
97 | want = -1
98 | } else if i > j {
99 | want = +1
100 | }
101 | if got != want {
102 | t.Errorf("i=%d, j=%d, keys[i]=%q, keys[j]=%q: got %d, want %d",
103 | i, j, keys[i], keys[j], got, want)
104 | }
105 | }
106 | }
107 | }
108 | --------------------------------------------------------------------------------
/leveldb_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
3 |
4 | package leveldb
5 |
6 | import (
7 | "bytes"
8 | "errors"
9 | "io"
10 | "math/rand"
11 | "os"
12 | "path/filepath"
13 | "reflect"
14 | "sort"
15 | "strconv"
16 | "strings"
17 | "sync"
18 | "testing"
19 | "time"
20 |
21 | "github.com/golang/leveldb/db"
22 | "github.com/golang/leveldb/memfs"
23 | )
24 |
25 | // try repeatedly calls f, sleeping between calls with exponential back-off,
26 | // until f returns a nil error or the total sleep time is greater than or equal
27 | // to maxTotalSleep. It always calls f at least once.
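// For example (a sketch of the arithmetic, not an extra guarantee):
// try(100*time.Microsecond, 20*time.Second, f) sleeps 100µs before the
// first call, then 200µs, 400µs, and so on, doubling each attempt until
// the cumulative sleep reaches 20s.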
28 | func try(initialSleep, maxTotalSleep time.Duration, f func() error) error { 29 | totalSleep := time.Duration(0) 30 | for d := initialSleep; ; d *= 2 { 31 | time.Sleep(d) 32 | totalSleep += d 33 | if err := f(); err == nil || totalSleep >= maxTotalSleep { 34 | return err 35 | } 36 | } 37 | } 38 | 39 | func TestTry(t *testing.T) { 40 | c := make(chan struct{}) 41 | go func() { 42 | time.Sleep(1 * time.Millisecond) 43 | close(c) 44 | }() 45 | 46 | attemptsMu := sync.Mutex{} 47 | attempts := 0 48 | 49 | err := try(100*time.Microsecond, 20*time.Second, func() error { 50 | attemptsMu.Lock() 51 | attempts++ 52 | attemptsMu.Unlock() 53 | 54 | select { 55 | default: 56 | return errors.New("timed out") 57 | case <-c: 58 | return nil 59 | } 60 | }) 61 | if err != nil { 62 | t.Fatal(err) 63 | } 64 | 65 | attemptsMu.Lock() 66 | a := attempts 67 | attemptsMu.Unlock() 68 | 69 | if a == 0 { 70 | t.Fatalf("attempts: got 0, want > 0") 71 | } 72 | } 73 | 74 | func TestErrorIfDBExists(t *testing.T) { 75 | for _, b := range [...]bool{false, true} { 76 | fs := memfs.New() 77 | d0, err := Open("", &db.Options{ 78 | FileSystem: fs, 79 | }) 80 | if err != nil { 81 | t.Errorf("b=%v: d0 Open: %v", b, err) 82 | continue 83 | } 84 | if err := d0.Close(); err != nil { 85 | t.Errorf("b=%v: d0 Close: %v", b, err) 86 | continue 87 | } 88 | 89 | d1, err := Open("", &db.Options{ 90 | FileSystem: fs, 91 | ErrorIfDBExists: b, 92 | }) 93 | if d1 != nil { 94 | defer d1.Close() 95 | } 96 | if got := err != nil; got != b { 97 | t.Errorf("b=%v: d1 Open: err is %v, got (err != nil) is %v, want %v", b, err, got, b) 98 | continue 99 | } 100 | } 101 | } 102 | 103 | func TestNewDBFilenames(t *testing.T) { 104 | fooBar := filepath.Join("foo", "bar") 105 | fs := memfs.New() 106 | d, err := Open(fooBar, &db.Options{ 107 | FileSystem: fs, 108 | }) 109 | if err != nil { 110 | t.Fatalf("Open: %v", err) 111 | } 112 | if err := d.Close(); err != nil { 113 | t.Fatalf("Close: %v", err) 114 | } 115 | got, err := fs.List(fooBar) 116 | if err != nil { 117 | t.Fatalf("List: %v", err) 118 | } 119 | sort.Strings(got) 120 | // TODO: should there be a LOCK file here? 121 | want := []string{ 122 | "000003.log", 123 | "CURRENT", 124 | "MANIFEST-000002", 125 | } 126 | if !reflect.DeepEqual(got, want) { 127 | t.Errorf("\ngot %v\nwant %v", got, want) 128 | } 129 | } 130 | 131 | // cloneFileSystem returns a new memory-backed file system whose root contains 132 | // a copy of the directory dirname in the source file system srcFS. The copy 133 | // is not recursive; directories under dirname are not copied. 134 | // 135 | // Changes to the resultant file system do not modify the source file system. 
136 | // 137 | // For example, if srcFS contained: 138 | // - /bar 139 | // - /baz/0 140 | // - /foo/x 141 | // - /foo/y 142 | // - /foo/z/A 143 | // - /foo/z/B 144 | // then calling cloneFileSystem(srcFS, "/foo") would result in a file system 145 | // containing: 146 | // - /x 147 | // - /y 148 | func cloneFileSystem(srcFS db.FileSystem, dirname string) (db.FileSystem, error) { 149 | if len(dirname) == 0 || dirname[len(dirname)-1] != os.PathSeparator { 150 | dirname += string(os.PathSeparator) 151 | } 152 | 153 | dstFS := memfs.New() 154 | list, err := srcFS.List(dirname) 155 | if err != nil { 156 | return nil, err 157 | } 158 | for _, name := range list { 159 | srcFile, err := srcFS.Open(dirname + name) 160 | if err != nil { 161 | return nil, err 162 | } 163 | stat, err := srcFile.Stat() 164 | if err != nil { 165 | return nil, err 166 | } 167 | if stat.IsDir() { 168 | err = srcFile.Close() 169 | if err != nil { 170 | return nil, err 171 | } 172 | continue 173 | } 174 | data := make([]byte, stat.Size()) 175 | _, err = io.ReadFull(srcFile, data) 176 | if err != nil { 177 | return nil, err 178 | } 179 | err = srcFile.Close() 180 | if err != nil { 181 | return nil, err 182 | } 183 | dstFile, err := dstFS.Create(name) 184 | if err != nil { 185 | return nil, err 186 | } 187 | _, err = dstFile.Write(data) 188 | if err != nil { 189 | return nil, err 190 | } 191 | err = dstFile.Close() 192 | if err != nil { 193 | return nil, err 194 | } 195 | } 196 | return dstFS, nil 197 | } 198 | 199 | func TestBasicReads(t *testing.T) { 200 | testCases := []struct { 201 | dirname string 202 | wantMap map[string]string 203 | }{ 204 | { 205 | "db-stage-1", 206 | map[string]string{ 207 | "aaa": "", 208 | "bar": "", 209 | "baz": "", 210 | "foo": "", 211 | "quux": "", 212 | "zzz": "", 213 | }, 214 | }, 215 | { 216 | "db-stage-2", 217 | map[string]string{ 218 | "aaa": "", 219 | "bar": "", 220 | "baz": "three", 221 | "foo": "four", 222 | "quux": "", 223 | "zzz": "", 224 | }, 225 | }, 226 | { 227 | "db-stage-3", 228 | map[string]string{ 229 | "aaa": "", 230 | "bar": "", 231 | "baz": "three", 232 | "foo": "four", 233 | "quux": "", 234 | "zzz": "", 235 | }, 236 | }, 237 | { 238 | "db-stage-4", 239 | map[string]string{ 240 | "aaa": "", 241 | "bar": "", 242 | "baz": "", 243 | "foo": "five", 244 | "quux": "six", 245 | "zzz": "", 246 | }, 247 | }, 248 | } 249 | for _, tc := range testCases { 250 | fs, err := cloneFileSystem(db.DefaultFileSystem, "testdata/"+tc.dirname) 251 | if err != nil { 252 | t.Errorf("%s: cloneFileSystem failed: %v", tc.dirname, err) 253 | continue 254 | } 255 | d, err := Open("", &db.Options{ 256 | FileSystem: fs, 257 | }) 258 | if err != nil { 259 | t.Errorf("%s: Open failed: %v", tc.dirname, err) 260 | continue 261 | } 262 | for key, want := range tc.wantMap { 263 | got, err := d.Get([]byte(key), nil) 264 | if err != nil && err != db.ErrNotFound { 265 | t.Errorf("%s: Get(%q) failed: %v", tc.dirname, key, err) 266 | continue 267 | } 268 | if string(got) != string(want) { 269 | t.Errorf("%s: Get(%q): got %q, want %q", tc.dirname, key, got, want) 270 | continue 271 | } 272 | } 273 | err = d.Close() 274 | if err != nil { 275 | t.Errorf("%s: Close failed: %v", tc.dirname, err) 276 | continue 277 | } 278 | } 279 | } 280 | 281 | func TestBasicWrites(t *testing.T) { 282 | d, err := Open("", &db.Options{ 283 | FileSystem: memfs.New(), 284 | }) 285 | if err != nil { 286 | t.Fatalf("Open failed: %v", err) 287 | } 288 | 289 | names := []string{ 290 | "Alatar", 291 | "Gandalf", 292 | "Pallando", 293 | 
"Radagast", 294 | "Saruman", 295 | "Joe", 296 | } 297 | wantMap := map[string]string{} 298 | 299 | inBatch, batch, pending := false, Batch{}, [][]string(nil) 300 | set0 := func(k, v string) error { 301 | return d.Set([]byte(k), []byte(v), nil) 302 | } 303 | del0 := func(k string) error { 304 | return d.Delete([]byte(k), nil) 305 | } 306 | set1 := func(k, v string) error { 307 | batch.Set([]byte(k), []byte(v)) 308 | return nil 309 | } 310 | del1 := func(k string) error { 311 | batch.Delete([]byte(k)) 312 | return nil 313 | } 314 | set, del := set0, del0 315 | 316 | testCases := []string{ 317 | "set Gandalf Grey", 318 | "set Saruman White", 319 | "set Radagast Brown", 320 | "delete Saruman", 321 | "set Gandalf White", 322 | "batch", 323 | " set Alatar AliceBlue", 324 | "apply", 325 | "delete Pallando", 326 | "set Alatar AntiqueWhite", 327 | "set Pallando PapayaWhip", 328 | "batch", 329 | "apply", 330 | "set Pallando PaleVioletRed", 331 | "batch", 332 | " delete Alatar", 333 | " set Gandalf GhostWhite", 334 | " set Saruman Seashell", 335 | " delete Saruman", 336 | " set Saruman SeaGreen", 337 | " set Radagast RosyBrown", 338 | " delete Pallando", 339 | "apply", 340 | "delete Radagast", 341 | "delete Radagast", 342 | "delete Radagast", 343 | "set Gandalf Goldenrod", 344 | "set Pallando PeachPuff", 345 | "batch", 346 | " delete Joe", 347 | " delete Saruman", 348 | " delete Radagast", 349 | " delete Pallando", 350 | " delete Gandalf", 351 | " delete Alatar", 352 | "apply", 353 | "set Joe Plumber", 354 | } 355 | for i, tc := range testCases { 356 | s := strings.Split(strings.TrimSpace(tc), " ") 357 | switch s[0] { 358 | case "set": 359 | if err := set(s[1], s[2]); err != nil { 360 | t.Fatalf("#%d %s: %v", i, tc, err) 361 | } 362 | if inBatch { 363 | pending = append(pending, s) 364 | } else { 365 | wantMap[s[1]] = s[2] 366 | } 367 | case "delete": 368 | if err := del(s[1]); err != nil { 369 | t.Fatalf("#%d %s: %v", i, tc, err) 370 | } 371 | if inBatch { 372 | pending = append(pending, s) 373 | } else { 374 | delete(wantMap, s[1]) 375 | } 376 | case "batch": 377 | inBatch, batch, set, del = true, Batch{}, set1, del1 378 | case "apply": 379 | if err := d.Apply(batch, nil); err != nil { 380 | t.Fatalf("#%d %s: %v", i, tc, err) 381 | } 382 | for _, p := range pending { 383 | switch p[0] { 384 | case "set": 385 | wantMap[p[1]] = p[2] 386 | case "delete": 387 | delete(wantMap, p[1]) 388 | } 389 | } 390 | inBatch, pending, set, del = false, nil, set0, del0 391 | default: 392 | t.Fatalf("#%d %s: bad test case: %q", i, tc, s) 393 | } 394 | 395 | fail := false 396 | for _, name := range names { 397 | g, err := d.Get([]byte(name), nil) 398 | if err != nil && err != db.ErrNotFound { 399 | t.Errorf("#%d %s: Get(%q): %v", i, tc, name, err) 400 | fail = true 401 | } 402 | got, gOK := string(g), err == nil 403 | want, wOK := wantMap[name] 404 | if got != want || gOK != wOK { 405 | t.Errorf("#%d %s: Get(%q): got %q, %t, want %q, %t", 406 | i, tc, name, got, gOK, want, wOK) 407 | fail = true 408 | } 409 | } 410 | if fail { 411 | return 412 | } 413 | } 414 | 415 | if err := d.Close(); err != nil { 416 | t.Fatalf("Close failed: %v", err) 417 | } 418 | } 419 | 420 | func TestRandomWrites(t *testing.T) { 421 | d, err := Open("", &db.Options{ 422 | FileSystem: memfs.New(), 423 | WriteBufferSize: 8 * 1024, 424 | }) 425 | if err != nil { 426 | t.Fatalf("Open: %v", err) 427 | } 428 | 429 | keys := [64][]byte{} 430 | wants := [64]int{} 431 | for k := range keys { 432 | keys[k] = []byte(strconv.Itoa(k)) 433 | wants[k] = -1 
434 | }
435 | xxx := bytes.Repeat([]byte("x"), 512)
436 |
437 | rng := rand.New(rand.NewSource(123))
438 | const N = 1000
439 | for i := 0; i < N; i++ {
440 | k := rng.Intn(len(keys))
441 | if rng.Intn(20) != 0 {
442 | wants[k] = rng.Intn(len(xxx) + 1)
443 | if err := d.Set(keys[k], xxx[:wants[k]], nil); err != nil {
444 | t.Fatalf("i=%d: Set: %v", i, err)
445 | }
446 | } else {
447 | wants[k] = -1
448 | if err := d.Delete(keys[k], nil); err != nil {
449 | t.Fatalf("i=%d: Delete: %v", i, err)
450 | }
451 | }
452 |
453 | if i != N-1 && rng.Intn(50) != 0 {
454 | continue
455 | }
456 | for k := range keys {
457 | got := -1
458 | if v, err := d.Get(keys[k], nil); err != nil {
459 | if err != db.ErrNotFound {
460 | t.Fatalf("Get: %v", err)
461 | }
462 | } else {
463 | got = len(v)
464 | }
465 | if got != wants[k] {
466 | t.Errorf("i=%d, k=%d: got %d, want %d", i, k, got, wants[k])
467 | }
468 | }
469 | }
470 |
471 | if err := d.Close(); err != nil {
472 | t.Fatalf("db Close: %v", err)
473 | }
474 | }
475 |
476 | func TestOpenCloseOpenClose(t *testing.T) {
477 | opts := &db.Options{
478 | FileSystem: memfs.New(),
479 | }
480 |
481 | for _, startFromEmpty := range []bool{false, true} {
482 | for _, length := range []int{-1, 0, 1, 1000, 10000, 100000} {
483 | dirname := "sharedDatabase"
484 | if startFromEmpty {
485 | dirname = "startFromEmpty" + strconv.Itoa(length)
486 | }
487 |
488 | got, xxx := []byte(nil), ""
489 | if length >= 0 {
490 | xxx = strings.Repeat("x", length)
491 | }
492 |
493 | d0, err := Open(dirname, opts)
494 | if err != nil {
495 | t.Errorf("sfe=%t, length=%d: Open #0: %v",
496 | startFromEmpty, length, err)
497 | continue
498 | }
499 | if length >= 0 {
500 | err = d0.Set([]byte("key"), []byte(xxx), nil)
501 | if err != nil {
502 | t.Errorf("sfe=%t, length=%d: Set: %v",
503 | startFromEmpty, length, err)
504 | continue
505 | }
506 | }
507 | err = d0.Close()
508 | if err != nil {
509 | t.Errorf("sfe=%t, length=%d: Close #0: %v",
510 | startFromEmpty, length, err)
511 | continue
512 | }
513 |
514 | // TODO: make the second Open recover (without a fatal "corrupt log
515 | // file" error) even if the d0 database was not closed but the xxx
516 | // value is large enough to write a partial record. Writing to the
517 | // database should not corrupt it even if the writer process was
518 | // killed part-way through.
519 |
520 | d1, err := Open(dirname, opts)
521 | if err != nil {
522 | t.Errorf("sfe=%t, length=%d: Open #1: %v",
523 | startFromEmpty, length, err)
524 | continue
525 | }
526 | if length >= 0 {
527 | got, err = d1.Get([]byte("key"), nil)
528 | if err != nil {
529 | t.Errorf("sfe=%t, length=%d: Get: %v",
530 | startFromEmpty, length, err)
531 | continue
532 | }
533 | }
534 | err = d1.Close()
535 | if err != nil {
536 | t.Errorf("sfe=%t, length=%d: Close #1: %v",
537 | startFromEmpty, length, err)
538 | continue
539 | }
540 |
541 | if length >= 0 && string(got) != xxx {
542 | t.Errorf("sfe=%t, length=%d: got value differs from set value",
543 | startFromEmpty, length)
544 | continue
545 | }
546 | }
547 | }
548 | }
549 | --------------------------------------------------------------------------------
/memdb/memdb.go:
--------------------------------------------------------------------------------
1 | // Copyright 2011 The LevelDB-Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 |
5 | // Package memdb provides a memory-backed implementation of the db.DB
6 | // interface.
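//
// A minimal usage sketch (hedged; the keys and values are illustrative):
//
//	m := memdb.New(nil) // a nil *db.Options means all defaults
//	_ = m.Set([]byte("cherry"), []byte("red"), nil)
//	v, err := m.Get([]byte("cherry"), nil) // v is []byte("red")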
7 | // 8 | // A MemDB's memory consumption increases monotonically, even if keys are 9 | // deleted or values are updated with shorter slices. Callers of the package 10 | // are responsible for explicitly compacting a MemDB into a separate DB 11 | // (whether in-memory or on-disk) when appropriate. 12 | package memdb // import "github.com/golang/leveldb/memdb" 13 | 14 | import ( 15 | "encoding/binary" 16 | "math/rand" 17 | "sync" 18 | 19 | "github.com/golang/leveldb/db" 20 | ) 21 | 22 | // maxHeight is the maximum height of a MemDB's skiplist. 23 | const maxHeight = 12 24 | 25 | // A MemDB's skiplist consists of a number of nodes, and each node is 26 | // represented by a variable number of ints: a key-offset, a value-offset, and 27 | // between 1 and maxHeight next nodes. The key-offset and value-offset encode 28 | // the node's key/value pair and are offsets into a MemDB's kvData slice. 29 | // The remaining ints, for the next nodes in the skiplist's linked lists, are 30 | // offsets into a MemDB's nodeData slice. 31 | // 32 | // The fXxx constants represent how to find the Xxx field of a node in the 33 | // nodeData. For example, given an int 30 representing a node, and given 34 | // nodeData[30:36] that looked like [60, 71, 82, 83, 84, 85], then 35 | // nodeData[30 + fKey] = 60 would be the node's key-offset, 36 | // nodeData[30 + fVal] = 71 would be the node's value-offset, and 37 | // nodeData[30 + fNxt + 0] = 82 would be the next node at the height-0 list, 38 | // nodeData[30 + fNxt + 1] = 83 would be the next node at the height-1 list, 39 | // and so on. A node's height is implied by the skiplist construction: a node 40 | // of height x appears in the height-h list iff 0 <= h && h < x. 41 | const ( 42 | fKey = iota 43 | fVal 44 | fNxt 45 | ) 46 | 47 | const ( 48 | // zeroNode represents the end of a linked list. 49 | zeroNode = 0 50 | // headNode represents the start of the linked list. It is equal to -fNxt 51 | // so that the next nodes at height-h are at nodeData[h]. 52 | // The head node is an artificial node and has no key or value. 53 | headNode = -fNxt 54 | ) 55 | 56 | // A node's key-offset and value-offset fields are offsets into a MemDB's 57 | // kvData slice that stores varint-prefixed strings: the node's key and value. 58 | // A negative offset means a zero-length string, whether explicitly set to 59 | // empty or implicitly set by deletion. 60 | const ( 61 | kvOffsetEmptySlice = -1 62 | kvOffsetDeletedNode = -2 63 | ) 64 | 65 | // MemDB is a memory-backed implementation of the db.DB interface. 66 | // 67 | // It is safe to call Get, Set, Delete and Find concurrently. 68 | type MemDB struct { 69 | mutex sync.RWMutex 70 | // height is the number of such lists, which can increase over time. 71 | height int 72 | // cmp defines an ordering on keys. 73 | cmp db.Comparer 74 | // kvData is an append-only buffer that holds varint-prefixed strings. 75 | kvData []byte 76 | // nodeData is an append-only buffer that holds a node's fields. 77 | nodeData []int 78 | } 79 | 80 | // MemDB implements the db.DB interface. 81 | var _ db.DB = (*MemDB)(nil) 82 | 83 | // load loads a []byte from m.kvData. 84 | func (m *MemDB) load(kvOffset int) (b []byte) { 85 | if kvOffset < 0 { 86 | return nil 87 | } 88 | bLen, n := binary.Uvarint(m.kvData[kvOffset:]) 89 | i, j := kvOffset+n, kvOffset+n+int(bLen) 90 | return m.kvData[i:j:j] 91 | } 92 | 93 | // save saves a []byte to m.kvData. 
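// For example (illustrative): calling save([]byte("red")) on an empty
// kvData appends the varint length byte 0x03 followed by 'r', 'e', 'd'
// and returns kvOffset 0; load(0) then recovers []byte("red").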
94 | func (m *MemDB) save(b []byte) (kvOffset int) { 95 | if len(b) == 0 { 96 | return kvOffsetEmptySlice 97 | } 98 | kvOffset = len(m.kvData) 99 | var buf [binary.MaxVarintLen64]byte 100 | length := binary.PutUvarint(buf[:], uint64(len(b))) 101 | m.kvData = append(m.kvData, buf[:length]...) 102 | m.kvData = append(m.kvData, b...) 103 | return kvOffset 104 | } 105 | 106 | // findNode returns the first node n whose key is >= the given key (or nil if 107 | // there is no such node) and whether n's key equals key. The search is based 108 | // solely on the contents of a node's key. Whether or not that key was 109 | // previously deleted from the MemDB is not relevant. 110 | // 111 | // If prev is non-nil, it also sets the first m.height elements of prev to the 112 | // preceding node at each height. 113 | func (m *MemDB) findNode(key []byte, prev *[maxHeight]int) (n int, exactMatch bool) { 114 | for h, p := m.height-1, headNode; h >= 0; h-- { 115 | // Walk the skiplist at height h until we find either a zero node 116 | // or one whose key is >= the given key. 117 | n = m.nodeData[p+fNxt+h] 118 | for { 119 | if n == zeroNode { 120 | exactMatch = false 121 | break 122 | } 123 | kOff := m.nodeData[n+fKey] 124 | if c := m.cmp.Compare(m.load(kOff), key); c >= 0 { 125 | exactMatch = c == 0 126 | break 127 | } 128 | p, n = n, m.nodeData[n+fNxt+h] 129 | } 130 | if prev != nil { 131 | (*prev)[h] = p 132 | } 133 | } 134 | return n, exactMatch 135 | } 136 | 137 | // Get implements DB.Get, as documented in the leveldb/db package. 138 | func (m *MemDB) Get(key []byte, o *db.ReadOptions) (value []byte, err error) { 139 | m.mutex.RLock() 140 | defer m.mutex.RUnlock() 141 | n, exactMatch := m.findNode(key, nil) 142 | vOff := m.nodeData[n+fVal] 143 | if !exactMatch || vOff == kvOffsetDeletedNode { 144 | return nil, db.ErrNotFound 145 | } 146 | return m.load(vOff), nil 147 | } 148 | 149 | // Set implements DB.Set, as documented in the leveldb/db package. 150 | func (m *MemDB) Set(key, value []byte, o *db.WriteOptions) error { 151 | m.mutex.Lock() 152 | defer m.mutex.Unlock() 153 | // Find the node, and its predecessors at all heights. 154 | var prev [maxHeight]int 155 | n, exactMatch := m.findNode(key, &prev) 156 | if exactMatch { 157 | m.nodeData[n+fVal] = m.save(value) 158 | return nil 159 | } 160 | // Choose the new node's height, branching with 25% probability. 161 | h := 1 162 | for h < maxHeight && rand.Intn(4) == 0 { 163 | h++ 164 | } 165 | // Raise the skiplist's height to the node's height, if necessary. 166 | if m.height < h { 167 | for i := m.height; i < h; i++ { 168 | prev[i] = headNode 169 | } 170 | m.height = h 171 | } 172 | // Insert the new node. 173 | var x [fNxt + maxHeight]int 174 | n1 := len(m.nodeData) 175 | x[fKey] = m.save(key) 176 | x[fVal] = m.save(value) 177 | for i := 0; i < h; i++ { 178 | j := prev[i] + fNxt + i 179 | x[fNxt+i] = m.nodeData[j] 180 | m.nodeData[j] = n1 181 | } 182 | m.nodeData = append(m.nodeData, x[:fNxt+h]...) 183 | return nil 184 | } 185 | 186 | // Delete implements DB.Delete, as documented in the leveldb/db package. 187 | func (m *MemDB) Delete(key []byte, o *db.WriteOptions) error { 188 | m.mutex.Lock() 189 | defer m.mutex.Unlock() 190 | n, exactMatch := m.findNode(key, nil) 191 | if !exactMatch || m.nodeData[n+fVal] == kvOffsetDeletedNode { 192 | return db.ErrNotFound 193 | } 194 | m.nodeData[n+fVal] = kvOffsetDeletedNode 195 | return nil 196 | } 197 | 198 | // Find implements DB.Find, as documented in the leveldb/db package. 
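// For example (a hedged sketch): if the MemDB holds only "apricot" and
// "peach", then Find([]byte("m"), nil) returns an iterator that, after
// the caller's first Next, is positioned at "peach", the first key >= "m".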
199 | func (m *MemDB) Find(key []byte, o *db.ReadOptions) db.Iterator {
200 | m.mutex.RLock()
201 | defer m.mutex.RUnlock()
202 | n, _ := m.findNode(key, nil)
203 | for n != zeroNode && m.nodeData[n+fVal] == kvOffsetDeletedNode {
204 | n = m.nodeData[n+fNxt]
205 | }
206 | t := &iterator{
207 | m: m,
208 | restartNode: n,
209 | }
210 | t.fill()
211 | // The iterator is positioned at the first node >= key. The iterator API
212 | // requires that the caller calls Next first, so we set t.i0 to -1.
213 | t.i0 = -1
214 | return t
215 | }
216 |
217 | // Close implements DB.Close, as documented in the leveldb/db package.
218 | func (m *MemDB) Close() error {
219 | return nil
220 | }
221 |
222 | // ApproximateMemoryUsage returns the approximate memory usage of the MemDB.
223 | func (m *MemDB) ApproximateMemoryUsage() int {
224 | m.mutex.RLock()
225 | defer m.mutex.RUnlock()
226 | return len(m.kvData)
227 | }
228 |
229 | // Empty returns whether the MemDB has no key/value pairs.
230 | func (m *MemDB) Empty() bool {
231 | m.mutex.Lock()
232 | defer m.mutex.Unlock()
233 | return len(m.nodeData) == maxHeight
234 | }
235 |
236 | // New returns a new MemDB.
237 | func New(o *db.Options) *MemDB {
238 | return &MemDB{
239 | height: 1,
240 | cmp: o.GetComparer(),
241 | kvData: make([]byte, 0, 4096),
242 | // The first maxHeight values of nodeData are the next nodes after the
243 | // head node at each possible height. Their initial value is zeroNode.
244 | nodeData: make([]int, maxHeight, 256),
245 | }
246 | }
247 |
248 | // iterator is a MemDB iterator that buffers upcoming results, so that it does
249 | // not have to acquire the MemDB's mutex on each Next call.
250 | type iterator struct {
251 | m *MemDB
252 | // restartNode is the node to start refilling the buffer from.
253 | restartNode int
254 | // i0 is the current iterator position with respect to buf. A value of -1
255 | // means that the iterator is at the start, end or both of the iteration.
256 | // i1 is the number of buffered entries.
257 | // Invariant: -1 <= i0 && i0 < i1 && i1 <= len(buf).
258 | i0, i1 int
259 | // buf buffers up to 32 key/value pairs.
260 | buf [32][2][]byte
261 | }
262 |
263 | // iterator implements the db.Iterator interface.
264 | var _ db.Iterator = (*iterator)(nil)
265 |
266 | // fill fills the iterator's buffer with key/value pairs from the MemDB.
267 | //
268 | // Precondition: t.m.mutex is locked for reading.
269 | func (t *iterator) fill() {
270 | i, n := 0, t.restartNode
271 | for i < len(t.buf) && n != zeroNode {
272 | if t.m.nodeData[n+fVal] != kvOffsetDeletedNode {
273 | t.buf[i][fKey] = t.m.load(t.m.nodeData[n+fKey])
274 | t.buf[i][fVal] = t.m.load(t.m.nodeData[n+fVal])
275 | i++
276 | }
277 | n = t.m.nodeData[n+fNxt]
278 | }
279 | if i == 0 {
280 | // There were no non-deleted nodes on or after t.restartNode.
281 | // The iterator is exhausted.
282 | t.i0 = -1
283 | } else {
284 | t.i0 = 0
285 | }
286 | t.i1 = i
287 | t.restartNode = n
288 | }
289 |
290 | // Next implements Iterator.Next, as documented in the leveldb/db package.
291 | func (t *iterator) Next() bool {
292 | t.i0++
293 | if t.i0 < t.i1 {
294 | return true
295 | }
296 | if t.restartNode == zeroNode {
297 | t.i0 = -1
298 | t.i1 = 0
299 | return false
300 | }
301 | t.m.mutex.RLock()
302 | defer t.m.mutex.RUnlock()
303 | t.fill()
304 | return true
305 | }
306 |
307 | // Key implements Iterator.Key, as documented in the leveldb/db package.
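// (The returned slice aliases the MemDB's append-only kvData buffer, so
// it stays valid, and unchanged, across subsequent Sets and Deletes.)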
308 | func (t *iterator) Key() []byte { 309 | if t.i0 < 0 { 310 | return nil 311 | } 312 | return t.buf[t.i0][fKey] 313 | } 314 | 315 | // Value implements Iterator.Value, as documented in the leveldb/db package. 316 | func (t *iterator) Value() []byte { 317 | if t.i0 < 0 { 318 | return nil 319 | } 320 | return t.buf[t.i0][fVal] 321 | } 322 | 323 | // Close implements Iterator.Close, as documented in the leveldb/db package. 324 | func (t *iterator) Close() error { 325 | return nil 326 | } 327 | -------------------------------------------------------------------------------- /memdb/memdb_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2011 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package memdb 6 | 7 | import ( 8 | "fmt" 9 | "math/rand" 10 | "strconv" 11 | "strings" 12 | "testing" 13 | 14 | "github.com/golang/leveldb/db" 15 | ) 16 | 17 | // count returns the number of entries in a DB. 18 | func count(d db.DB) (n int) { 19 | x := d.Find(nil, nil) 20 | for x.Next() { 21 | n++ 22 | } 23 | if x.Close() != nil { 24 | return -1 25 | } 26 | return n 27 | } 28 | 29 | // compact compacts a MemDB. 30 | func compact(m *MemDB) (*MemDB, error) { 31 | n, x := New(nil), m.Find(nil, nil) 32 | for x.Next() { 33 | if err := n.Set(x.Key(), x.Value(), nil); err != nil { 34 | return nil, err 35 | } 36 | } 37 | if err := x.Close(); err != nil { 38 | return nil, err 39 | } 40 | return n, nil 41 | } 42 | 43 | func TestBasic(t *testing.T) { 44 | // Check the empty DB. 45 | m := New(nil) 46 | if got, want := count(m), 0; got != want { 47 | t.Fatalf("0.count: got %v, want %v", got, want) 48 | } 49 | v, err := m.Get([]byte("cherry"), nil) 50 | if string(v) != "" || err != db.ErrNotFound { 51 | t.Fatalf("1.get: got (%q, %v), want (%q, %v)", v, err, "", db.ErrNotFound) 52 | } 53 | // Add some key/value pairs. 54 | m.Set([]byte("cherry"), []byte("red"), nil) 55 | m.Set([]byte("peach"), []byte("yellow"), nil) 56 | m.Set([]byte("grape"), []byte("red"), nil) 57 | m.Set([]byte("grape"), []byte("green"), nil) 58 | m.Set([]byte("plum"), []byte("purple"), nil) 59 | if got, want := count(m), 4; got != want { 60 | t.Fatalf("2.count: got %v, want %v", got, want) 61 | } 62 | // Delete a key twice. 63 | if got, want := m.Delete([]byte("grape"), nil), error(nil); got != want { 64 | t.Fatalf("3.delete: got %v, want %v", got, want) 65 | } 66 | if got, want := m.Delete([]byte("grape"), nil), db.ErrNotFound; got != want { 67 | t.Fatalf("4.delete: got %v, want %v", got, want) 68 | } 69 | if got, want := count(m), 3; got != want { 70 | t.Fatalf("5.count: got %v, want %v", got, want) 71 | } 72 | // Get keys that are and aren't in the DB. 73 | v, err = m.Get([]byte("plum"), nil) 74 | if string(v) != "purple" || err != nil { 75 | t.Fatalf("6.get: got (%q, %v), want (%q, %v)", v, err, "purple", error(nil)) 76 | } 77 | v, err = m.Get([]byte("lychee"), nil) 78 | if string(v) != "" || err != db.ErrNotFound { 79 | t.Fatalf("7.get: got (%q, %v), want (%q, %v)", v, err, "", db.ErrNotFound) 80 | } 81 | // Check an iterator. 
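// (Editorial note: the live keys at this point are "cherry", "peach" and
// "plum", so Find([]byte("mango")) positions the iterator at "peach", the
// first key >= "mango".)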
82 | s, x := "", m.Find([]byte("mango"), nil) 83 | for x.Next() { 84 | s += fmt.Sprintf("%s/%s.", x.Key(), x.Value()) 85 | } 86 | if want := "peach/yellow.plum/purple."; s != want { 87 | t.Fatalf("8.iter: got %q, want %q", s, want) 88 | } 89 | if err = x.Close(); err != nil { 90 | t.Fatalf("9.close: %v", err) 91 | } 92 | // Check some more sets and deletes. 93 | if got, want := m.Delete([]byte("cherry"), nil), error(nil); got != want { 94 | t.Fatalf("10.delete: got %v, want %v", got, want) 95 | } 96 | if got, want := count(m), 2; got != want { 97 | t.Fatalf("11.count: got %v, want %v", got, want) 98 | } 99 | if err := m.Set([]byte("apricot"), []byte("orange"), nil); err != nil { 100 | t.Fatalf("12.set: %v", err) 101 | } 102 | if got, want := count(m), 3; got != want { 103 | t.Fatalf("13.count: got %v, want %v", got, want) 104 | } 105 | // Clean up. 106 | if err := m.Close(); err != nil { 107 | t.Fatalf("14.close: %v", err) 108 | } 109 | } 110 | 111 | func TestCount(t *testing.T) { 112 | m := New(nil) 113 | for i := 0; i < 200; i++ { 114 | if j := count(m); j != i { 115 | t.Fatalf("count: got %d, want %d", j, i) 116 | } 117 | m.Set([]byte{byte(i)}, nil, nil) 118 | } 119 | if err := m.Close(); err != nil { 120 | t.Fatal(err) 121 | } 122 | } 123 | 124 | func TestEmpty(t *testing.T) { 125 | m := New(nil) 126 | if !m.Empty() { 127 | t.Errorf("got !empty, want empty") 128 | } 129 | // Add one key/value pair with an empty key and empty value. 130 | m.Set(nil, nil, nil) 131 | if m.Empty() { 132 | t.Errorf("got empty, want !empty") 133 | } 134 | } 135 | 136 | func Test1000Entries(t *testing.T) { 137 | // Initialize the DB. 138 | const N = 1000 139 | m0 := New(nil) 140 | for i := 0; i < N; i++ { 141 | k := []byte(strconv.Itoa(i)) 142 | v := []byte(strings.Repeat("x", i)) 143 | m0.Set(k, v, nil) 144 | } 145 | // Delete one third of the entries, update another third, 146 | // and leave the last third alone. 147 | for i := 0; i < N; i++ { 148 | switch i % 3 { 149 | case 0: 150 | k := []byte(strconv.Itoa(i)) 151 | m0.Delete(k, nil) 152 | case 1: 153 | k := []byte(strconv.Itoa(i)) 154 | v := []byte(strings.Repeat("y", i)) 155 | m0.Set(k, v, nil) 156 | case 2: 157 | // No-op. 158 | } 159 | } 160 | // Check the DB count. 161 | if got, want := count(m0), 666; got != want { 162 | t.Fatalf("count: got %v, want %v", got, want) 163 | } 164 | // Check random-access lookup. 165 | r := rand.New(rand.NewSource(0)) 166 | for i := 0; i < 3*N; i++ { 167 | j := r.Intn(N) 168 | k := []byte(strconv.Itoa(j)) 169 | v, err := m0.Get(k, nil) 170 | if len(v) != cap(v) { 171 | t.Fatalf("get: j=%d, got len(v)=%d, cap(v)=%d", j, len(v), cap(v)) 172 | } 173 | var c uint8 174 | if len(v) != 0 { 175 | c = v[0] 176 | } 177 | switch j % 3 { 178 | case 0: 179 | if err != db.ErrNotFound { 180 | t.Fatalf("get: j=%d, got err=%v, want %v", j, err, db.ErrNotFound) 181 | } 182 | case 1: 183 | if len(v) != j || c != 'y' { 184 | t.Fatalf("get: j=%d, got len(v),c=%d,%c, want %d,%c", j, len(v), c, j, 'y') 185 | } 186 | case 2: 187 | if len(v) != j || c != 'x' { 188 | t.Fatalf("get: j=%d, got len(v),c=%d,%c, want %d,%c", j, len(v), c, j, 'x') 189 | } 190 | } 191 | } 192 | // Check that iterating through the middle of the DB looks OK. 193 | // Keys are in lexicographic order, not numerical order. 194 | // Multiples of 3 are not present. 
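// (Editorial note: keys compare byte-wise, so "499" sorts before "5"
// because '4' < '5', and "5" sorts before "50" because a string sorts
// before its extensions. That ordering produces the sequence below.)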
195 | wants := []string{ 196 | "499", 197 | "5", 198 | "50", 199 | "500", 200 | "502", 201 | "503", 202 | "505", 203 | "506", 204 | "508", 205 | "509", 206 | "511", 207 | } 208 | x := m0.Find([]byte(wants[0]), nil) 209 | for _, want := range wants { 210 | if !x.Next() { 211 | t.Fatalf("iter: next failed, want=%q", want) 212 | } 213 | if got := string(x.Key()); got != want { 214 | t.Fatalf("iter: got %q, want %q", got, want) 215 | } 216 | if k := x.Key(); len(k) != cap(k) { 217 | t.Fatalf("iter: len(k)=%d, cap(k)=%d", len(k), cap(k)) 218 | } 219 | if v := x.Value(); len(v) != cap(v) { 220 | t.Fatalf("iter: len(v)=%d, cap(v)=%d", len(v), cap(v)) 221 | } 222 | } 223 | if err := x.Close(); err != nil { 224 | t.Fatalf("close: %v", err) 225 | } 226 | // Check that compaction reduces memory usage by at least one third. 227 | amu0 := m0.ApproximateMemoryUsage() 228 | if amu0 == 0 { 229 | t.Fatalf("compact: memory usage is zero") 230 | } 231 | m1, err := compact(m0) 232 | if err != nil { 233 | t.Fatalf("compact: %v", err) 234 | } 235 | amu1 := m1.ApproximateMemoryUsage() 236 | if ratio := float64(amu1) / float64(amu0); ratio > 0.667 { 237 | t.Fatalf("compact: memory usage before=%d, after=%d, ratio=%f", amu0, amu1, ratio) 238 | } 239 | // Clean up. 240 | if err := m0.Close(); err != nil { 241 | t.Fatalf("close: %v", err) 242 | } 243 | } 244 | -------------------------------------------------------------------------------- /memfs/memfs.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package memfs provides a memory-backed db.FileSystem implementation. 6 | // 7 | // It can be useful for tests, and also for LevelDB instances that should not 8 | // ever touch persistent storage, such as a web browser's private browsing mode. 9 | package memfs // import "github.com/golang/leveldb/memfs" 10 | 11 | import ( 12 | "bytes" 13 | "errors" 14 | "fmt" 15 | "io" 16 | "os" 17 | "sort" 18 | "strings" 19 | "sync" 20 | "time" 21 | 22 | "github.com/golang/leveldb/db" 23 | ) 24 | 25 | const sep = string(os.PathSeparator) 26 | 27 | type nopCloser struct{} 28 | 29 | func (nopCloser) Close() error { 30 | return nil 31 | } 32 | 33 | // New returns a new memory-backed db.FileSystem implementation. 34 | func New() db.FileSystem { 35 | return &fileSystem{ 36 | root: &node{ 37 | children: make(map[string]*node), 38 | isDir: true, 39 | }, 40 | } 41 | } 42 | 43 | // fileSystem implements db.FileSystem. 44 | type fileSystem struct { 45 | mu sync.Mutex 46 | root *node 47 | } 48 | 49 | func (y *fileSystem) String() string { 50 | y.mu.Lock() 51 | defer y.mu.Unlock() 52 | 53 | s := new(bytes.Buffer) 54 | y.root.dump(s, 0) 55 | return s.String() 56 | } 57 | 58 | // walk walks the directory tree for the fullname, calling f at each step. If 59 | // f returns an error, the walk will be aborted and return that same error. 60 | // 61 | // Each walk is atomic: y's mutex is held for the entire operation, including 62 | // all calls to f. 63 | // 64 | // dir is the directory at that step, frag is the name fragment, and final is 65 | // whether it is the final step. 
65 | // whether it is the final step.
For example, walking "/foo/bar/x" will result 66 | // in 3 calls to f: 67 | // - "/", "foo", false 68 | // - "/foo/", "bar", false 69 | // - "/foo/bar/", "x", true 70 | // Similarly, walking "/y/z/", with a trailing slash, will result in 3 calls to f: 71 | // - "/", "y", false 72 | // - "/y/", "z", false 73 | // - "/y/z/", "", true 74 | func (y *fileSystem) walk(fullname string, f func(dir *node, frag string, final bool) error) error { 75 | y.mu.Lock() 76 | defer y.mu.Unlock() 77 | 78 | // For memfs, the current working directory is the same as the root directory, 79 | // so we strip off any leading "/"s to make fullname a relative path, and 80 | // the walk starts at y.root. 81 | for len(fullname) > 0 && fullname[0] == os.PathSeparator { 82 | fullname = fullname[1:] 83 | } 84 | dir := y.root 85 | 86 | for { 87 | frag, remaining := fullname, "" 88 | i := strings.IndexRune(fullname, os.PathSeparator) 89 | final := i < 0 90 | if !final { 91 | frag, remaining = fullname[:i], fullname[i+1:] 92 | for len(remaining) > 0 && remaining[0] == os.PathSeparator { 93 | remaining = remaining[1:] 94 | } 95 | } 96 | if err := f(dir, frag, final); err != nil { 97 | return err 98 | } 99 | if final { 100 | break 101 | } 102 | child := dir.children[frag] 103 | if child == nil { 104 | return errors.New("leveldb/memfs: no such directory") 105 | } 106 | if !child.isDir { 107 | return errors.New("leveldb/memfs: not a directory") 108 | } 109 | dir, fullname = child, remaining 110 | } 111 | return nil 112 | } 113 | 114 | func (y *fileSystem) Create(fullname string) (db.File, error) { 115 | var ret *file 116 | err := y.walk(fullname, func(dir *node, frag string, final bool) error { 117 | if final { 118 | if frag == "" { 119 | return errors.New("leveldb/memfs: empty file name") 120 | } 121 | n := &node{name: frag} 122 | dir.children[frag] = n 123 | ret = &file{ 124 | n: n, 125 | write: true, 126 | } 127 | } 128 | return nil 129 | }) 130 | if err != nil { 131 | return nil, err 132 | } 133 | return ret, nil 134 | } 135 | 136 | func (y *fileSystem) Open(fullname string) (db.File, error) { 137 | var ret *file 138 | err := y.walk(fullname, func(dir *node, frag string, final bool) error { 139 | if final { 140 | if frag == "" { 141 | return errors.New("leveldb/memfs: empty file name") 142 | } 143 | if n := dir.children[frag]; n != nil { 144 | ret = &file{ 145 | n: n, 146 | read: true, 147 | } 148 | } 149 | } 150 | return nil 151 | }) 152 | if err != nil { 153 | return nil, err 154 | } 155 | if ret == nil { 156 | return nil, &os.PathError{ 157 | Op: "open", 158 | Path: fullname, 159 | Err: os.ErrNotExist, 160 | } 161 | } 162 | return ret, nil 163 | } 164 | 165 | func (y *fileSystem) Remove(fullname string) error { 166 | return y.walk(fullname, func(dir *node, frag string, final bool) error { 167 | if final { 168 | if frag == "" { 169 | return errors.New("leveldb/memfs: empty file name") 170 | } 171 | _, ok := dir.children[frag] 172 | if !ok { 173 | return errors.New("leveldb/memfs: no such file or directory") 174 | } 175 | delete(dir.children, frag) 176 | } 177 | return nil 178 | }) 179 | } 180 | 181 | func (y *fileSystem) Rename(oldname, newname string) error { 182 | var n *node 183 | err := y.walk(oldname, func(dir *node, frag string, final bool) error { 184 | if final { 185 | if frag == "" { 186 | return errors.New("leveldb/memfs: empty file name") 187 | } 188 | n = dir.children[frag] 189 | delete(dir.children, frag) 190 | } 191 | return nil 192 | }) 193 | if err != nil { 194 | return err 195 | } 196 | if n == nil { 197 | 
return errors.New("leveldb/memfs: no such file or directory") 198 | } 199 | return y.walk(newname, func(dir *node, frag string, final bool) error { 200 | if final { 201 | if frag == "" { 202 | return errors.New("leveldb/memfs: empty file name") 203 | } 204 | dir.children[frag] = n 205 | } 206 | return nil 207 | }) 208 | } 209 | 210 | func (y *fileSystem) MkdirAll(dirname string, perm os.FileMode) error { 211 | return y.walk(dirname, func(dir *node, frag string, final bool) error { 212 | if frag == "" { 213 | if final { 214 | return nil 215 | } 216 | return errors.New("leveldb/memfs: empty file name") 217 | } 218 | child := dir.children[frag] 219 | if child == nil { 220 | dir.children[frag] = &node{ 221 | name: frag, 222 | children: make(map[string]*node), 223 | isDir: true, 224 | } 225 | return nil 226 | } 227 | if !child.isDir { 228 | return errors.New("leveldb/memfs: not a directory") 229 | } 230 | return nil 231 | }) 232 | } 233 | 234 | func (y *fileSystem) Lock(fullname string) (io.Closer, error) { 235 | // FileSystem.Lock excludes other processes, but other processes cannot 236 | // see this process' memory, so Lock is a no-op. 237 | return nopCloser{}, nil 238 | } 239 | 240 | func (y *fileSystem) List(dirname string) ([]string, error) { 241 | if !strings.HasSuffix(dirname, sep) { 242 | dirname += sep 243 | } 244 | var ret []string 245 | err := y.walk(dirname, func(dir *node, frag string, final bool) error { 246 | if final { 247 | if frag != "" { 248 | panic("unreachable") 249 | } 250 | ret = make([]string, 0, len(dir.children)) 251 | for s := range dir.children { 252 | ret = append(ret, s) 253 | } 254 | } 255 | return nil 256 | }) 257 | return ret, err 258 | } 259 | 260 | func (y *fileSystem) Stat(name string) (os.FileInfo, error) { 261 | f, err := y.Open(name) 262 | if err != nil { 263 | if pe, ok := err.(*os.PathError); ok { 264 | pe.Op = "stat" 265 | } 266 | return nil, err 267 | } 268 | defer f.Close() 269 | return f.Stat() 270 | } 271 | 272 | // node holds a file's data or a directory's children, and implements os.FileInfo. 273 | type node struct { 274 | name string 275 | data []byte 276 | modTime time.Time 277 | children map[string]*node 278 | isDir bool 279 | } 280 | 281 | func (f *node) IsDir() bool { 282 | return f.isDir 283 | } 284 | 285 | func (f *node) ModTime() time.Time { 286 | return f.modTime 287 | } 288 | 289 | func (f *node) Mode() os.FileMode { 290 | if f.isDir { 291 | return os.ModeDir | 0755 292 | } 293 | return 0755 294 | } 295 | 296 | func (f *node) Name() string { 297 | return f.name 298 | } 299 | 300 | func (f *node) Size() int64 { 301 | return int64(len(f.data)) 302 | } 303 | 304 | func (f *node) Sys() interface{} { 305 | return nil 306 | } 307 | 308 | func (f *node) dump(w *bytes.Buffer, level int) { 309 | if f.isDir { 310 | w.WriteString(" ") 311 | } else { 312 | fmt.Fprintf(w, "%8d ", len(f.data)) 313 | } 314 | for i := 0; i < level; i++ { 315 | w.WriteString(" ") 316 | } 317 | w.WriteString(f.name) 318 | if !f.isDir { 319 | w.WriteByte('\n') 320 | return 321 | } 322 | w.WriteByte(os.PathSeparator) 323 | w.WriteByte('\n') 324 | names := make([]string, 0, len(f.children)) 325 | for name := range f.children { 326 | names = append(names, name) 327 | } 328 | sort.Strings(names) 329 | for _, name := range names { 330 | f.children[name].dump(w, level+1) 331 | } 332 | } 333 | 334 | // file is a reader or writer of a node's data, and implements db.File. 
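// (Editorial addition: a minimal usage sketch. Files are only obtained via
// the file system's Create and Open, never constructed directly.)
//
//	fs := New()
//	fs.MkdirAll("/foo", 0755)
//	f, _ := fs.Create("/foo/bar")
//	f.Write([]byte("hello"))
//	f.Close()
//	g, _ := fs.Open("/foo/bar")
//	buf := make([]byte, 5)
//	g.Read(buf) // buf now holds "hello"
//	g.Close()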
335 | type file struct { 336 | n *node 337 | rpos int 338 | read, write bool 339 | } 340 | 341 | func (f *file) Close() error { 342 | return nil 343 | } 344 | 345 | func (f *file) Read(p []byte) (int, error) { 346 | if !f.read { 347 | return 0, errors.New("leveldb/memfs: file was not opened for reading") 348 | } 349 | if f.n.isDir { 350 | return 0, errors.New("leveldb/memfs: cannot read a directory") 351 | } 352 | if f.rpos >= len(f.n.data) { 353 | return 0, io.EOF 354 | } 355 | n := copy(p, f.n.data[f.rpos:]) 356 | f.rpos += n 357 | return n, nil 358 | } 359 | 360 | func (f *file) ReadAt(p []byte, off int64) (int, error) { 361 | if !f.read { 362 | return 0, errors.New("leveldb/memfs: file was not opened for reading") 363 | } 364 | if f.n.isDir { 365 | return 0, errors.New("leveldb/memfs: cannot read a directory") 366 | } 367 | if off >= int64(len(f.n.data)) { 368 | return 0, io.EOF 369 | } 370 | return copy(p, f.n.data[off:]), nil 371 | } 372 | 373 | func (f *file) Write(p []byte) (int, error) { 374 | if !f.write { 375 | return 0, errors.New("leveldb/memfs: file was not created for writing") 376 | } 377 | if f.n.isDir { 378 | return 0, errors.New("leveldb/memfs: cannot write a directory") 379 | } 380 | f.n.modTime = time.Now() 381 | f.n.data = append(f.n.data, p...) 382 | return len(p), nil 383 | } 384 | 385 | func (f *file) Stat() (os.FileInfo, error) { 386 | return f.n, nil 387 | } 388 | 389 | func (f *file) Sync() error { 390 | return nil 391 | } 392 | -------------------------------------------------------------------------------- /memfs/memfs_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package memfs 6 | 7 | import ( 8 | "io" 9 | "os" 10 | "sort" 11 | "strconv" 12 | "strings" 13 | "testing" 14 | 15 | "github.com/golang/leveldb/db" 16 | ) 17 | 18 | func normalize(name string) string { 19 | if os.PathSeparator == '/' { 20 | return name 21 | } 22 | return strings.Replace(name, "/", string(os.PathSeparator), -1) 23 | } 24 | 25 | func TestBasics(t *testing.T) { 26 | fs := New() 27 | testCases := []string{ 28 | // Create a top-level file. 29 | "1a: create /foo", 30 | // Create a child of that file. It should fail, since /foo is not a directory. 31 | "2a: create /foo/x fails", 32 | // Create a third-level file. It should fail, since /bar has not been created. 33 | // Similarly, opening that file should fail. 34 | "3a: create /bar/baz/y fails", 35 | "3b: open /bar/baz/y fails", 36 | // Make the /bar/baz directory; create a third-level file. Creation should now succeed. 37 | "4a: mkdirall /bar/baz", 38 | "4b: f = create /bar/baz/y", 39 | "4c: f.stat.name == y", 40 | // Write some data; read it back. 41 | "5a: f.write abcde", 42 | "5b: f.close", 43 | "5c: f = open /bar/baz/y", 44 | "5d: f.read 5 == abcde", 45 | "5e: f.readat 2 1 == bc", 46 | "5f: f.close", 47 | // Remove the file twice. The first should succeed, the second should fail. 48 | "6a: remove /bar/baz/y", 49 | "6b: remove /bar/baz/y fails", 50 | "6c: open /bar/baz/y fails", 51 | // Rename /foo to /goo. Trying to open /foo should succeed before the rename and 52 | // fail afterwards, and vice versa for /goo. 53 | "7a: open /foo", 54 | "7b: open /goo fails", 55 | "7c: rename /foo /goo", 56 | "7d: open /foo fails", 57 | "7e: open /goo", 58 | // Create /bar/baz/z and rename /bar/baz to /bar/caz. 
59 | "8a: create /bar/baz/z", 60 | "8b: open /bar/baz/z", 61 | "8c: open /bar/caz/z fails", 62 | "8d: rename /bar/baz /bar/caz", 63 | "8e: open /bar/baz/z fails", 64 | "8f: open /bar/caz/z", 65 | } 66 | var f db.File 67 | for _, tc := range testCases { 68 | s := strings.Split(tc, " ")[1:] 69 | 70 | saveF := s[0] == "f" && s[1] == "=" 71 | if saveF { 72 | s = s[2:] 73 | } 74 | 75 | fails := s[len(s)-1] == "fails" 76 | if fails { 77 | s = s[:len(s)-1] 78 | } 79 | 80 | var ( 81 | fi os.FileInfo 82 | g db.File 83 | err error 84 | ) 85 | switch s[0] { 86 | case "create": 87 | g, err = fs.Create(normalize(s[1])) 88 | case "open": 89 | g, err = fs.Open(normalize(s[1])) 90 | case "mkdirall": 91 | err = fs.MkdirAll(normalize(s[1]), 0755) 92 | case "remove": 93 | err = fs.Remove(normalize(s[1])) 94 | case "rename": 95 | err = fs.Rename(normalize(s[1]), normalize(s[2])) 96 | case "f.write": 97 | _, err = f.Write([]byte(s[1])) 98 | case "f.read": 99 | n, _ := strconv.Atoi(s[1]) 100 | buf := make([]byte, n) 101 | _, err = io.ReadFull(f, buf) 102 | if err != nil { 103 | break 104 | } 105 | if got, want := string(buf), s[3]; got != want { 106 | t.Fatalf("%q: got %q, want %q", tc, got, want) 107 | } 108 | case "f.readat": 109 | n, _ := strconv.Atoi(s[1]) 110 | off, _ := strconv.Atoi(s[2]) 111 | buf := make([]byte, n) 112 | _, err = f.ReadAt(buf, int64(off)) 113 | if err != nil { 114 | break 115 | } 116 | if got, want := string(buf), s[4]; got != want { 117 | t.Fatalf("%q: got %q, want %q", tc, got, want) 118 | } 119 | case "f.close": 120 | f, err = nil, f.Close() 121 | case "f.stat.name": 122 | fi, err = f.Stat() 123 | if err != nil { 124 | break 125 | } 126 | if got, want := fi.Name(), s[2]; got != want { 127 | t.Fatalf("%q: got %q, want %q", tc, got, want) 128 | } 129 | default: 130 | t.Fatalf("bad test case: %q", tc) 131 | } 132 | 133 | if saveF { 134 | f, g = g, nil 135 | } else if g != nil { 136 | g.Close() 137 | } 138 | 139 | if fails { 140 | if err == nil { 141 | t.Fatalf("%q: got nil error, want non-nil", tc) 142 | } 143 | } else { 144 | if err != nil { 145 | t.Fatalf("%q: %v", tc, err) 146 | } 147 | } 148 | } 149 | } 150 | 151 | func TestList(t *testing.T) { 152 | fs := New() 153 | 154 | dirnames := []string{ 155 | "/bar", 156 | "/foo/2", 157 | } 158 | for _, dirname := range dirnames { 159 | err := fs.MkdirAll(normalize(dirname), 0755) 160 | if err != nil { 161 | t.Fatalf("MkdirAll %q: %v", dirname, err) 162 | } 163 | } 164 | 165 | filenames := []string{ 166 | "/a", 167 | "/bar/baz", 168 | "/foo/0", 169 | "/foo/1", 170 | "/foo/2/a", 171 | "/foo/2/b", 172 | "/foo/3", 173 | "/foot", 174 | } 175 | for _, filename := range filenames { 176 | f, err := fs.Create(normalize(filename)) 177 | if err != nil { 178 | t.Fatalf("Create %q: %v", filename, err) 179 | } 180 | if err := f.Close(); err != nil { 181 | t.Fatalf("Close %q: %v", filename, err) 182 | } 183 | } 184 | 185 | { 186 | got := fs.(*fileSystem).String() 187 | want := normalize(` / 188 | 0 a 189 | bar/ 190 | 0 baz 191 | foo/ 192 | 0 0 193 | 0 1 194 | 2/ 195 | 0 a 196 | 0 b 197 | 0 3 198 | 0 foot 199 | `) 200 | if got != want { 201 | t.Fatalf("String:\n----got----\n%s----want----\n%s", got, want) 202 | } 203 | } 204 | 205 | testCases := []string{ 206 | "/:a bar foo foot", 207 | "/bar:baz", 208 | "/bar/:baz", 209 | "/baz:", 210 | "/baz/:", 211 | "/foo:0 1 2 3", 212 | "/foo/:0 1 2 3", 213 | "/foo/1:", 214 | "/foo/1/:", 215 | "/foo/2:a b", 216 | "/foo/2/:a b", 217 | "/foot:", 218 | "/foot/:", 219 | } 220 | for _, tc := range testCases { 221 | s := 
strings.Split(tc, ":") 222 | list, _ := fs.List(normalize(s[0])) 223 | sort.Strings(list) 224 | got := strings.Join(list, " ") 225 | want := s[1] 226 | if got != want { 227 | t.Errorf("List %q: got %q, want %q", s[0], got, want) 228 | } 229 | } 230 | } 231 | -------------------------------------------------------------------------------- /table/table.go: -------------------------------------------------------------------------------- 1 | // Copyright 2011 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | /* 6 | Package table implements readers and writers of leveldb tables. 7 | 8 | Tables are either opened for reading or created for writing but not both. 9 | 10 | A reader can create iterators, which yield all key/value pairs whose keys 11 | are 'greater than or equal' to a starting key. There may be multiple key/ 12 | value pairs that have the same key. 13 | 14 | A reader can be used concurrently. Multiple goroutines can call Find 15 | concurrently, and each iterator can run concurrently with other iterators. 16 | However, any particular iterator should not be used concurrently, and 17 | iterators should not be used once a reader is closed. 18 | 19 | A writer writes key/value pairs in increasing key order, and cannot be used 20 | concurrently. A table cannot be read until the writer has finished. 21 | 22 | Readers and writers can be created with various options. Passing a nil 23 | Options pointer is valid and means to use the default values. 24 | 25 | One such option is to define the 'less than' ordering for keys. The default 26 | Comparer uses the natural ordering consistent with bytes.Compare. The same 27 | ordering should be used for reading and writing a table. 28 | 29 | To return the value for a key: 30 | 31 | r := table.NewReader(file, options) 32 | defer r.Close() 33 | return r.Get(key) 34 | 35 | To count the number of entries in a table: 36 | 37 | i, n := r.Find(nil, ropts), 0 38 | for i.Next() { 39 | n++ 40 | } 41 | if err := i.Close(); err != nil { 42 | return 0, err 43 | } 44 | return n, nil 45 | 46 | To write a table with three entries: 47 | 48 | w := table.NewWriter(file, options) 49 | if err := w.Set([]byte("apple"), []byte("red"), wopts); err != nil { 50 | w.Close() 51 | return err 52 | } 53 | if err := w.Set([]byte("banana"), []byte("yellow"), wopts); err != nil { 54 | w.Close() 55 | return err 56 | } 57 | if err := w.Set([]byte("cherry"), []byte("red"), wopts); err != nil { 58 | w.Close() 59 | return err 60 | } 61 | return w.Close() 62 | */ 63 | package table // import "github.com/golang/leveldb/table" 64 | 65 | /* 66 | The table file format looks like: 67 | 68 | 69 | [data block 0] 70 | [data block 1] 71 | ... 72 | [data block N-1] 73 | [meta block 0] 74 | [meta block 1] 75 | ... 76 | [meta block K-1] 77 | [metaindex block] 78 | [index block] 79 | [footer] 80 | 81 | 82 | Each block consists of some data and a 5 byte trailer: a 1 byte block type and 83 | a 4 byte checksum of the compressed data. The block type gives the per-block 84 | compression used; each block is compressed independently. The checksum 85 | algorithm is described in the leveldb/crc package. 86 | 87 | The decompressed block data consists of a sequence of key/value entries 88 | followed by a trailer. Each key is encoded as a shared prefix length and a 89 | remainder string. 
For example, if two adjacent keys are "tweedledee" and
90 | "tweedledum", then the second key would be encoded as {8, "um"}. The shared
91 | prefix length is varint encoded. The remainder string and the value are
92 | encoded as a varint-encoded length followed by the literal contents. To
93 | continue the example, suppose that the key "tweedledum" mapped to the value
94 | "socks". The encoded key/value entry would be: "\x08\x02\x05umsocks".
95 | 
96 | Every block has a restart interval I. Every I'th key/value entry in that block
97 | is called a restart point, and shares no key prefix with the previous entry.
98 | Continuing the example above, if the key after "tweedledum" was "two", but was
99 | part of a restart point, then that key would be encoded as {0, "two"} instead
100 | of {2, "o"}. If a block has P restart points, then the block trailer consists
101 | of (P+1)*4 bytes: (P+1) little-endian uint32 values. The first P of these
102 | uint32 values are the block offsets of each restart point. The final uint32
103 | value is P itself. Thus, when seeking a particular key, one can use binary
104 | search to find the largest restart point whose key is <= the key sought.
105 | 
106 | An index block is a block with N key/value entries. The i'th value is the
107 | encoded block handle of the i'th data block. The i'th key is a separator for
108 | i < N-1, and a successor for i == N-1. The separator between blocks i and i+1
109 | is a key that is >= every key in block i and is < every key in block i+1. The
110 | successor for the final block is a key that is >= every key in block N-1. The
111 | index block restart interval is 1: every entry is a restart point.
112 | 
113 | The table footer is exactly 48 bytes long:
114 | - the block handle for the metaindex block,
115 | - the block handle for the index block,
116 | - padding to take the two items above up to 40 bytes,
117 | - an 8-byte magic string.
118 | 
119 | A block handle is an offset and a length; the length does not include the 5
120 | byte trailer. Both numbers are varint-encoded, with no padding between the two
121 | values. The maximum size of an encoded block handle is therefore 20 bytes.
122 | */
123 | 
124 | const (
125 | blockTrailerLen = 5
126 | footerLen = 48
127 | 
128 | magic = "\x57\xfb\x80\x8b\x24\x75\x47\xdb"
129 | 
130 | // The block type gives the per-block compression format.
131 | // These constants are part of the file format and should not be changed.
132 | // They are different from the db.Compression constants because the latter
133 | // are designed so that the zero value of the db.Compression type means to
134 | // use the default compression (which is snappy).
135 | noCompressionBlockType = 0
136 | snappyCompressionBlockType = 1
137 | )
138 | -------------------------------------------------------------------------------- /table/writer.go: -------------------------------------------------------------------------------- 1 | // Copyright 2011 The LevelDB-Go Authors. All rights reserved.
2 | // Use of this source code is governed by a BSD-style
3 | // license that can be found in the LICENSE file.
4 | 
5 | package table
6 | 
7 | import (
8 | "bufio"
9 | "encoding/binary"
10 | "errors"
11 | "fmt"
12 | "io"
13 | 
14 | "github.com/golang/leveldb/crc"
15 | "github.com/golang/leveldb/db"
16 | "github.com/golang/snappy"
17 | )
18 | 
19 | // indexEntry is a block handle and the length of the separator key.
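// (Editorial worked example: a block handle with offset 0 and length 165
// varint-encodes to the three bytes 0x00, 0xa5, 0x01. The offset is the
// single byte 0x00; the length is 0xa5, which is 165&0x7f with the
// continuation bit set, followed by 0x01, which is 165>>7.)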
20 | type indexEntry struct { 21 | bh blockHandle 22 | keyLen int 23 | } 24 | 25 | // filterBaseLog being 11 means that we generate a new filter for every 2KiB of 26 | // data. 27 | // 28 | // It's a little unfortunate that this is 11, whilst the default db.Options 29 | // BlockSize is 1<<12 or 4KiB, so that in practice, every second filter is 30 | // empty, but both values match the C++ code. 31 | const filterBaseLog = 11 32 | 33 | type filterWriter struct { 34 | policy db.FilterPolicy 35 | // block holds the keys for the current block. The buffers are re-used for 36 | // each new block. 37 | block struct { 38 | data []byte 39 | lengths []int 40 | keys [][]byte 41 | } 42 | // data and offsets are the per-block filters for the overall table. 43 | data []byte 44 | offsets []uint32 45 | } 46 | 47 | func (f *filterWriter) hasKeys() bool { 48 | return len(f.block.lengths) != 0 49 | } 50 | 51 | func (f *filterWriter) appendKey(key []byte) { 52 | f.block.data = append(f.block.data, key...) 53 | f.block.lengths = append(f.block.lengths, len(key)) 54 | } 55 | 56 | func (f *filterWriter) appendOffset() error { 57 | o := len(f.data) 58 | if uint64(o) > 1<<32-1 { 59 | return errors.New("leveldb/table: filter data is too long") 60 | } 61 | f.offsets = append(f.offsets, uint32(o)) 62 | return nil 63 | } 64 | 65 | func (f *filterWriter) emit() error { 66 | if err := f.appendOffset(); err != nil { 67 | return err 68 | } 69 | if !f.hasKeys() { 70 | return nil 71 | } 72 | 73 | i, j := 0, 0 74 | for _, length := range f.block.lengths { 75 | j += length 76 | f.block.keys = append(f.block.keys, f.block.data[i:j]) 77 | i = j 78 | } 79 | f.data = f.policy.AppendFilter(f.data, f.block.keys) 80 | 81 | // Reset the per-block state. 82 | f.block.data = f.block.data[:0] 83 | f.block.lengths = f.block.lengths[:0] 84 | f.block.keys = f.block.keys[:0] 85 | return nil 86 | } 87 | 88 | func (f *filterWriter) finishBlock(blockOffset uint64) error { 89 | for i := blockOffset >> filterBaseLog; i > uint64(len(f.offsets)); { 90 | if err := f.emit(); err != nil { 91 | return err 92 | } 93 | } 94 | return nil 95 | } 96 | 97 | func (f *filterWriter) finish() ([]byte, error) { 98 | if f.hasKeys() { 99 | if err := f.emit(); err != nil { 100 | return nil, err 101 | } 102 | } 103 | if err := f.appendOffset(); err != nil { 104 | return nil, err 105 | } 106 | 107 | var b [4]byte 108 | for _, x := range f.offsets { 109 | binary.LittleEndian.PutUint32(b[:], x) 110 | f.data = append(f.data, b[0], b[1], b[2], b[3]) 111 | } 112 | f.data = append(f.data, filterBaseLog) 113 | return f.data, nil 114 | } 115 | 116 | // Writer is a table writer. It implements the DB interface, as documented 117 | // in the leveldb/db package. 118 | type Writer struct { 119 | writer io.Writer 120 | bufWriter *bufio.Writer 121 | closer io.Closer 122 | err error 123 | // The next four fields are copied from a db.Options. 124 | blockRestartInterval int 125 | blockSize int 126 | cmp db.Comparer 127 | compression db.Compression 128 | // A table is a series of blocks and a block's index entry contains a 129 | // separator key between one block and the next. Thus, a finished block 130 | // cannot be written until the first key in the next block is seen. 131 | // pendingBH is the blockHandle of a finished block that is waiting for 132 | // the next call to Set. If the writer is not in this state, pendingBH 133 | // is zero. 134 | pendingBH blockHandle 135 | // offset is the offset (relative to the table start) of the next block 136 | // to be written. 
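// (Editorial example: writeRawBlock advances offset by len(block) plus the
// 5-byte trailer, so after writing a single 160-byte block, offset is 165.)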
137 | offset uint64 138 | // prevKey is a copy of the key most recently passed to Set. 139 | prevKey []byte 140 | // indexKeys and indexEntries hold the separator keys between each block 141 | // and the successor key for the final block. indexKeys contains the key's 142 | // bytes concatenated together. The keyLen field of each indexEntries 143 | // element is the length of the respective separator key. 144 | indexKeys []byte 145 | indexEntries []indexEntry 146 | // The next three fields hold data for the current block: 147 | // - buf is the accumulated uncompressed bytes, 148 | // - nEntries is the number of entries, 149 | // - restarts are the offsets (relative to the block start) of each 150 | // restart point. 151 | buf []byte 152 | nEntries int 153 | restarts []uint32 154 | // compressedBuf is the destination buffer for snappy compression. It is 155 | // re-used over the lifetime of the writer, avoiding the allocation of a 156 | // temporary buffer for each block. 157 | compressedBuf []byte 158 | // filter accumulates the filter block. 159 | filter filterWriter 160 | // tmp is a scratch buffer, large enough to hold either footerLen bytes, 161 | // blockTrailerLen bytes, or (5 * binary.MaxVarintLen64) bytes. 162 | tmp [50]byte 163 | } 164 | 165 | // Writer implements the db.DB interface. 166 | var _ db.DB = (*Writer)(nil) 167 | 168 | // Get is provided to implement the DB interface, but returns an error, as a 169 | // Writer cannot read from a table. 170 | func (w *Writer) Get(key []byte, o *db.ReadOptions) ([]byte, error) { 171 | return nil, errors.New("leveldb/table: cannot Get from a write-only table") 172 | } 173 | 174 | // Delete is provided to implement the DB interface, but returns an error, as a 175 | // Writer can only append key/value pairs. 176 | func (w *Writer) Delete(key []byte, o *db.WriteOptions) error { 177 | return errors.New("leveldb/table: cannot Delete from a table") 178 | } 179 | 180 | // Find is provided to implement the DB interface, but returns an error, as a 181 | // Writer cannot read from a table. 182 | func (w *Writer) Find(key []byte, o *db.ReadOptions) db.Iterator { 183 | return &tableIter{ 184 | err: errors.New("leveldb/table: cannot Find from a write-only table"), 185 | } 186 | } 187 | 188 | // Set implements DB.Set, as documented in the leveldb/db package. For a given 189 | // Writer, the keys passed to Set must be in increasing order. 190 | func (w *Writer) Set(key, value []byte, o *db.WriteOptions) error { 191 | if w.err != nil { 192 | return w.err 193 | } 194 | if w.cmp.Compare(w.prevKey, key) >= 0 { 195 | w.err = fmt.Errorf("leveldb/table: Set called in non-increasing key order: %q, %q", w.prevKey, key) 196 | return w.err 197 | } 198 | if w.filter.policy != nil { 199 | w.filter.appendKey(key) 200 | } 201 | w.flushPendingBH(key) 202 | w.append(key, value, w.nEntries%w.blockRestartInterval == 0) 203 | // If the estimated block size is sufficiently large, finish the current block. 204 | if len(w.buf)+4*(len(w.restarts)+1) >= w.blockSize { 205 | bh, err := w.finishBlock() 206 | if err != nil { 207 | w.err = err 208 | return w.err 209 | } 210 | w.pendingBH = bh 211 | } 212 | return nil 213 | } 214 | 215 | // flushPendingBH adds any pending block handle to the index entries. 216 | func (w *Writer) flushPendingBH(key []byte) { 217 | if w.pendingBH.length == 0 { 218 | // A valid blockHandle must be non-zero. 219 | // In particular, it must have a non-zero length. 
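// (Editorial note: finishBlock always appends at least one restart point
// plus the restart-count word, so a real handle's length is at least 8 and
// the zero blockHandle can safely act as the "nothing pending" sentinel.)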
220 | return 221 | } 222 | n0 := len(w.indexKeys) 223 | w.indexKeys = w.cmp.AppendSeparator(w.indexKeys, w.prevKey, key) 224 | n1 := len(w.indexKeys) 225 | w.indexEntries = append(w.indexEntries, indexEntry{w.pendingBH, n1 - n0}) 226 | w.pendingBH = blockHandle{} 227 | } 228 | 229 | // append appends a key/value pair, which may also be a restart point. 230 | func (w *Writer) append(key, value []byte, restart bool) { 231 | nShared := 0 232 | if restart { 233 | w.restarts = append(w.restarts, uint32(len(w.buf))) 234 | } else { 235 | nShared = db.SharedPrefixLen(w.prevKey, key) 236 | } 237 | w.prevKey = append(w.prevKey[:0], key...) 238 | w.nEntries++ 239 | n := binary.PutUvarint(w.tmp[0:], uint64(nShared)) 240 | n += binary.PutUvarint(w.tmp[n:], uint64(len(key)-nShared)) 241 | n += binary.PutUvarint(w.tmp[n:], uint64(len(value))) 242 | w.buf = append(w.buf, w.tmp[:n]...) 243 | w.buf = append(w.buf, key[nShared:]...) 244 | w.buf = append(w.buf, value...) 245 | } 246 | 247 | // finishBlock finishes the current block and returns its block handle, which is 248 | // its offset and length in the table. 249 | func (w *Writer) finishBlock() (blockHandle, error) { 250 | // Write the restart points to the buffer. 251 | if w.nEntries == 0 { 252 | // Every block must have at least one restart point. 253 | w.restarts = w.restarts[:1] 254 | w.restarts[0] = 0 255 | } 256 | tmp4 := w.tmp[:4] 257 | for _, x := range w.restarts { 258 | binary.LittleEndian.PutUint32(tmp4, x) 259 | w.buf = append(w.buf, tmp4...) 260 | } 261 | binary.LittleEndian.PutUint32(tmp4, uint32(len(w.restarts))) 262 | w.buf = append(w.buf, tmp4...) 263 | 264 | // Compress the buffer, discarding the result if the improvement 265 | // isn't at least 12.5%. 266 | b := w.buf 267 | blockType := byte(noCompressionBlockType) 268 | if w.compression == db.SnappyCompression { 269 | compressed := snappy.Encode(w.compressedBuf, b) 270 | w.compressedBuf = compressed[:cap(compressed)] 271 | if len(compressed) < len(b)-len(b)/8 { 272 | blockType = snappyCompressionBlockType 273 | b = compressed 274 | } 275 | } 276 | bh, err := w.writeRawBlock(b, blockType) 277 | 278 | // Calculate filters. 279 | if w.filter.policy != nil { 280 | w.filter.finishBlock(w.offset) 281 | } 282 | 283 | // Reset the per-block state. 284 | w.buf = w.buf[:0] 285 | w.nEntries = 0 286 | w.restarts = w.restarts[:0] 287 | 288 | return bh, err 289 | } 290 | 291 | func (w *Writer) writeRawBlock(b []byte, blockType byte) (blockHandle, error) { 292 | w.tmp[0] = blockType 293 | 294 | // Calculate the checksum. 295 | checksum := crc.New(b).Update(w.tmp[:1]).Value() 296 | binary.LittleEndian.PutUint32(w.tmp[1:5], checksum) 297 | 298 | // Write the bytes to the file. 299 | if _, err := w.writer.Write(b); err != nil { 300 | return blockHandle{}, err 301 | } 302 | if _, err := w.writer.Write(w.tmp[:5]); err != nil { 303 | return blockHandle{}, err 304 | } 305 | bh := blockHandle{w.offset, uint64(len(b))} 306 | w.offset += uint64(len(b)) + blockTrailerLen 307 | return bh, nil 308 | } 309 | 310 | // Close implements DB.Close, as documented in the leveldb/db package. 311 | func (w *Writer) Close() (err error) { 312 | defer func() { 313 | if w.closer == nil { 314 | return 315 | } 316 | err1 := w.closer.Close() 317 | if err == nil { 318 | err = err1 319 | } 320 | w.closer = nil 321 | }() 322 | if w.err != nil { 323 | return w.err 324 | } 325 | 326 | // Finish the last data block, or force an empty data block if there 327 | // aren't any data blocks at all. 
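// (Editorial note: the len(w.indexEntries) == 0 case below forces one
// empty data block for an otherwise empty table, so the index block always
// ends up with at least one entry.)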
328 | w.flushPendingBH(nil) 329 | if w.nEntries > 0 || len(w.indexEntries) == 0 { 330 | bh, err := w.finishBlock() 331 | if err != nil { 332 | w.err = err 333 | return w.err 334 | } 335 | w.pendingBH = bh 336 | w.flushPendingBH(nil) 337 | } 338 | 339 | // Writer.append uses w.tmp[:3*binary.MaxVarintLen64]. Let tmp be the other 340 | // half of that slice. 341 | tmp := w.tmp[3*binary.MaxVarintLen64 : 5*binary.MaxVarintLen64] 342 | 343 | // Write the filter block. 344 | if w.filter.policy != nil { 345 | b, err := w.filter.finish() 346 | if err != nil { 347 | w.err = err 348 | return w.err 349 | } 350 | bh, err := w.writeRawBlock(b, noCompressionBlockType) 351 | if err != nil { 352 | w.err = err 353 | return w.err 354 | } 355 | n := encodeBlockHandle(tmp, bh) 356 | w.append([]byte("filter."+w.filter.policy.Name()), tmp[:n], true) 357 | } 358 | 359 | // Write the metaindex block. It might be an empty block, if the filter 360 | // policy is nil. 361 | metaindexBlockHandle, err := w.finishBlock() 362 | if err != nil { 363 | w.err = err 364 | return w.err 365 | } 366 | 367 | // Write the index block. 368 | i0 := 0 369 | for _, ie := range w.indexEntries { 370 | n := encodeBlockHandle(tmp, ie.bh) 371 | i1 := i0 + ie.keyLen 372 | w.append(w.indexKeys[i0:i1], tmp[:n], true) 373 | i0 = i1 374 | } 375 | indexBlockHandle, err := w.finishBlock() 376 | if err != nil { 377 | w.err = err 378 | return w.err 379 | } 380 | 381 | // Write the table footer. 382 | footer := w.tmp[:footerLen] 383 | for i := range footer { 384 | footer[i] = 0 385 | } 386 | n := encodeBlockHandle(footer, metaindexBlockHandle) 387 | encodeBlockHandle(footer[n:], indexBlockHandle) 388 | copy(footer[footerLen-len(magic):], magic) 389 | if _, err := w.writer.Write(footer); err != nil { 390 | w.err = err 391 | return w.err 392 | } 393 | 394 | // Flush the buffer. 395 | if w.bufWriter != nil { 396 | if err := w.bufWriter.Flush(); err != nil { 397 | w.err = err 398 | return err 399 | } 400 | } 401 | 402 | // Make any future calls to Set or Close return an error. 403 | w.err = errors.New("leveldb/table: writer is closed") 404 | return nil 405 | } 406 | 407 | // NewWriter returns a new table writer for the file. Closing the writer will 408 | // close the file. 409 | func NewWriter(f db.File, o *db.Options) *Writer { 410 | w := &Writer{ 411 | closer: f, 412 | blockRestartInterval: o.GetBlockRestartInterval(), 413 | blockSize: o.GetBlockSize(), 414 | cmp: o.GetComparer(), 415 | compression: o.GetCompression(), 416 | filter: filterWriter{ 417 | policy: o.GetFilterPolicy(), 418 | }, 419 | prevKey: make([]byte, 0, 256), 420 | restarts: make([]uint32, 0, 256), 421 | } 422 | if f == nil { 423 | w.err = errors.New("leveldb/table: nil file") 424 | return w 425 | } 426 | // If f does not have a Flush method, do our own buffering. 427 | type flusher interface { 428 | Flush() error 429 | } 430 | if _, ok := f.(flusher); ok { 431 | w.writer = f 432 | } else { 433 | w.bufWriter = bufio.NewWriter(f) 434 | w.writer = w.bufWriter 435 | } 436 | return w 437 | } 438 | -------------------------------------------------------------------------------- /table_cache.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package leveldb 6 | 7 | import ( 8 | "os" 9 | "sync" 10 | 11 | "github.com/golang/leveldb/db" 12 | "github.com/golang/leveldb/table" 13 | ) 14 | 15 | type tableCache struct { 16 | dirname string 17 | fs db.FileSystem 18 | opts *db.Options 19 | size int 20 | 21 | mu sync.Mutex 22 | nodes map[uint64]*tableCacheNode 23 | dummy tableCacheNode 24 | } 25 | 26 | func (c *tableCache) init(dirname string, fs db.FileSystem, opts *db.Options, size int) { 27 | c.dirname = dirname 28 | c.fs = fs 29 | c.opts = opts 30 | c.size = size 31 | c.nodes = make(map[uint64]*tableCacheNode) 32 | c.dummy.next = &c.dummy 33 | c.dummy.prev = &c.dummy 34 | } 35 | 36 | func (c *tableCache) find(fileNum uint64, ikey internalKey) (db.Iterator, error) { 37 | // Calling findNode gives us the responsibility of decrementing n's 38 | // refCount. If opening the underlying table resulted in error, then we 39 | // decrement this straight away. Otherwise, we pass that responsibility 40 | // to the tableCacheIter, which decrements when it is closed. 41 | n := c.findNode(fileNum) 42 | x := <-n.result 43 | if x.err != nil { 44 | c.mu.Lock() 45 | n.refCount-- 46 | if n.refCount == 0 { 47 | go n.release() 48 | } 49 | c.mu.Unlock() 50 | 51 | // Try loading the table again; the error may be transient. 52 | go n.load(c) 53 | return nil, x.err 54 | } 55 | n.result <- x 56 | return &tableCacheIter{ 57 | Iterator: x.reader.Find(ikey, nil), 58 | cache: c, 59 | node: n, 60 | }, nil 61 | } 62 | 63 | // releaseNode releases a node from the tableCache. 64 | // 65 | // c.mu must be held when calling this. 66 | func (c *tableCache) releaseNode(n *tableCacheNode) { 67 | delete(c.nodes, n.fileNum) 68 | n.next.prev = n.prev 69 | n.prev.next = n.next 70 | n.refCount-- 71 | if n.refCount == 0 { 72 | go n.release() 73 | } 74 | } 75 | 76 | // findNode returns the node for the table with the given file number, creating 77 | // that node if it didn't already exist. The caller is responsible for 78 | // decrementing the returned node's refCount. 79 | func (c *tableCache) findNode(fileNum uint64) *tableCacheNode { 80 | c.mu.Lock() 81 | defer c.mu.Unlock() 82 | 83 | n := c.nodes[fileNum] 84 | if n == nil { 85 | n = &tableCacheNode{ 86 | fileNum: fileNum, 87 | refCount: 1, 88 | result: make(chan tableReaderOrError, 1), 89 | } 90 | c.nodes[fileNum] = n 91 | if len(c.nodes) > c.size { 92 | // Release the tail node. 93 | c.releaseNode(c.dummy.prev) 94 | } 95 | go n.load(c) 96 | } else { 97 | // Remove n from the doubly-linked list. 98 | n.next.prev = n.prev 99 | n.prev.next = n.next 100 | } 101 | // Insert n at the front of the doubly-linked list. 102 | n.next = c.dummy.next 103 | n.prev = &c.dummy 104 | n.next.prev = n 105 | n.prev.next = n 106 | // The caller is responsible for decrementing the refCount. 
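// (Editorial note: a node starts with refCount 1, held by the cache
// itself; the increment below adds the caller's reference, which find
// either drops on a load error or hands off to the tableCacheIter to drop
// when it is closed.)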
107 | n.refCount++ 108 | return n 109 | } 110 | 111 | func (c *tableCache) evict(fileNum uint64) { 112 | c.mu.Lock() 113 | defer c.mu.Unlock() 114 | 115 | if n := c.nodes[fileNum]; n != nil { 116 | c.releaseNode(n) 117 | } 118 | } 119 | 120 | func (c *tableCache) Close() error { 121 | c.mu.Lock() 122 | defer c.mu.Unlock() 123 | 124 | for n := c.dummy.next; n != &c.dummy; n = n.next { 125 | n.refCount-- 126 | if n.refCount == 0 { 127 | go n.release() 128 | } 129 | } 130 | c.nodes = nil 131 | c.dummy.next = nil 132 | c.dummy.prev = nil 133 | return nil 134 | } 135 | 136 | type tableReaderOrError struct { 137 | reader *table.Reader 138 | err error 139 | } 140 | 141 | type tableCacheNode struct { 142 | fileNum uint64 143 | result chan tableReaderOrError 144 | 145 | // The remaining fields are protected by the tableCache mutex. 146 | 147 | next, prev *tableCacheNode 148 | refCount int 149 | } 150 | 151 | func (n *tableCacheNode) load(c *tableCache) { 152 | // Try opening the fileTypeTable first. If that file doesn't exist, 153 | // fall back onto the fileTypeOldFashionedTable. 154 | f, err := c.fs.Open(dbFilename(c.dirname, fileTypeTable, n.fileNum)) 155 | if os.IsNotExist(err) { 156 | f, err = c.fs.Open(dbFilename(c.dirname, fileTypeOldFashionedTable, n.fileNum)) 157 | } 158 | if err != nil { 159 | n.result <- tableReaderOrError{err: err} 160 | return 161 | } 162 | n.result <- tableReaderOrError{reader: table.NewReader(f, c.opts)} 163 | } 164 | 165 | func (n *tableCacheNode) release() { 166 | x := <-n.result 167 | if x.err != nil { 168 | return 169 | } 170 | x.reader.Close() 171 | } 172 | 173 | type tableCacheIter struct { 174 | db.Iterator 175 | cache *tableCache 176 | node *tableCacheNode 177 | closeErr error 178 | closed bool 179 | } 180 | 181 | func (i *tableCacheIter) Close() error { 182 | if i.closed { 183 | return i.closeErr 184 | } 185 | i.closed = true 186 | 187 | i.cache.mu.Lock() 188 | i.node.refCount-- 189 | if i.node.refCount == 0 { 190 | go i.node.release() 191 | } 192 | i.cache.mu.Unlock() 193 | 194 | i.closeErr = i.Iterator.Close() 195 | return i.closeErr 196 | } 197 | -------------------------------------------------------------------------------- /table_cache_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2013 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 
4 | 5 | package leveldb 6 | 7 | import ( 8 | "bytes" 9 | "fmt" 10 | "math/rand" 11 | "sync" 12 | "testing" 13 | "time" 14 | 15 | "github.com/golang/leveldb/db" 16 | "github.com/golang/leveldb/memfs" 17 | "github.com/golang/leveldb/table" 18 | ) 19 | 20 | type tableCacheTestFile struct { 21 | db.File 22 | fs *tableCacheTestFS 23 | name string 24 | } 25 | 26 | func (f *tableCacheTestFile) Close() error { 27 | f.fs.mu.Lock() 28 | if f.fs.closeCounts != nil { 29 | f.fs.closeCounts[f.name]++ 30 | } 31 | f.fs.mu.Unlock() 32 | return f.File.Close() 33 | } 34 | 35 | type tableCacheTestFS struct { 36 | db.FileSystem 37 | 38 | mu sync.Mutex 39 | openCounts map[string]int 40 | closeCounts map[string]int 41 | } 42 | 43 | func (fs *tableCacheTestFS) Open(name string) (db.File, error) { 44 | fs.mu.Lock() 45 | if fs.openCounts != nil { 46 | fs.openCounts[name]++ 47 | } 48 | fs.mu.Unlock() 49 | f, err := fs.FileSystem.Open(name) 50 | if err != nil { 51 | return nil, err 52 | } 53 | return &tableCacheTestFile{f, fs, name}, nil 54 | } 55 | 56 | func (fs *tableCacheTestFS) validate(t *testing.T, c *tableCache, f func(i, gotO, gotC int) error) { 57 | if err := fs.validateOpenTables(f); err != nil { 58 | t.Error(err) 59 | return 60 | } 61 | c.Close() 62 | if err := fs.validateNoneStillOpen(); err != nil { 63 | t.Error(err) 64 | return 65 | } 66 | } 67 | 68 | // validateOpenTables validates that no tables in the cache are open twice, and 69 | // the number still open is no greater than tableCacheTestCacheSize. 70 | func (fs *tableCacheTestFS) validateOpenTables(f func(i, gotO, gotC int) error) error { 71 | // try backs off to let any clean-up goroutines do their work. 72 | return try(100*time.Microsecond, 20*time.Second, func() error { 73 | fs.mu.Lock() 74 | defer fs.mu.Unlock() 75 | 76 | numStillOpen := 0 77 | for i := 0; i < tableCacheTestNumTables; i++ { 78 | filename := dbFilename("", fileTypeTable, uint64(i)) 79 | gotO, gotC := fs.openCounts[filename], fs.closeCounts[filename] 80 | if gotO > gotC { 81 | numStillOpen++ 82 | } 83 | if gotC != gotO && gotC != gotO-1 { 84 | return fmt.Errorf("i=%d: table closed too many or too few times: opened %d times, closed %d times", 85 | i, gotO, gotC) 86 | } 87 | if f != nil { 88 | if err := f(i, gotO, gotC); err != nil { 89 | return err 90 | } 91 | } 92 | } 93 | if numStillOpen > tableCacheTestCacheSize { 94 | return fmt.Errorf("numStillOpen is %d, want <= %d", numStillOpen, tableCacheTestCacheSize) 95 | } 96 | return nil 97 | }) 98 | } 99 | 100 | // validateNoneStillOpen validates that no tables in the cache are open. 101 | func (fs *tableCacheTestFS) validateNoneStillOpen() error { 102 | // try backs off to let any clean-up goroutines do their work. 
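// (Editorial note: try is a test helper defined elsewhere in this package;
// judging by its call sites, it retries the supplied function with backoff
// bounded by the two durations until the function returns nil or the
// deadline passes.)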
103 | return try(100*time.Microsecond, 20*time.Second, func() error { 104 | fs.mu.Lock() 105 | defer fs.mu.Unlock() 106 | 107 | for i := 0; i < tableCacheTestNumTables; i++ { 108 | filename := dbFilename("", fileTypeTable, uint64(i)) 109 | gotO, gotC := fs.openCounts[filename], fs.closeCounts[filename] 110 | if gotO != gotC { 111 | return fmt.Errorf("i=%d: opened %d times, closed %d times", i, gotO, gotC) 112 | } 113 | } 114 | return nil 115 | }) 116 | } 117 | 118 | const ( 119 | tableCacheTestNumTables = 300 120 | tableCacheTestCacheSize = 100 121 | ) 122 | 123 | func newTableCache() (*tableCache, *tableCacheTestFS, error) { 124 | xxx := bytes.Repeat([]byte("x"), tableCacheTestNumTables) 125 | fs := &tableCacheTestFS{ 126 | FileSystem: memfs.New(), 127 | } 128 | for i := 0; i < tableCacheTestNumTables; i++ { 129 | f, err := fs.Create(dbFilename("", fileTypeTable, uint64(i))) 130 | if err != nil { 131 | return nil, nil, fmt.Errorf("fs.Create: %v", err) 132 | } 133 | tw := table.NewWriter(f, &db.Options{ 134 | Comparer: internalKeyComparer{userCmp: db.DefaultComparer}, 135 | }) 136 | if err := tw.Set(makeIkey(fmt.Sprintf("k.SET.%d", i)), xxx[:i], nil); err != nil { 137 | return nil, nil, fmt.Errorf("tw.Set: %v", err) 138 | } 139 | if err := tw.Close(); err != nil { 140 | return nil, nil, fmt.Errorf("tw.Close: %v", err) 141 | } 142 | } 143 | 144 | fs.mu.Lock() 145 | fs.openCounts = map[string]int{} 146 | fs.closeCounts = map[string]int{} 147 | fs.mu.Unlock() 148 | 149 | c := &tableCache{} 150 | c.init("", fs, nil, tableCacheTestCacheSize) 151 | return c, fs, nil 152 | } 153 | 154 | func testTableCacheRandomAccess(t *testing.T, concurrent bool) { 155 | const N = 2000 156 | c, fs, err := newTableCache() 157 | if err != nil { 158 | t.Fatal(err) 159 | } 160 | 161 | rngMu := sync.Mutex{} 162 | rng := rand.New(rand.NewSource(1)) 163 | 164 | errc := make(chan error, N) 165 | for i := 0; i < N; i++ { 166 | go func(i int) { 167 | rngMu.Lock() 168 | fileNum, sleepTime := rng.Intn(tableCacheTestNumTables), rng.Intn(1000) 169 | rngMu.Unlock() 170 | iter, err := c.find(uint64(fileNum), []byte("k")) 171 | if err != nil { 172 | errc <- fmt.Errorf("i=%d, fileNum=%d: find: %v", i, fileNum, err) 173 | return 174 | } 175 | if concurrent { 176 | time.Sleep(time.Duration(sleepTime) * time.Microsecond) 177 | } 178 | if !iter.Next() { 179 | errc <- fmt.Errorf("i=%d, fileNum=%d: next.0: got false, want true", i, fileNum) 180 | return 181 | } 182 | if got := len(iter.Value()); got != fileNum { 183 | errc <- fmt.Errorf("i=%d, fileNum=%d: value: got %d bytes, want %d", i, fileNum, got, fileNum) 184 | return 185 | } 186 | if iter.Next() { 187 | errc <- fmt.Errorf("i=%d, fileNum=%d: next.1: got true, want false", i, fileNum) 188 | return 189 | } 190 | if err := iter.Close(); err != nil { 191 | errc <- fmt.Errorf("i=%d, fileNum=%d: close: %v", i, fileNum, err) 192 | return 193 | } 194 | errc <- nil 195 | }(i) 196 | if !concurrent { 197 | if err := <-errc; err != nil { 198 | t.Fatal(err) 199 | } 200 | } 201 | } 202 | if concurrent { 203 | for i := 0; i < N; i++ { 204 | if err := <-errc; err != nil { 205 | t.Fatal(err) 206 | } 207 | } 208 | } 209 | fs.validate(t, c, nil) 210 | } 211 | 212 | func TestTableCacheRandomAccessSequential(t *testing.T) { testTableCacheRandomAccess(t, false) } 213 | func TestTableCacheRandomAccessConcurrent(t *testing.T) { testTableCacheRandomAccess(t, true) } 214 | 215 | func TestTableCacheFrequentlyUsed(t *testing.T) { 216 | const ( 217 | N = 1000 218 | pinned0 = 7 219 | pinned1 = 11 220 | ) 221 
| c, fs, err := newTableCache() 222 | if err != nil { 223 | t.Fatal(err) 224 | } 225 | 226 | for i := 0; i < N; i++ { 227 | for _, j := range [...]int{pinned0, i % tableCacheTestNumTables, pinned1} { 228 | iter, err := c.find(uint64(j), nil) 229 | if err != nil { 230 | t.Fatalf("i=%d, j=%d: find: %v", i, j, err) 231 | } 232 | if err := iter.Close(); err != nil { 233 | t.Fatalf("i=%d, j=%d: close: %v", i, j, err) 234 | } 235 | } 236 | } 237 | 238 | fs.validate(t, c, func(i, gotO, gotC int) error { 239 | if i == pinned0 || i == pinned1 { 240 | if gotO != 1 || gotC != 0 { 241 | return fmt.Errorf("i=%d: pinned table: got %d, %d, want %d, %d", i, gotO, gotC, 1, 0) 242 | } 243 | } else if gotO == 1 { 244 | return fmt.Errorf("i=%d: table only opened once", i) 245 | } 246 | return nil 247 | }) 248 | } 249 | 250 | func TestTableCacheEvictions(t *testing.T) { 251 | const ( 252 | N = 1000 253 | lo, hi = 10, 20 254 | ) 255 | c, fs, err := newTableCache() 256 | if err != nil { 257 | t.Fatal(err) 258 | } 259 | 260 | rng := rand.New(rand.NewSource(2)) 261 | for i := 0; i < N; i++ { 262 | j := rng.Intn(tableCacheTestNumTables) 263 | iter, err := c.find(uint64(j), nil) 264 | if err != nil { 265 | t.Fatalf("i=%d, j=%d: find: %v", i, j, err) 266 | } 267 | if err := iter.Close(); err != nil { 268 | t.Fatalf("i=%d, j=%d: close: %v", i, j, err) 269 | } 270 | 271 | c.evict(uint64(lo + rng.Intn(hi-lo))) 272 | } 273 | 274 | sumEvicted, nEvicted := 0, 0 275 | sumSafe, nSafe := 0, 0 276 | fs.validate(t, c, func(i, gotO, gotC int) error { 277 | if lo <= i && i < hi { 278 | sumEvicted += gotO 279 | nEvicted++ 280 | } else { 281 | sumSafe += gotO 282 | nSafe++ 283 | } 284 | return nil 285 | }) 286 | fEvicted := float64(sumEvicted) / float64(nEvicted) 287 | fSafe := float64(sumSafe) / float64(nSafe) 288 | // The magic 1.25 number isn't derived from formal modeling. It's just a guess. For 289 | // (lo, hi, tableCacheTestCacheSize, tableCacheTestNumTables) = (10, 20, 100, 300), 290 | // the ratio seems to converge on roughly 1.5 for large N, compared to 1.0 if we do 291 | // not evict any cache entries. 
292 | if ratio := fEvicted / fSafe; ratio < 1.25 { 293 | t.Errorf("evicted tables were opened %.3f times on average, safe tables %.3f, ratio %.3f < 1.250", 294 | fEvicted, fSafe, ratio) 295 | } 296 | } 297 | -------------------------------------------------------------------------------- /testdata/db-stage-1/000003.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-1/000003.log -------------------------------------------------------------------------------- /testdata/db-stage-1/CURRENT: -------------------------------------------------------------------------------- 1 | MANIFEST-000002 2 | -------------------------------------------------------------------------------- /testdata/db-stage-1/LOCK: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-1/LOCK -------------------------------------------------------------------------------- /testdata/db-stage-1/LOG: -------------------------------------------------------------------------------- 1 | 2012/02/03-18:31:28.752463 7ff183bca740 Delete type=3 #1 2 | -------------------------------------------------------------------------------- /testdata/db-stage-1/MANIFEST-000002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-1/MANIFEST-000002 -------------------------------------------------------------------------------- /testdata/db-stage-2/000003.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-2/000003.log -------------------------------------------------------------------------------- /testdata/db-stage-2/CURRENT: -------------------------------------------------------------------------------- 1 | MANIFEST-000002 2 | -------------------------------------------------------------------------------- /testdata/db-stage-2/LOCK: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-2/LOCK -------------------------------------------------------------------------------- /testdata/db-stage-2/LOG: -------------------------------------------------------------------------------- 1 | 2012/02/03-18:32:06.283846 7fa954064740 Delete type=3 #1 2 | -------------------------------------------------------------------------------- /testdata/db-stage-2/MANIFEST-000002: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-2/MANIFEST-000002 -------------------------------------------------------------------------------- /testdata/db-stage-3/000005.sst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-3/000005.sst -------------------------------------------------------------------------------- /testdata/db-stage-3/000006.log: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-3/000006.log -------------------------------------------------------------------------------- /testdata/db-stage-3/CURRENT: -------------------------------------------------------------------------------- 1 | MANIFEST-000004 2 | -------------------------------------------------------------------------------- /testdata/db-stage-3/LOCK: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-3/LOCK -------------------------------------------------------------------------------- /testdata/db-stage-3/LOG: -------------------------------------------------------------------------------- 1 | 2012/02/03-18:32:34.790995 7f8f2d339740 Recovering log #3 2 | 2012/02/03-18:32:34.791037 7f8f2d339740 Level-0 table #5: started 3 | 2012/02/03-18:32:34.850300 7f8f2d339740 Level-0 table #5: 165 bytes OK 4 | 2012/02/03-18:32:34.917482 7f8f2d339740 Delete type=3 #2 5 | 2012/02/03-18:32:34.917520 7f8f2d339740 Delete type=0 #3 6 | -------------------------------------------------------------------------------- /testdata/db-stage-3/LOG.old: -------------------------------------------------------------------------------- 1 | 2012/02/03-18:32:34.790486 7f8f2d339740 Delete type=3 #1 2 | -------------------------------------------------------------------------------- /testdata/db-stage-3/MANIFEST-000004: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-3/MANIFEST-000004 -------------------------------------------------------------------------------- /testdata/db-stage-4/000005.sst: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-4/000005.sst -------------------------------------------------------------------------------- /testdata/db-stage-4/000006.log: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-4/000006.log -------------------------------------------------------------------------------- /testdata/db-stage-4/CURRENT: -------------------------------------------------------------------------------- 1 | MANIFEST-000004 2 | -------------------------------------------------------------------------------- /testdata/db-stage-4/LOCK: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-4/LOCK -------------------------------------------------------------------------------- /testdata/db-stage-4/LOG: -------------------------------------------------------------------------------- 1 | 2012/02/03-18:39:40.556778 7f7d66252740 Recovering log #3 2 | 2012/02/03-18:39:40.556810 7f7d66252740 Level-0 table #5: started 3 | 2012/02/03-18:39:40.614757 7f7d66252740 Level-0 table #5: 165 bytes OK 4 | 2012/02/03-18:39:40.715229 7f7d66252740 Delete type=3 #2 5 | 2012/02/03-18:39:40.715271 7f7d66252740 Delete type=0 #3 6 | -------------------------------------------------------------------------------- /testdata/db-stage-4/LOG.old: 
-------------------------------------------------------------------------------- 1 | 2012/02/03-18:39:40.556281 7f7d66252740 Delete type=3 #1 2 | -------------------------------------------------------------------------------- /testdata/db-stage-4/MANIFEST-000004: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/db-stage-4/MANIFEST-000004 -------------------------------------------------------------------------------- /testdata/h.bloom.no-compression.ldb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/h.bloom.no-compression.ldb -------------------------------------------------------------------------------- /testdata/h.ldb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/h.ldb -------------------------------------------------------------------------------- /testdata/h.no-compression.ldb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/golang/leveldb/259d9253d71996b7778a3efb4144fe4892342b18/testdata/h.no-compression.ldb -------------------------------------------------------------------------------- /testdata/make-db.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // This program creates a leveldb db at /tmp/db. 6 | // 7 | // To build and run: 8 | // g++ make-db.cc -lleveldb && ./a.out 9 | 10 | #include <iostream> 11 | 12 | #include "leveldb/db.h" 13 | 14 | static const char* dbname = "/tmp/db"; 15 | 16 | // The program consists of up to 4 stages. If stage is in the range [1, 4], 17 | // the program will exit after the stage'th stage. 18 | // 1. create an empty DB. 19 | // 2. add some key/value pairs. 20 | // 3. close and re-open the DB, which forces a compaction. 21 | // 4. add some more key/value pairs.
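// (Presumably each stage's resulting database corresponds to one of the
// testdata/db-stage-N directories checked into this repository.)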
22 | static const int stage = 4; 23 | 24 | int main(int argc, char** argv) { 25 | leveldb::Status status; 26 | leveldb::Options o; 27 | leveldb::WriteOptions wo; 28 | leveldb::DB* db; 29 | 30 | o.create_if_missing = true; 31 | o.error_if_exists = true; 32 | 33 | if (stage < 1) { 34 | return 0; 35 | } 36 | std::cout << "Stage 1" << std::endl; 37 | 38 | status = leveldb::DB::Open(o, dbname, &db); 39 | if (!status.ok()) { 40 | std::cerr << "DB::Open " << status.ToString() << std::endl; 41 | return 1; 42 | } 43 | 44 | if (stage < 2) { 45 | return 0; 46 | } 47 | std::cout << "Stage 2" << std::endl; 48 | 49 | status = db->Put(wo, "foo", "one"); 50 | if (!status.ok()) { 51 | std::cerr << "DB::Put " << status.ToString() << std::endl; 52 | return 1; 53 | } 54 | 55 | status = db->Put(wo, "bar", "two"); 56 | if (!status.ok()) { 57 | std::cerr << "DB::Put " << status.ToString() << std::endl; 58 | return 1; 59 | } 60 | 61 | status = db->Put(wo, "baz", "three"); 62 | if (!status.ok()) { 63 | std::cerr << "DB::Put " << status.ToString() << std::endl; 64 | return 1; 65 | } 66 | 67 | status = db->Put(wo, "foo", "four"); 68 | if (!status.ok()) { 69 | std::cerr << "DB::Put " << status.ToString() << std::endl; 70 | return 1; 71 | } 72 | 73 | status = db->Delete(wo, "bar"); 74 | if (!status.ok()) { 75 | std::cerr << "DB::Delete " << status.ToString() << std::endl; 76 | return 1; 77 | } 78 | 79 | if (stage < 3) { 80 | return 0; 81 | } 82 | std::cout << "Stage 3" << std::endl; 83 | 84 | delete db; 85 | db = NULL; 86 | o.create_if_missing = false; 87 | o.error_if_exists = false; 88 | 89 | status = leveldb::DB::Open(o, dbname, &db); 90 | if (!status.ok()) { 91 | std::cerr << "DB::Open " << status.ToString() << std::endl; 92 | return 1; 93 | } 94 | 95 | if (stage < 4) { 96 | return 0; 97 | } 98 | std::cout << "Stage 4" << std::endl; 99 | 100 | status = db->Put(wo, "foo", "five"); 101 | if (!status.ok()) { 102 | std::cerr << "DB::Put " << status.ToString() << std::endl; 103 | return 1; 104 | } 105 | 106 | status = db->Put(wo, "quux", "six"); 107 | if (!status.ok()) { 108 | std::cerr << "DB::Put " << status.ToString() << std::endl; 109 | return 1; 110 | } 111 | 112 | status = db->Delete(wo, "baz"); 113 | if (!status.ok()) { 114 | std::cerr << "DB::Delete " << status.ToString() << std::endl; 115 | return 1; 116 | } 117 | 118 | return 0; 119 | } 120 | -------------------------------------------------------------------------------- /testdata/make-table.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2011 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | // This program adds N lines from infile to a leveldb table at outfile. 
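// Each input line is expected in `uniq -c` form: a count right-aligned in a
// 7-character field, one space, and then the word (e.g. "     59 the", where
// both the count and the word are purely illustrative). That fixed layout is
// why write() below slices the key starting at offset 8 and the value out of
// the leading count field.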
6 | // The h.txt infile was generated via: 7 | // cat hamlet-act-1.txt | tr '[:upper:]' '[:lower:]' | grep -o -E '\w+' | sort | uniq -c > infile 8 | // 9 | // To build and run: 10 | // g++ make-table.cc -lleveldb && ./a.out 11 | 12 | #include <fstream> 13 | #include <iostream> 14 | #include <string> 15 | 16 | #include "leveldb/env.h" 17 | #include "leveldb/filter_policy.h" 18 | #include "leveldb/table.h" 19 | #include "leveldb/table_builder.h" 20 | 21 | const int N = 1000000; 22 | const char* infile = "h.txt"; 23 | const char* outfile = "h.ldb"; 24 | 25 | int write() { 26 | leveldb::Status status; 27 | 28 | leveldb::WritableFile* wf; 29 | status = leveldb::Env::Default()->NewWritableFile(outfile, &wf); 30 | if (!status.ok()) { 31 | std::cerr << "Env::NewWritableFile: " << status.ToString() << std::endl; 32 | return 1; 33 | } 34 | 35 | leveldb::Options o; 36 | // o.compression = leveldb::kNoCompression; 37 | // o.filter_policy = leveldb::NewBloomFilterPolicy(10); 38 | leveldb::TableBuilder* tb = new leveldb::TableBuilder(o, wf); 39 | std::ifstream in(infile); 40 | std::string s; 41 | for (int i = 0; i < N && getline(in, s); i++) { 42 | std::string key(s, 8); 43 | std::string val(s, 0, 7); 44 | val = val.substr(1 + val.rfind(' ')); 45 | tb->Add(key.c_str(), val.c_str()); 46 | } 47 | 48 | status = tb->Finish(); 49 | if (!status.ok()) { 50 | std::cerr << "TableBuilder::Finish: " << status.ToString() << std::endl; 51 | return 1; 52 | } 53 | 54 | status = wf->Close(); 55 | if (!status.ok()) { 56 | std::cerr << "WritableFile::Close: " << status.ToString() << std::endl; 57 | return 1; 58 | } 59 | 60 | std::cout << "wrote " << tb->NumEntries() << " entries" << std::endl; 61 | delete tb; 62 | delete wf; 63 | return 0; 64 | } 65 | 66 | int read() { 67 | leveldb::Status status; 68 | 69 | leveldb::RandomAccessFile* raf; 70 | status = leveldb::Env::Default()->NewRandomAccessFile(outfile, &raf); 71 | if (!status.ok()) { 72 | std::cerr << "Env::NewRandomAccessFile: " << status.ToString() << std::endl; 73 | return 1; 74 | } 75 | 76 | uint64_t file_size; 77 | status = leveldb::Env::Default()->GetFileSize(outfile, &file_size); 78 | if (!status.ok()) { 79 | std::cerr << "Env::GetFileSize: " << status.ToString() << std::endl; 80 | return 1; 81 | } 82 | 83 | leveldb::Options o; 84 | leveldb::Table* t; 85 | status = leveldb::Table::Open(o, raf, file_size, &t); 86 | if (!status.ok()) { 87 | std::cerr << "Table::Open: " << status.ToString() << std::endl; 88 | return 1; 89 | } 90 | 91 | leveldb::ReadOptions ro; 92 | leveldb::Iterator* i = t->NewIterator(ro); 93 | uint64_t n = 0; 94 | for (i->SeekToFirst(); i->Valid(); i->Next()) { 95 | n++; 96 | } 97 | 98 | std::cout << "read " << n << " entries" << std::endl; 99 | delete i; 100 | delete t; 101 | delete raf; 102 | return 0; 103 | } 104 | 105 | int main(int argc, char** argv) { 106 | int ret = write(); 107 | if (ret != 0) { 108 | return ret; 109 | } 110 | return read(); 111 | } 112 | -------------------------------------------------------------------------------- /version.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package leveldb 6 | 7 | import ( 8 | "fmt" 9 | "sort" 10 | 11 | "github.com/golang/leveldb/db" 12 | ) 13 | 14 | // fileMetadata holds the metadata for an on-disk table. 15 | type fileMetadata struct { 16 | // fileNum is the file number.
17 | fileNum uint64 18 | // size is the size of the file, in bytes. 19 | size uint64 20 | // smallest and largest are the inclusive bounds for the internal keys 21 | // stored in the table. 22 | smallest, largest internalKey 23 | } 24 | 25 | // totalSize returns the total size of all the files in f. 26 | func totalSize(f []fileMetadata) (size uint64) { 27 | for _, x := range f { 28 | size += x.size 29 | } 30 | return size 31 | } 32 | 33 | // ikeyRange returns the minimum smallest and maximum largest internalKey for 34 | // all the fileMetadata in f0 and f1. 35 | func ikeyRange(icmp db.Comparer, f0, f1 []fileMetadata) (smallest, largest internalKey) { 36 | first := true 37 | for _, f := range [2][]fileMetadata{f0, f1} { 38 | for _, meta := range f { 39 | if first { 40 | first = false 41 | smallest, largest = meta.smallest, meta.largest 42 | continue 43 | } 44 | if icmp.Compare(meta.smallest, smallest) < 0 { 45 | smallest = meta.smallest 46 | } 47 | if icmp.Compare(meta.largest, largest) > 0 { 48 | largest = meta.largest 49 | } 50 | } 51 | } 52 | return smallest, largest 53 | } 54 | 55 | type byFileNum []fileMetadata 56 | 57 | func (b byFileNum) Len() int { return len(b) } 58 | func (b byFileNum) Less(i, j int) bool { return b[i].fileNum < b[j].fileNum } 59 | func (b byFileNum) Swap(i, j int) { b[i], b[j] = b[j], b[i] } 60 | 61 | type bySmallest struct { 62 | dat []fileMetadata 63 | cmp db.Comparer 64 | } 65 | 66 | func (b bySmallest) Len() int { return len(b.dat) } 67 | func (b bySmallest) Less(i, j int) bool { 68 | return b.cmp.Compare(b.dat[i].smallest, b.dat[j].smallest) < 0 69 | } 70 | func (b bySmallest) Swap(i, j int) { b.dat[i], b.dat[j] = b.dat[j], b.dat[i] } 71 | 72 | const numLevels = 7 73 | 74 | // version is a collection of file metadata for on-disk tables at various 75 | // levels. In-memory DBs are written to level-0 tables, and compactions 76 | // migrate data from level N to level N+1. The tables map internal keys (which 77 | // are a user key, a delete or set bit, and a sequence number) to user values. 78 | // 79 | // The tables at level 0 are sorted by increasing fileNum. If two level 0 80 | // tables have fileNums i and j and i < j, then the sequence numbers of every 81 | // internal key in table i are all less than those for table j. The range of 82 | // internal keys [fileMetadata.smallest, fileMetadata.largest] in each level 0 83 | // table may overlap. 84 | // 85 | // The tables at any non-0 level are sorted by their internal key range and any 86 | // two tables at the same non-0 level do not overlap. 87 | // 88 | // The internal key ranges of two tables at different levels X and Y may 89 | // overlap, for any X != Y. 90 | // 91 | // Finally, for every internal key in a table at level X, there is no internal 92 | // key in a higher level table that has both the same user key and a higher 93 | // sequence number. 94 | type version struct { 95 | files [numLevels][]fileMetadata 96 | // Every version is part of a circular doubly-linked list of versions. 97 | // One of those versions is a versionSet.dummyVersion. 98 | prev, next *version 99 | 100 | // These fields are the level that should be compacted next and its 101 | // compaction score. A score < 1 means that compaction is not strictly 102 | // needed. 103 | compactionScore float64 104 | compactionLevel int 105 | } 106 | 107 | // updateCompactionScore updates v's compaction score and level. 
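// A worked example (a sketch; it assumes l0CompactionTrigger, defined
// elsewhere in this package, is 4): with 6 level-0 files the level-0 score
// is 6/4 = 1.5, while 25 MiB of tables at level 1 (whose maxBytes budget is
// 10 MiB) scores 25/10 = 2.5, so level 1 would be chosen for compaction next.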
108 | func (v *version) updateCompactionScore() { 109 | // We treat level-0 specially by bounding the number of files instead of 110 | // number of bytes for two reasons: 111 | // 112 | // (1) With larger write-buffer sizes, it is nice not to do too many 113 | // level-0 compactions. 114 | // 115 | // (2) The files in level-0 are merged on every read and therefore we 116 | // wish to avoid too many files when the individual file size is small 117 | // (perhaps because of a small write-buffer setting, or very high 118 | // compression ratios, or lots of overwrites/deletions). 119 | v.compactionScore = float64(len(v.files[0])) / l0CompactionTrigger 120 | v.compactionLevel = 0 121 | 122 | maxBytes := float64(10 * 1024 * 1024) 123 | for level := 1; level < numLevels-1; level++ { 124 | score := float64(totalSize(v.files[level])) / maxBytes 125 | if score > v.compactionScore { 126 | v.compactionScore = score 127 | v.compactionLevel = level 128 | } 129 | maxBytes *= 10 130 | } 131 | } 132 | 133 | // overlaps returns all elements of v.files[level] whose user key range 134 | // intersects the inclusive range [ukey0, ukey1]. If level is non-zero then the 135 | // user key ranges of v.files[level] are assumed to not overlap (although they 136 | // may touch). If level is zero then that assumption cannot be made, and the 137 | // [ukey0, ukey1] range is expanded to the union of those matching ranges so 138 | // far and the computation is repeated until [ukey0, ukey1] stabilizes. 139 | func (v *version) overlaps(level int, ucmp db.Comparer, ukey0, ukey1 []byte) (ret []fileMetadata) { 140 | loop: 141 | for { 142 | for _, meta := range v.files[level] { 143 | m0 := meta.smallest.ukey() 144 | m1 := meta.largest.ukey() 145 | if ucmp.Compare(m1, ukey0) < 0 { 146 | // meta is completely before the specified range; skip it. 147 | continue 148 | } 149 | if ucmp.Compare(m0, ukey1) > 0 { 150 | // meta is completely after the specified range; skip it. 151 | continue 152 | } 153 | ret = append(ret, meta) 154 | 155 | // If level == 0, check if the newly added fileMetadata has 156 | // expanded the range. If so, restart the search. 157 | if level != 0 { 158 | continue 159 | } 160 | restart := false 161 | if ucmp.Compare(m0, ukey0) < 0 { 162 | ukey0 = m0 163 | restart = true 164 | } 165 | if ucmp.Compare(m1, ukey1) > 0 { 166 | ukey1 = m1 167 | restart = true 168 | } 169 | if restart { 170 | ret = ret[:0] 171 | continue loop 172 | } 173 | } 174 | return ret 175 | } 176 | } 177 | 178 | // checkOrdering checks that the files are consistent with respect to 179 | // increasing file numbers (for level 0 files) and increasing and non- 180 | // overlapping internal key ranges (for level non-0 files). 
181 | func (v *version) checkOrdering(icmp db.Comparer) error { 182 | for level, ff := range v.files { 183 | if level == 0 { 184 | prevFileNum := uint64(0) 185 | for i, f := range ff { 186 | if i != 0 && prevFileNum >= f.fileNum { 187 | return fmt.Errorf("level 0 files are not in increasing fileNum order: %d, %d", prevFileNum, f.fileNum) 188 | } 189 | prevFileNum = f.fileNum 190 | } 191 | } else { 192 | prevLargest := internalKey(nil) 193 | for i, f := range ff { 194 | if i != 0 && icmp.Compare(prevLargest, f.smallest) >= 0 { 195 | return fmt.Errorf("level non-0 files are not in increasing ikey order: %q, %q", prevLargest, f.smallest) 196 | } 197 | if icmp.Compare(f.smallest, f.largest) > 0 { 198 | return fmt.Errorf("level non-0 file has inconsistent bounds: %q, %q", f.smallest, f.largest) 199 | } 200 | prevLargest = f.largest 201 | } 202 | } 203 | } 204 | return nil 205 | } 206 | 207 | // tableIkeyFinder finds the given ikey in the table of the given file number. 208 | type tableIkeyFinder interface { 209 | find(fileNum uint64, ikey internalKey) (db.Iterator, error) 210 | } 211 | 212 | // get looks up the internal key ikey0 in v's tables such that ikey and ikey0 213 | // have the same user key, and ikey0's sequence number is the highest such 214 | // sequence number that is less than or equal to ikey's sequence number. 215 | // 216 | // If ikey0's kind is set, the value for that previous set action is returned. 217 | // If ikey0's kind is delete, the db.ErrNotFound error is returned. 218 | // If there is no such ikey0, the db.ErrNotFound error is returned. 219 | func (v *version) get(ikey internalKey, tiFinder tableIkeyFinder, ucmp db.Comparer, ro *db.ReadOptions) ([]byte, error) { 220 | ukey := ikey.ukey() 221 | // Iterate through v's tables, calling internalGet if the table's bounds 222 | // might contain ikey. Due to the order in which we search the tables, and 223 | // the internalKeyComparer's ordering within a table, we stop after the 224 | // first conclusive result. 225 | 226 | // Search the level 0 files in decreasing fileNum order, 227 | // which is also decreasing sequence number order. 228 | icmp := internalKeyComparer{ucmp} 229 | for i := len(v.files[0]) - 1; i >= 0; i-- { 230 | f := v.files[0][i] 231 | // We compare user keys on the low end, as we do not want to reject a table 232 | // whose smallest internal key may have the same user key and a lower sequence 233 | // number. An internalKeyComparer sorts increasing by user key but then 234 | // descending by sequence number. 235 | if ucmp.Compare(ukey, f.smallest.ukey()) < 0 { 236 | continue 237 | } 238 | // We compare internal keys on the high end. It gives a tighter bound than 239 | // comparing user keys. 240 | if icmp.Compare(ikey, f.largest) > 0 { 241 | continue 242 | } 243 | iter, err := tiFinder.find(f.fileNum, ikey) 244 | if err != nil { 245 | return nil, fmt.Errorf("leveldb: could not open table %d: %v", f.fileNum, err) 246 | } 247 | value, conclusive, err := internalGet(iter, ucmp, ukey) 248 | if conclusive { 249 | return value, err 250 | } 251 | } 252 | 253 | // Search the remaining levels. 254 | for level := 1; level < len(v.files); level++ { 255 | n := len(v.files[level]) 256 | if n == 0 { 257 | continue 258 | } 259 | // Find the earliest file at that level whose largest key is >= ikey. 
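// (sort.Search binary-searches [0, n) and returns the smallest index for
// which the predicate is true, or n if it is never true, so index == n
// below means that every file at this level ends before ikey.)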
260 | index := sort.Search(n, func(i int) bool { 261 | return icmp.Compare(v.files[level][i].largest, ikey) >= 0 262 | }) 263 | if index == n { 264 | continue 265 | } 266 | f := v.files[level][index] 267 | if ucmp.Compare(ukey, f.smallest.ukey()) < 0 { 268 | continue 269 | } 270 | iter, err := tiFinder.find(f.fileNum, ikey) 271 | if err != nil { 272 | return nil, fmt.Errorf("leveldb: could not open table %d: %v", f.fileNum, err) 273 | } 274 | value, conclusive, err := internalGet(iter, ucmp, ukey) 275 | if conclusive { 276 | return value, err 277 | } 278 | } 279 | return nil, db.ErrNotFound 280 | } 281 | 282 | // internalGet looks up the first key/value pair whose (internal) key is >= 283 | // ikey, according to the internal key ordering, and also returns whether or 284 | // not that search was conclusive. 285 | // 286 | // If there is no such pair, or that pair's key and ikey do not share the same 287 | // user key (according to ucmp), then conclusive will be false. Otherwise, 288 | // conclusive will be true and: 289 | // * if that pair's key's kind is set, that pair's value will be returned, 290 | // * if that pair's key's kind is delete, db.ErrNotFound will be returned. 291 | // If the returned error is non-nil then conclusive will be true. 292 | func internalGet(t db.Iterator, ucmp db.Comparer, ukey []byte) (value []byte, conclusive bool, err error) { 293 | if !t.Next() { 294 | err = t.Close() 295 | return nil, err != nil, err 296 | } 297 | ikey0 := internalKey(t.Key()) 298 | if !ikey0.valid() { 299 | t.Close() 300 | return nil, true, fmt.Errorf("leveldb: corrupt table: invalid internal key") 301 | } 302 | if ucmp.Compare(ukey, ikey0.ukey()) != 0 { 303 | err = t.Close() 304 | return nil, err != nil, err 305 | } 306 | if ikey0.kind() == internalKeyKindDelete { 307 | t.Close() 308 | return nil, true, db.ErrNotFound 309 | } 310 | return t.Value(), true, t.Close() 311 | } 312 | -------------------------------------------------------------------------------- /version_edit.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package leveldb 6 | 7 | import ( 8 | "bufio" 9 | "bytes" 10 | "encoding/binary" 11 | "errors" 12 | "fmt" 13 | "io" 14 | "sort" 15 | 16 | "github.com/golang/leveldb/db" 17 | ) 18 | 19 | // TODO: describe the MANIFEST file format, independently of the C++ project. 20 | 21 | var errCorruptManifest = errors.New("leveldb: corrupt manifest") 22 | 23 | type byteReader interface { 24 | io.ByteReader 25 | io.Reader 26 | } 27 | 28 | // Tags for the versionEdit disk format. 29 | // Tag 8 is no longer used. 30 | const ( 31 | tagComparator = 1 32 | tagLogNumber = 2 33 | tagNextFileNumber = 3 34 | tagLastSequence = 4 35 | tagCompactPointer = 5 36 | tagDeletedFile = 6 37 | tagNewFile = 7 38 | tagPrevLogNumber = 9 39 | ) 40 | 41 | type compactPointerEntry struct { 42 | level int 43 | key internalKey 44 | } 45 | 46 | type deletedFileEntry struct { 47 | level int 48 | fileNum uint64 49 | } 50 | 51 | type newFileEntry struct { 52 | level int 53 | meta fileMetadata 54 | } 55 | 56 | type versionEdit struct { 57 | comparatorName string 58 | logNumber uint64 59 | prevLogNumber uint64 60 | nextFileNumber uint64 61 | lastSequence uint64 62 | compactPointers []compactPointerEntry 63 | deletedFiles map[deletedFileEntry]bool // A set of deletedFileEntry values. 
64 | newFiles []newFileEntry 65 | } 66 | 67 | func (v *versionEdit) decode(r io.Reader) error { 68 | br, ok := r.(byteReader) 69 | if !ok { 70 | br = bufio.NewReader(r) 71 | } 72 | d := versionEditDecoder{br} 73 | for { 74 | tag, err := binary.ReadUvarint(br) 75 | if err == io.EOF { 76 | break 77 | } 78 | if err != nil { 79 | return err 80 | } 81 | switch tag { 82 | 83 | case tagComparator: 84 | s, err := d.readBytes() 85 | if err != nil { 86 | return err 87 | } 88 | v.comparatorName = string(s) 89 | 90 | case tagLogNumber: 91 | n, err := d.readUvarint() 92 | if err != nil { 93 | return err 94 | } 95 | v.logNumber = n 96 | 97 | case tagNextFileNumber: 98 | n, err := d.readUvarint() 99 | if err != nil { 100 | return err 101 | } 102 | v.nextFileNumber = n 103 | 104 | case tagLastSequence: 105 | n, err := d.readUvarint() 106 | if err != nil { 107 | return err 108 | } 109 | v.lastSequence = n 110 | 111 | case tagCompactPointer: 112 | level, err := d.readLevel() 113 | if err != nil { 114 | return err 115 | } 116 | key, err := d.readBytes() 117 | if err != nil { 118 | return err 119 | } 120 | v.compactPointers = append(v.compactPointers, compactPointerEntry{level, key}) 121 | 122 | case tagDeletedFile: 123 | level, err := d.readLevel() 124 | if err != nil { 125 | return err 126 | } 127 | fileNum, err := d.readUvarint() 128 | if err != nil { 129 | return err 130 | } 131 | if v.deletedFiles == nil { 132 | v.deletedFiles = make(map[deletedFileEntry]bool) 133 | } 134 | v.deletedFiles[deletedFileEntry{level, fileNum}] = true 135 | 136 | case tagNewFile: 137 | level, err := d.readLevel() 138 | if err != nil { 139 | return err 140 | } 141 | fileNum, err := d.readUvarint() 142 | if err != nil { 143 | return err 144 | } 145 | size, err := d.readUvarint() 146 | if err != nil { 147 | return err 148 | } 149 | smallest, err := d.readBytes() 150 | if err != nil { 151 | return err 152 | } 153 | largest, err := d.readBytes() 154 | if err != nil { 155 | return err 156 | } 157 | v.newFiles = append(v.newFiles, newFileEntry{ 158 | level: level, 159 | meta: fileMetadata{ 160 | fileNum: fileNum, 161 | size: size, 162 | smallest: smallest, 163 | largest: largest, 164 | }, 165 | }) 166 | 167 | case tagPrevLogNumber: 168 | n, err := d.readUvarint() 169 | if err != nil { 170 | return err 171 | } 172 | v.prevLogNumber = n 173 | 174 | default: 175 | return errCorruptManifest 176 | } 177 | } 178 | return nil 179 | } 180 | 181 | func (v *versionEdit) encode(w io.Writer) error { 182 | e := versionEditEncoder{new(bytes.Buffer)} 183 | if v.comparatorName != "" { 184 | e.writeUvarint(tagComparator) 185 | e.writeString(v.comparatorName) 186 | } 187 | if v.logNumber != 0 { 188 | e.writeUvarint(tagLogNumber) 189 | e.writeUvarint(v.logNumber) 190 | } 191 | if v.prevLogNumber != 0 { 192 | e.writeUvarint(tagPrevLogNumber) 193 | e.writeUvarint(v.prevLogNumber) 194 | } 195 | if v.nextFileNumber != 0 { 196 | e.writeUvarint(tagNextFileNumber) 197 | e.writeUvarint(v.nextFileNumber) 198 | } 199 | if v.lastSequence != 0 { 200 | e.writeUvarint(tagLastSequence) 201 | e.writeUvarint(v.lastSequence) 202 | } 203 | for _, x := range v.compactPointers { 204 | e.writeUvarint(tagCompactPointer) 205 | e.writeUvarint(uint64(x.level)) 206 | e.writeBytes(x.key) 207 | } 208 | for x := range v.deletedFiles { 209 | e.writeUvarint(tagDeletedFile) 210 | e.writeUvarint(uint64(x.level)) 211 | e.writeUvarint(x.fileNum) 212 | } 213 | for _, x := range v.newFiles { 214 | e.writeUvarint(tagNewFile) 215 | e.writeUvarint(uint64(x.level)) 216 | 
e.writeUvarint(x.meta.fileNum) 217 | e.writeUvarint(x.meta.size) 218 | e.writeBytes(x.meta.smallest) 219 | e.writeBytes(x.meta.largest) 220 | } 221 | _, err := w.Write(e.Bytes()) 222 | return err 223 | } 224 | 225 | type versionEditDecoder struct { 226 | byteReader 227 | } 228 | 229 | func (d versionEditDecoder) readBytes() ([]byte, error) { 230 | n, err := d.readUvarint() 231 | if err != nil { 232 | return nil, err 233 | } 234 | s := make([]byte, n) 235 | _, err = io.ReadFull(d, s) 236 | if err != nil { 237 | if err == io.ErrUnexpectedEOF { 238 | return nil, errCorruptManifest 239 | } 240 | return nil, err 241 | } 242 | return s, nil 243 | } 244 | 245 | func (d versionEditDecoder) readLevel() (int, error) { 246 | u, err := d.readUvarint() 247 | if err != nil { 248 | return 0, err 249 | } 250 | if u >= numLevels { 251 | return 0, errCorruptManifest 252 | } 253 | return int(u), nil 254 | } 255 | 256 | func (d versionEditDecoder) readUvarint() (uint64, error) { 257 | u, err := binary.ReadUvarint(d) 258 | if err != nil { 259 | if err == io.EOF { 260 | return 0, errCorruptManifest 261 | } 262 | return 0, err 263 | } 264 | return u, nil 265 | } 266 | 267 | type versionEditEncoder struct { 268 | *bytes.Buffer 269 | } 270 | 271 | func (e versionEditEncoder) writeBytes(p []byte) { 272 | e.writeUvarint(uint64(len(p))) 273 | e.Write(p) 274 | } 275 | 276 | func (e versionEditEncoder) writeString(s string) { 277 | e.writeUvarint(uint64(len(s))) 278 | e.WriteString(s) 279 | } 280 | 281 | func (e versionEditEncoder) writeUvarint(u uint64) { 282 | var buf [binary.MaxVarintLen64]byte 283 | n := binary.PutUvarint(buf[:], u) 284 | e.Write(buf[:n]) 285 | } 286 | 287 | // bulkVersionEdit summarizes the files added and deleted from a set of version 288 | // edits. 289 | // 290 | // The C++ LevelDB code calls this concept a VersionSet::Builder. 291 | type bulkVersionEdit struct { 292 | added [numLevels][]fileMetadata 293 | deleted [numLevels]map[uint64]bool // map[uint64]bool is a set of fileNums. 294 | } 295 | 296 | func (b *bulkVersionEdit) accumulate(ve *versionEdit) { 297 | for _, cp := range ve.compactPointers { 298 | // TODO: handle compaction pointers. 299 | _ = cp 300 | } 301 | 302 | for df := range ve.deletedFiles { 303 | dmap := b.deleted[df.level] 304 | if dmap == nil { 305 | dmap = make(map[uint64]bool) 306 | b.deleted[df.level] = dmap 307 | } 308 | dmap[df.fileNum] = true 309 | } 310 | 311 | for _, nf := range ve.newFiles { 312 | if dmap := b.deleted[nf.level]; dmap != nil { 313 | delete(dmap, nf.meta.fileNum) 314 | } 315 | // TODO: fiddle with nf.meta.allowedSeeks. 316 | b.added[nf.level] = append(b.added[nf.level], nf.meta) 317 | } 318 | } 319 | 320 | // apply applies the delta b to a base version to produce a new version. The 321 | // new version is consistent with respect to the internal key comparer icmp. 322 | // 323 | // base may be nil, which is equivalent to a pointer to a zero version. 
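//
// A typical use, sketched (here edits and icmp stand for versionEdits
// decoded from a MANIFEST and an internalKeyComparer; versionSet.load in
// version_set.go does essentially this):
//
//	var bve bulkVersionEdit
//	for i := range edits {
//		bve.accumulate(&edits[i])
//	}
//	v, err := bve.apply(nil, icmp)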
324 | func (b *bulkVersionEdit) apply(base *version, icmp db.Comparer) (*version, error) { 325 | v := new(version) 326 | for level := range v.files { 327 | combined := [2][]fileMetadata{ 328 | nil, 329 | b.added[level], 330 | } 331 | if base != nil { 332 | combined[0] = base.files[level] 333 | } 334 | n := len(combined[0]) + len(combined[1]) 335 | if n == 0 { 336 | continue 337 | } 338 | v.files[level] = make([]fileMetadata, 0, n) 339 | dmap := b.deleted[level] 340 | 341 | for _, ff := range combined { 342 | for _, f := range ff { 343 | if dmap != nil && dmap[f.fileNum] { 344 | continue 345 | } 346 | v.files[level] = append(v.files[level], f) 347 | } 348 | } 349 | 350 | // TODO: base.files[level] is already sorted. Instead of appending 351 | // b.added[level] to the end and sorting afterwards, it might be more 352 | // efficient to sort b.added[level] and then merge the two sorted slices. 353 | if level == 0 { 354 | sort.Sort(byFileNum(v.files[level])) 355 | } else { 356 | sort.Sort(bySmallest{v.files[level], icmp}) 357 | } 358 | } 359 | if err := v.checkOrdering(icmp); err != nil { 360 | return nil, fmt.Errorf("leveldb: internal error: %v", err) 361 | } 362 | v.updateCompactionScore() 363 | return v, nil 364 | } 365 | -------------------------------------------------------------------------------- /version_edit_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package leveldb 6 | 7 | import ( 8 | "bytes" 9 | "fmt" 10 | "io" 11 | "io/ioutil" 12 | "os" 13 | "reflect" 14 | "testing" 15 | 16 | "github.com/golang/leveldb/record" 17 | ) 18 | 19 | func checkRoundTrip(e0 versionEdit) error { 20 | var e1 versionEdit 21 | buf := new(bytes.Buffer) 22 | if err := e0.encode(buf); err != nil { 23 | return fmt.Errorf("encode: %v", err) 24 | } 25 | if err := e1.decode(buf); err != nil { 26 | return fmt.Errorf("decode: %v", err) 27 | } 28 | if !reflect.DeepEqual(e1, e0) { 29 | return fmt.Errorf("\n\tgot %#v\n\twant %#v", e1, e0) 30 | } 31 | return nil 32 | } 33 | 34 | func TestVersionEditRoundTrip(t *testing.T) { 35 | testCases := []versionEdit{ 36 | // An empty version edit. 37 | {}, 38 | // A complete version edit.
39 | { 40 | comparatorName: "11", 41 | logNumber: 22, 42 | prevLogNumber: 33, 43 | nextFileNumber: 44, 44 | lastSequence: 55, 45 | compactPointers: []compactPointerEntry{ 46 | { 47 | level: 0, 48 | key: internalKey("600"), 49 | }, 50 | { 51 | level: 1, 52 | key: internalKey("601"), 53 | }, 54 | { 55 | level: 2, 56 | key: internalKey("602"), 57 | }, 58 | }, 59 | deletedFiles: map[deletedFileEntry]bool{ 60 | deletedFileEntry{ 61 | level: 3, 62 | fileNum: 703, 63 | }: true, 64 | deletedFileEntry{ 65 | level: 4, 66 | fileNum: 704, 67 | }: true, 68 | }, 69 | newFiles: []newFileEntry{ 70 | { 71 | level: 5, 72 | meta: fileMetadata{ 73 | fileNum: 805, 74 | size: 8050, 75 | smallest: internalKey("abc\x00\x01\x02\x03\x04\x05\x06\x07"), 76 | largest: internalKey("xyz\x01\xff\xfe\xfd\xfc\xfb\xfa\xf9"), 77 | }, 78 | }, 79 | { 80 | level: 6, 81 | meta: fileMetadata{ 82 | fileNum: 806, 83 | size: 8060, 84 | smallest: internalKey("A\x00\x01\x02\x03\x04\x05\x06\x07"), 85 | largest: internalKey("Z\x01\xff\xfe\xfd\xfc\xfb\xfa\xf9"), 86 | }, 87 | }, 88 | }, 89 | }, 90 | } 91 | for _, tc := range testCases { 92 | if err := checkRoundTrip(tc); err != nil { 93 | t.Error(err) 94 | } 95 | } 96 | } 97 | 98 | func TestVersionEditDecode(t *testing.T) { 99 | testCases := []struct { 100 | filename string 101 | encodedEdits []string 102 | edits []versionEdit 103 | }{ 104 | // db-stage-1 and db-stage-2 have the same manifest. 105 | { 106 | filename: "db-stage-1/MANIFEST-000002", 107 | encodedEdits: []string{ 108 | "\x01\x1aleveldb.BytewiseComparator", 109 | "\x02\x03\x09\x00\x03\x04\x04\x00", 110 | }, 111 | edits: []versionEdit{ 112 | { 113 | comparatorName: "leveldb.BytewiseComparator", 114 | }, 115 | { 116 | logNumber: 3, 117 | prevLogNumber: 0, 118 | nextFileNumber: 4, 119 | lastSequence: 0, 120 | }, 121 | }, 122 | }, 123 | // db-stage-3 and db-stage-4 have the same manifest. 
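// As a worked decoding of the second record below (a sketch, using the tag
// constants from version_edit.go): \x02\x06 is tagLogNumber with value 6,
// \x09\x00 is tagPrevLogNumber 0, \x03\x07 is tagNextFileNumber 7,
// \x04\x05 is tagLastSequence 5, and \x07\x00\x05\xa5\x01 starts a
// tagNewFile entry at level 0 with fileNum 5 and size 165 (\xa5\x01 is the
// uvarint encoding of 165), followed by the two length-prefixed
// (\x0b, i.e. 11-byte) smallest and largest internal keys.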
124 | { 125 | filename: "db-stage-3/MANIFEST-000004", 126 | encodedEdits: []string{ 127 | "\x01\x1aleveldb.BytewiseComparator", 128 | "\x02\x06\x09\x00\x03\x07\x04\x05\x07\x00\x05\xa5\x01" + 129 | "\x0bbar\x00\x05\x00\x00\x00\x00\x00\x00" + 130 | "\x0bfoo\x01\x01\x00\x00\x00\x00\x00\x00", 131 | }, 132 | edits: []versionEdit{ 133 | { 134 | comparatorName: "leveldb.BytewiseComparator", 135 | }, 136 | { 137 | logNumber: 6, 138 | prevLogNumber: 0, 139 | nextFileNumber: 7, 140 | lastSequence: 5, 141 | newFiles: []newFileEntry{ 142 | { 143 | level: 0, 144 | meta: fileMetadata{ 145 | fileNum: 5, 146 | size: 165, 147 | smallest: internalKey("bar\x00\x05\x00\x00\x00\x00\x00\x00"), 148 | largest: internalKey("foo\x01\x01\x00\x00\x00\x00\x00\x00"), 149 | }, 150 | }, 151 | }, 152 | }, 153 | }, 154 | }, 155 | } 156 | 157 | loop: 158 | for _, tc := range testCases { 159 | f, err := os.Open("testdata/" + tc.filename) 160 | if err != nil { 161 | t.Errorf("filename=%q: open error: %v", tc.filename, err) 162 | continue 163 | } 164 | defer f.Close() 165 | i, r := 0, record.NewReader(f) 166 | for { 167 | rr, err := r.Next() 168 | if err == io.EOF { 169 | break 170 | } 171 | if err != nil { 172 | t.Errorf("filename=%q i=%d: record reader error: %v", tc.filename, i, err) 173 | continue loop 174 | } 175 | if i >= len(tc.edits) { 176 | t.Errorf("filename=%q i=%d: too many version edits", tc.filename, i+1) 177 | continue loop 178 | } 179 | 180 | encodedEdit, err := ioutil.ReadAll(rr) 181 | if err != nil { 182 | t.Errorf("filename=%q i=%d: read error: %v", tc.filename, i, err) 183 | continue loop 184 | } 185 | if s := string(encodedEdit); s != tc.encodedEdits[i] { 186 | t.Errorf("filename=%q i=%d: got encoded %q, want %q", tc.filename, i, s, tc.encodedEdits[i]) 187 | continue loop 188 | } 189 | 190 | var edit versionEdit 191 | err = edit.decode(bytes.NewReader(encodedEdit)) 192 | if err != nil { 193 | t.Errorf("filename=%q i=%d: decode error: %v", tc.filename, i, err) 194 | continue loop 195 | } 196 | if !reflect.DeepEqual(edit, tc.edits[i]) { 197 | t.Errorf("filename=%q i=%d: decode\n\tgot %#v\n\twant %#v", tc.filename, i, edit, tc.edits[i]) 198 | continue loop 199 | } 200 | if err := checkRoundTrip(edit); err != nil { 201 | t.Errorf("filename=%q i=%d: round trip: %v", tc.filename, i, err) 202 | continue loop 203 | } 204 | 205 | i++ 206 | } 207 | if i != len(tc.edits) { 208 | t.Errorf("filename=%q: got %d edits, want %d", tc.filename, i, len(tc.edits)) 209 | continue 210 | } 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /version_set.go: -------------------------------------------------------------------------------- 1 | // Copyright 2012 The LevelDB-Go Authors. All rights reserved. 2 | // Use of this source code is governed by a BSD-style 3 | // license that can be found in the LICENSE file. 4 | 5 | package leveldb 6 | 7 | import ( 8 | "fmt" 9 | "io" 10 | "os" 11 | 12 | "github.com/golang/leveldb/db" 13 | "github.com/golang/leveldb/record" 14 | ) 15 | 16 | // TODO: describe what a versionSet is. 17 | type versionSet struct { 18 | dirname string 19 | opts *db.Options 20 | fs db.FileSystem 21 | ucmp, icmp db.Comparer 22 | 23 | // dummyVersion is the head of a circular doubly-linked list of versions. 24 | // dummyVersion.prev is the current version. 
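// For example, after appending versions v1 and then v2, the ring reads
// dummyVersion <-> v1 <-> v2 <-> dummyVersion, and the current version
// (dummyVersion.prev) is v2.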
25 | dummyVersion version 26 | 27 | logNumber uint64 28 | prevLogNumber uint64 29 | nextFileNumber uint64 30 | lastSequence uint64 31 | manifestFileNumber uint64 32 | 33 | manifestFile db.File 34 | manifest *record.Writer 35 | } 36 | 37 | // load loads the version set from the manifest file. 38 | func (vs *versionSet) load(dirname string, opts *db.Options) error { 39 | vs.dirname = dirname 40 | vs.opts = opts 41 | vs.fs = opts.GetFileSystem() 42 | vs.ucmp = opts.GetComparer() 43 | vs.icmp = internalKeyComparer{vs.ucmp} 44 | vs.dummyVersion.prev = &vs.dummyVersion 45 | vs.dummyVersion.next = &vs.dummyVersion 46 | // For historical reasons, the next file number is initialized to 2. 47 | vs.nextFileNumber = 2 48 | 49 | // Read the CURRENT file to find the current manifest file. 50 | current, err := vs.fs.Open(dbFilename(dirname, fileTypeCurrent, 0)) 51 | if err != nil { 52 | return fmt.Errorf("leveldb: could not open CURRENT file for DB %q: %v", dirname, err) 53 | } 54 | defer current.Close() 55 | stat, err := current.Stat() 56 | if err != nil { 57 | return err 58 | } 59 | n := stat.Size() 60 | if n == 0 { 61 | return fmt.Errorf("leveldb: CURRENT file for DB %q is empty", dirname) 62 | } 63 | if n > 4096 { 64 | return fmt.Errorf("leveldb: CURRENT file for DB %q is too large", dirname) 65 | } 66 | b := make([]byte, n) 67 | _, err = current.ReadAt(b, 0) 68 | if err != nil { 69 | return err 70 | } 71 | if b[n-1] != '\n' { 72 | return fmt.Errorf("leveldb: CURRENT file for DB %q is malformed", dirname) 73 | } 74 | b = b[:n-1] 75 | 76 | // Read the versionEdits in the manifest file. 77 | var bve bulkVersionEdit 78 | manifest, err := vs.fs.Open(dirname + string(os.PathSeparator) + string(b)) 79 | if err != nil { 80 | return fmt.Errorf("leveldb: could not open manifest file %q for DB %q: %v", b, dirname, err) 81 | } 82 | defer manifest.Close() 83 | rr := record.NewReader(manifest) 84 | for { 85 | r, err := rr.Next() 86 | if err == io.EOF { 87 | break 88 | } 89 | if err != nil { 90 | return err 91 | } 92 | var ve versionEdit 93 | err = ve.decode(r) 94 | if err != nil { 95 | return err 96 | } 97 | if ve.comparatorName != "" { 98 | if ve.comparatorName != vs.ucmp.Name() { 99 | return fmt.Errorf("leveldb: manifest file %q for DB %q: "+ 100 | "comparer name from file %q != comparer name from db.Options %q", 101 | b, dirname, ve.comparatorName, vs.ucmp.Name()) 102 | } 103 | } 104 | bve.accumulate(&ve) 105 | if ve.logNumber != 0 { 106 | vs.logNumber = ve.logNumber 107 | } 108 | if ve.prevLogNumber != 0 { 109 | vs.prevLogNumber = ve.prevLogNumber 110 | } 111 | if ve.nextFileNumber != 0 { 112 | vs.nextFileNumber = ve.nextFileNumber 113 | } 114 | if ve.lastSequence != 0 { 115 | vs.lastSequence = ve.lastSequence 116 | } 117 | } 118 | if vs.logNumber == 0 || vs.nextFileNumber == 0 { 119 | if vs.nextFileNumber == 2 { 120 | // We have a freshly created DB. 121 | } else { 122 | return fmt.Errorf("leveldb: incomplete manifest file %q for DB %q", b, dirname) 123 | } 124 | } 125 | vs.markFileNumUsed(vs.logNumber) 126 | vs.markFileNumUsed(vs.prevLogNumber) 127 | vs.manifestFileNumber = vs.nextFileNum() 128 | 129 | newVersion, err := bve.apply(nil, vs.icmp) 130 | if err != nil { 131 | return err 132 | } 133 | vs.append(newVersion) 134 | return nil 135 | } 136 | 137 | // TODO: describe what this function does and how it interacts concurrently 138 | // with a running leveldb. 139 | // 140 | // d.mu must be held when calling this, for the enclosing *DB d. 
141 | // TODO: actually pass d.mu, and drop and re-acquire it around the I/O. 142 | func (vs *versionSet) logAndApply(dirname string, ve *versionEdit) error { 143 | if ve.logNumber != 0 { 144 | if ve.logNumber < vs.logNumber || vs.nextFileNumber <= ve.logNumber { 145 | panic(fmt.Sprintf("leveldb: inconsistent versionEdit logNumber %d", ve.logNumber)) 146 | } 147 | } 148 | ve.nextFileNumber = vs.nextFileNumber 149 | ve.lastSequence = vs.lastSequence 150 | 151 | var bve bulkVersionEdit 152 | bve.accumulate(ve) 153 | newVersion, err := bve.apply(vs.currentVersion(), vs.icmp) 154 | if err != nil { 155 | return err 156 | } 157 | 158 | if vs.manifest == nil { 159 | if err := vs.createManifest(dirname); err != nil { 160 | return err 161 | } 162 | } 163 | 164 | w, err := vs.manifest.Next() 165 | if err != nil { 166 | return err 167 | } 168 | if err := ve.encode(w); err != nil { 169 | return err 170 | } 171 | if err := vs.manifest.Flush(); err != nil { 172 | return err 173 | } 174 | if err := vs.manifestFile.Sync(); err != nil { 175 | return err 176 | } 177 | if err := setCurrentFile(dirname, vs.opts.GetFileSystem(), vs.manifestFileNumber); err != nil { 178 | return err 179 | } 180 | 181 | // Install the new version. 182 | vs.append(newVersion) 183 | if ve.logNumber != 0 { 184 | vs.logNumber = ve.logNumber 185 | } 186 | if ve.prevLogNumber != 0 { 187 | vs.prevLogNumber = ve.prevLogNumber 188 | } 189 | return nil 190 | } 191 | 192 | // createManifest creates a manifest file that contains a snapshot of vs. 193 | func (vs *versionSet) createManifest(dirname string) (err error) { 194 | var ( 195 | filename = dbFilename(dirname, fileTypeManifest, vs.manifestFileNumber) 196 | manifestFile db.File 197 | manifest *record.Writer 198 | ) 199 | defer func() { 200 | if manifest != nil { 201 | manifest.Close() 202 | } 203 | if manifestFile != nil { 204 | manifestFile.Close() 205 | } 206 | if err != nil { 207 | vs.fs.Remove(filename) 208 | } 209 | }() 210 | manifestFile, err = vs.fs.Create(filename) 211 | if err != nil { 212 | return err 213 | } 214 | manifest = record.NewWriter(manifestFile) 215 | 216 | snapshot := versionEdit{ 217 | comparatorName: vs.ucmp.Name(), 218 | } 219 | // TODO: save compaction pointers. 
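// The snapshot is written as a single versionEdit that lists every live
// file in the current version, so replaying just this one record when the
// manifest is next loaded reconstructs that version.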
220 | for level, fileMetadata := range vs.currentVersion().files { 221 | for _, meta := range fileMetadata { 222 | snapshot.newFiles = append(snapshot.newFiles, newFileEntry{ 223 | level: level, 224 | meta: meta, 225 | }) 226 | } 227 | } 228 | 229 | w, err1 := manifest.Next() 230 | if err1 != nil { 231 | return err1 232 | } 233 | err1 = snapshot.encode(w) 234 | if err1 != nil { 235 | return err1 236 | } 237 | 238 | vs.manifest, manifest = manifest, nil 239 | vs.manifestFile, manifestFile = manifestFile, nil 240 | return nil 241 | } 242 | 243 | func (vs *versionSet) markFileNumUsed(fileNum uint64) { 244 | if vs.nextFileNumber <= fileNum { 245 | vs.nextFileNumber = fileNum + 1 246 | } 247 | } 248 | 249 | func (vs *versionSet) nextFileNum() uint64 { 250 | x := vs.nextFileNumber 251 | vs.nextFileNumber++ 252 | return x 253 | } 254 | 255 | func (vs *versionSet) append(v *version) { 256 | if v.prev != nil || v.next != nil { 257 | panic("leveldb: version linked list is inconsistent") 258 | } 259 | v.prev = vs.dummyVersion.prev 260 | v.prev.next = v 261 | v.next = &vs.dummyVersion 262 | v.next.prev = v 263 | } 264 | 265 | func (vs *versionSet) currentVersion() *version { 266 | return vs.dummyVersion.prev 267 | } 268 | 269 | func (vs *versionSet) addLiveFileNums(m map[uint64]struct{}) { 270 | for v := vs.dummyVersion.next; v != &vs.dummyVersion; v = v.next { 271 | for _, ff := range v.files { 272 | for _, f := range ff { 273 | m[f.fileNum] = struct{}{} 274 | } 275 | } 276 | } 277 | } 278 | --------------------------------------------------------------------------------