├── LICENSE
├── README.md
├── basic_test.go
├── compression.go
├── compression_test.go
├── diskv.go
├── examples
│   ├── advanced-transform
│   │   └── advanced-transform.go
│   ├── content-addressable-store
│   │   └── cas.go
│   ├── git-like-store
│   │   └── git-like-store.go
│   └── super-simple-store
│       └── super-simple-store.go
├── go.mod
├── go.sum
├── import_test.go
├── index.go
├── index_test.go
├── issues_test.go
├── keys_test.go
├── speed_test.go
└── stream_test.go
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2011-2012 Peter Bourgon
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # What is diskv?
2 |
3 | Diskv (disk-vee) is a simple, persistent key-value store written in the Go
4 | language. It starts with an incredibly simple API for storing arbitrary data on
5 | a filesystem by key, and builds several layers of performance-enhancing
6 | abstraction on top. The end result is a conceptually simple, but highly
7 | performant, disk-backed storage system.
8 |
9 | [![Build Status][1]][2]
10 |
11 | [1]: https://drone.io/github.com/peterbourgon/diskv/status.png
12 | [2]: https://drone.io/github.com/peterbourgon/diskv/latest
13 |
14 |
15 | # Installing
16 |
17 | Install [Go 1][3], either [from source][4] or [with a prepackaged binary][5].
18 | Then,
19 |
20 | ```bash
21 | $ go get github.com/peterbourgon/diskv/v3
22 | ```
23 |
24 | [3]: http://golang.org
25 | [4]: http://golang.org/doc/install/source
26 | [5]: http://golang.org/doc/install
27 |
28 |
29 | # Usage
30 |
31 | ```go
32 | package main
33 |
34 | import (
35 | "fmt"
36 | "github.com/peterbourgon/diskv/v3"
37 | )
38 |
39 | func main() {
40 | // Simplest transform function: put all the data files into the base dir.
41 | flatTransform := func(s string) []string { return []string{} }
42 |
43 | // Initialize a new diskv store, rooted at "my-data-dir", with a 1MB cache.
44 | d := diskv.New(diskv.Options{
45 | BasePath: "my-data-dir",
46 | Transform: flatTransform,
47 | CacheSizeMax: 1024 * 1024,
48 | })
49 |
50 | // Write three bytes to the key "alpha".
51 | key := "alpha"
52 | d.Write(key, []byte{'1', '2', '3'})
53 |
54 | // Read the value back out of the store.
55 | value, _ := d.Read(key)
56 | fmt.Printf("%v\n", value)
57 |
58 | // Erase the key+value from the store (and the disk).
59 | d.Erase(key)
60 | }
61 | ```
62 |
63 | More complex examples can be found in the "examples" subdirectory.
64 |
65 |
66 | # Theory
67 |
68 | ## Basic idea
69 |
70 | At its core, diskv is a map of a key (`string`) to arbitrary data (`[]byte`).
71 | The data is written to a single file on disk, with the same name as the key.
72 | The key determines where that file will be stored, via a user-provided
73 | `TransformFunc`, which takes a key and returns a slice (`[]string`)
74 | corresponding to a path list where the key file will be stored. The simplest
75 | TransformFunc,
76 |
77 | ```go
78 | func SimpleTransform (key string) []string {
79 | return []string{}
80 | }
81 | ```
82 |
83 | will place all keys in the same, base directory. The design is inspired by
84 | [Redis diskstore][6]; a TransformFunc which emulates the default diskstore
85 | behavior is available in the content-addressable-storage example.
86 |
87 | [6]: http://groups.google.com/group/redis-db/browse_thread/thread/d444bc786689bde9?pli=1
88 |
89 | **Note** that your TransformFunc should ensure that one valid key doesn't
90 | transform to a subset of another valid key. That is, it shouldn't be possible
91 | to construct valid keys that resolve to directory names. As a concrete example,
92 | if your TransformFunc splits on every 3 characters, then
93 |
94 | ```go
95 | d.Write("abcabc", val) // OK: written to /abc/abc/abcabc
96 | d.Write("abc", val) // Error: attempted write to /abc/abc, but it's a directory
97 | ```
98 |
99 | This will be addressed in an upcoming version of diskv.
100 |
101 | Probably the most important design principle behind diskv is that your data is
102 | always flatly available on the disk. diskv will never do anything that would
103 | prevent you from accessing, copying, backing up, or otherwise interacting with
104 | your data via common UNIX commandline tools.
105 |
106 | ## Advanced path transformation
107 |
108 | If you need more control over the file name written to disk or if you want to support
109 | slashes in your key name or special characters in the keys, you can use the
110 | AdvancedTransform property. You must supply a function that returns
111 | a special PathKey structure, which is a breakdown of a path and a file name. Strings
112 | returned must be clean of any slashes or special characters:
113 |
114 | ```go
115 | func AdvancedTransformExample(key string) *diskv.PathKey {
116 | path := strings.Split(key, "/")
117 | last := len(path) - 1
118 | return &diskv.PathKey{
119 | Path: path[:last],
120 | FileName: path[last] + ".txt",
121 | }
122 | }
123 |
124 | // If you provide an AdvancedTransform, you must also provide its
125 | // inverse:
126 |
127 | func InverseTransformExample(pathKey *diskv.PathKey) (key string) {
128 | txt := pathKey.FileName[len(pathKey.FileName)-4:]
129 | if txt != ".txt" {
130 | panic("Invalid file found in storage folder!")
131 | }
132 | return strings.Join(pathKey.Path, "/") + "/" + pathKey.FileName[:len(pathKey.FileName)-4]
133 | }
134 |
135 | func main() {
136 | d := diskv.New(diskv.Options{
137 | BasePath: "my-data-dir",
138 | AdvancedTransform: AdvancedTransformExample,
139 | InverseTransform: InverseTransformExample,
140 | CacheSizeMax: 1024 * 1024,
141 | })
142 | // Write some text to the key "alpha/beta/gamma".
143 | key := "alpha/beta/gamma"
144 | d.WriteString(key, "¡Hola!") // will be stored in "/alpha/beta/gamma.txt"
145 | fmt.Println(d.ReadString("alpha/beta/gamma"))
146 | }
147 | ```
148 |
149 |
150 | ## Adding a cache
151 |
152 | An in-memory caching layer is provided by combining the BasicStore
153 | functionality with a simple map structure, and keeping it up-to-date as
154 | appropriate. Since the map structure in Go is not threadsafe, it's combined
155 | with a RWMutex to provide safe concurrent access.
156 |
157 | ## Adding order
158 |
159 | diskv is a key-value store and therefore inherently unordered. An ordering
160 | system can be injected into the store by passing something which satisfies the
161 | diskv.Index interface. (A default implementation, using Google's
162 | [btree][7] package, is provided.) Basically, diskv keeps an ordered (by a
163 | user-provided Less function) index of the keys, which can be queried.
164 |
165 | [7]: https://github.com/google/btree
166 |
167 | ## Adding compression
168 |
169 | Something which implements the diskv.Compression interface may be passed
170 | during store creation, so that all Writes and Reads are filtered through
171 | a compression/decompression pipeline. Several default implementations,
172 | using stdlib compression algorithms, are provided. Note that data is cached
173 | compressed; the cost of decompression is borne with each Read.
174 |
175 | ## Streaming
176 |
177 | diskv also now provides ReadStream and WriteStream methods, to allow very large
178 | data to be handled efficiently.
179 |
180 |
181 | # Future plans
182 |
183 | * Needs plenty of robust testing: huge datasets, etc...
184 | * More thorough benchmarking
185 | * Your suggestions for use-cases I haven't thought of
186 |
187 |
188 | # Credits and contributions
189 |
190 | Original idea, design and implementation: [Peter Bourgon](https://github.com/peterbourgon)
191 | Other collaborations: [Javier Peletier](https://github.com/jpeletier) ([Epic Labs](https://www.epiclabs.io))
192 |
--------------------------------------------------------------------------------
/basic_test.go:
--------------------------------------------------------------------------------
1 | package diskv
2 |
3 | import (
4 | "bytes"
5 | "errors"
6 | "math/rand"
7 | "regexp"
8 | "strings"
9 | "testing"
10 | "time"
11 | )
12 |
// cmpBytes reports whether a and b contain the same bytes. It is
// equivalent to bytes.Equal (a nil slice and an empty slice compare
// equal); the named helper is kept for test readability.
func cmpBytes(a, b []byte) bool {
	return bytes.Equal(a, b)
}
24 |
// isCached reports whether key currently has an entry in the in-memory
// cache. Test-only helper; the read lock guards the cache map against
// concurrent mutation.
func (d *Diskv) isCached(key string) bool {
	d.mu.RLock()
	defer d.mu.RUnlock()
	_, ok := d.cache[key]
	return ok
}
31 |
32 | func TestWriteReadErase(t *testing.T) {
33 | d := New(Options{
34 | BasePath: "test-data",
35 | CacheSizeMax: 1024,
36 | })
37 | defer d.EraseAll()
38 | k, v := "a", []byte{'b'}
39 | if err := d.Write(k, v); err != nil {
40 | t.Fatalf("write: %s", err)
41 | }
42 | if readVal, err := d.Read(k); err != nil {
43 | t.Fatalf("read: %s", err)
44 | } else if bytes.Compare(v, readVal) != 0 {
45 | t.Fatalf("read: expected %s, got %s", v, readVal)
46 | }
47 | if err := d.Erase(k); err != nil {
48 | t.Fatalf("erase: %s", err)
49 | }
50 | }
51 |
// TestWRECache verifies cache behavior across the Write/Read/Erase
// lifecycle: values are cached lazily on Read (not on Write), and the
// cache entry is dropped on Erase.
func TestWRECache(t *testing.T) {
	d := New(Options{
		BasePath:     "test-data",
		CacheSizeMax: 1024,
	})
	defer d.EraseAll()
	k, v := "xxx", []byte{' ', ' ', ' '}
	if d.isCached(k) {
		t.Fatalf("key cached before Write and Read")
	}
	if err := d.Write(k, v); err != nil {
		t.Fatalf("write: %s", err)
	}
	if d.isCached(k) {
		t.Fatalf("key cached before Read")
	}
	if readVal, err := d.Read(k); err != nil {
		t.Fatalf("read: %s", err)
	} else if bytes.Compare(v, readVal) != 0 {
		t.Fatalf("read: expected %s, got %s", v, readVal)
	}
	// Caching appears to happen asynchronously after Read, so poll
	// briefly instead of asserting immediately.
	for i := 0; i < 10 && !d.isCached(k); i++ {
		time.Sleep(10 * time.Millisecond)
	}
	if !d.isCached(k) {
		t.Fatalf("key not cached after Read")
	}
	if err := d.Erase(k); err != nil {
		t.Fatalf("erase: %s", err)
	}
	if d.isCached(k) {
		t.Fatalf("key cached after Erase")
	}
}
86 |
// TestStrings writes a fixed set of keys and checks that Keys(nil) yields
// exactly that set: no unknown keys, none missing.
func TestStrings(t *testing.T) {
	d := New(Options{
		BasePath:     "test-data",
		CacheSizeMax: 1024,
	})
	defer d.EraseAll()

	// The map value tracks whether each key was seen during iteration.
	keys := map[string]bool{"a": false, "b": false, "c": false, "d": false}
	v := []byte{'1'}
	for k := range keys {
		if err := d.Write(k, v); err != nil {
			t.Fatalf("write: %s: %s", k, err)
		}
	}

	for k := range d.Keys(nil) {
		if _, present := keys[k]; present {
			t.Logf("got: %s", k)
			keys[k] = true
		} else {
			t.Fatalf("strings() returns unknown key: %s", k)
		}
	}

	for k, found := range keys {
		if !found {
			t.Errorf("never got %s", k)
		}
	}
}
117 |
// TestZeroByteCache checks that with CacheSizeMax == 0 nothing is ever
// cached, even after a successful Read.
func TestZeroByteCache(t *testing.T) {
	d := New(Options{
		BasePath:     "test-data",
		CacheSizeMax: 0,
	})
	defer d.EraseAll()

	k, v := "a", []byte{'1', '2', '3'}
	if err := d.Write(k, v); err != nil {
		t.Fatalf("Write: %s", err)
	}

	if d.isCached(k) {
		t.Fatalf("key cached, expected not-cached")
	}

	if _, err := d.Read(k); err != nil {
		t.Fatalf("Read: %s", err)
	}

	if d.isCached(k) {
		t.Fatalf("key cached, expected not-cached")
	}
}
142 |
// TestOneByteCache checks cache admission with a 1-byte budget: a 1-byte
// value fits and stays cached, while a 2-byte value must never be
// admitted (and must not evict the smaller value).
func TestOneByteCache(t *testing.T) {
	d := New(Options{
		BasePath:     "test-data",
		CacheSizeMax: 1,
	})
	defer d.EraseAll()

	k1, k2, v1, v2 := "a", "b", []byte{'1'}, []byte{'1', '2'}
	if err := d.Write(k1, v1); err != nil {
		t.Fatal(err)
	}

	if v, err := d.Read(k1); err != nil {
		t.Fatal(err)
	} else if !cmpBytes(v, v1) {
		t.Fatalf("Read: expected %s, got %s", string(v1), string(v))
	}

	// Caching is lazy after Read; poll briefly for the entry to appear.
	for i := 0; i < 10 && !d.isCached(k1); i++ {
		time.Sleep(10 * time.Millisecond)
	}
	if !d.isCached(k1) {
		t.Fatalf("expected 1-byte value to be cached, but it wasn't")
	}

	if err := d.Write(k2, v2); err != nil {
		t.Fatal(err)
	}
	if _, err := d.Read(k2); err != nil {
		t.Fatalf("--> %s", err)
	}

	for i := 0; i < 10 && (!d.isCached(k1) || d.isCached(k2)); i++ {
		time.Sleep(10 * time.Millisecond) // just wait for lazy-cache
	}
	if !d.isCached(k1) {
		t.Fatalf("1-byte value was uncached for no reason")
	}

	if d.isCached(k2) {
		t.Fatalf("2-byte value was cached, but cache max size is 1")
	}
}
186 |
// TestStaleCache overwrites a key whose old value may be cached and
// verifies that the subsequent Read returns the new value — i.e. Write
// busts any stale cache entry.
func TestStaleCache(t *testing.T) {
	d := New(Options{
		BasePath:     "test-data",
		CacheSizeMax: 1,
	})
	defer d.EraseAll()

	k, first, second := "a", "first", "second"
	if err := d.Write(k, []byte(first)); err != nil {
		t.Fatal(err)
	}

	v, err := d.Read(k)
	if err != nil {
		t.Fatal(err)
	}
	if string(v) != first {
		t.Errorf("expected '%s', got '%s'", first, v)
	}

	if err := d.Write(k, []byte(second)); err != nil {
		t.Fatal(err)
	}

	v, err = d.Read(k)
	if err != nil {
		t.Fatal(err)
	}

	if string(v) != second {
		t.Errorf("expected '%s', got '%s'", second, v)
	}
}
220 |
221 | func TestHas(t *testing.T) {
222 | d := New(Options{
223 | BasePath: "test-data",
224 | CacheSizeMax: 1024,
225 | })
226 | defer d.EraseAll()
227 |
228 | for k, v := range map[string]string{
229 | "a": "1",
230 | "foo": "2",
231 | "012345": "3",
232 | } {
233 | d.Write(k, []byte(v))
234 | }
235 |
236 | d.Read("foo") // cache one of them
237 | if !d.isCached("foo") {
238 | t.Errorf("'foo' didn't get cached")
239 | }
240 |
241 | for _, tuple := range []struct {
242 | key string
243 | expected bool
244 | }{
245 | {"a", true},
246 | {"b", false},
247 | {"foo", true},
248 | {"bar", false},
249 | {"01234", false},
250 | {"012345", true},
251 | {"0123456", false},
252 | } {
253 | if expected, got := tuple.expected, d.Has(tuple.key); expected != got {
254 | t.Errorf("Has(%s): expected %v, got %v", tuple.key, expected, got)
255 | }
256 | }
257 | }
258 |
// BrokenReader is an io.Reader stub whose Read always fails, used to
// exercise the error-cleanup path of WriteStream.
type BrokenReader struct{}

// Read implements io.Reader by unconditionally returning an error.
func (BrokenReader) Read(p []byte) (n int, err error) {
	return 0, errors.New("failed to read")
}
264 |
// TestRemovesIncompleteFiles verifies that a failed WriteStream leaves no
// partial file behind: a reader that errors mid-copy must not produce a
// readable key.
func TestRemovesIncompleteFiles(t *testing.T) {
	opts := Options{
		BasePath:     "test-data",
		CacheSizeMax: 1024,
	}
	d := New(opts)
	defer d.EraseAll()

	key, stream, sync := "key", BrokenReader{}, false

	if err := d.WriteStream(key, stream, sync); err == nil {
		t.Fatalf("Expected i/o copy error, none received.")
	}

	if _, err := d.Read(key); err == nil {
		t.Fatal("Could read the key, but it shouldn't exist")
	}
}
283 |
// TestTempDir runs the basic write/read/erase cycle with TempDir set,
// exercising the atomic (stage-in-TempDir-then-rename) write path.
func TestTempDir(t *testing.T) {
	opts := Options{
		BasePath:     "test-data",
		TempDir:      "test-data-temp",
		CacheSizeMax: 1024,
	}
	d := New(opts)
	defer d.EraseAll()

	k, v := "a", []byte{'b'}
	if err := d.Write(k, v); err != nil {
		t.Fatalf("write: %s", err)
	}
	if readVal, err := d.Read(k); err != nil {
		t.Fatalf("read: %s", err)
	} else if bytes.Compare(v, readVal) != 0 {
		t.Fatalf("read: expected %s, got %s", v, readVal)
	}
	if err := d.Erase(k); err != nil {
		t.Fatalf("erase: %s", err)
	}
}
306 |
// CrashingReader is an io.Reader stub that panics on Read, simulating a
// crash in the middle of streaming data into the store.
type CrashingReader struct{}

// Read implements io.Reader by panicking unconditionally.
func (CrashingReader) Read(p []byte) (n int, err error) {
	panic("System has crashed while reading the stream")
}
312 |
// TestAtomicWrite verifies that a write interrupted by a panic leaves the
// store completely empty when TempDir staging is enabled: the crashed
// write must never become visible under BasePath.
func TestAtomicWrite(t *testing.T) {
	opts := Options{
		BasePath: "test-data",
		// Test would fail if TempDir is not set here.
		TempDir:      "test-data-temp",
		CacheSizeMax: 1024,
	}
	d := New(opts)
	defer d.EraseAll()

	key := "key"
	// Run the crashing write in a closure so the panic can be swallowed
	// and the assertions below still execute.
	func() {
		defer func() {
			recover() // Ignore panicking error
		}()

		stream := CrashingReader{}
		d.WriteStream(key, stream, false)
	}()

	if d.Has(key) {
		t.Fatal("Has key, but it shouldn't exist")
	}
	if _, ok := <-d.Keys(nil); ok {
		t.Fatal("Store isn't empty")
	}
}
340 |
// letterBytes is the alphabet used to build random hex-like test keys.
const letterBytes = "abcdef0123456789"

// randStringBytes returns a random string of length n, with each byte
// drawn independently from letterBytes.
func randStringBytes(n int) string {
	var sb strings.Builder
	sb.Grow(n)
	for i := 0; i < n; i++ {
		sb.WriteByte(letterBytes[rand.Intn(len(letterBytes))])
	}
	return sb.String()
}
350 |
// TestHybridStore exercises an AdvancedTransform that routes three kinds
// of keys differently: 64-char hex digests go under objects/<2-char
// prefix>, slash-separated keys become nested directories, and plain
// keys land in the base directory. It writes and re-reads 300 random
// entries to confirm the transform and its inverse round-trip.
func TestHybridStore(t *testing.T) {
	regex := regexp.MustCompile("[0-9a-fA-F]{64}")

	transformFunc := func(s string) *PathKey {

		// Content-addressable case: shard by the first two hex chars.
		if regex.MatchString(s) {
			return &PathKey{Path: []string{"objects", s[0:2]},
				FileName: s,
			}
		}

		// Slash-separated case: leading components become directories.
		folders := strings.Split(s, "/")
		lfolders := len(folders)
		if lfolders > 1 {
			return &PathKey{Path: folders[:lfolders-1],
				FileName: folders[lfolders-1],
			}
		}

		// Plain key: store directly in the base directory.
		return &PathKey{Path: []string{},
			FileName: s,
		}
	}

	inverseTransformFunc := func(pathKey *PathKey) string {

		if regex.MatchString(pathKey.FileName) {
			return pathKey.FileName

		}

		if len(pathKey.Path) == 0 {
			return pathKey.FileName
		}

		return strings.Join(pathKey.Path, "/") + "/" + pathKey.FileName

	}
	opts := Options{
		BasePath:          "test-data",
		CacheSizeMax:      1024,
		AdvancedTransform: transformFunc,
		InverseTransform:  inverseTransformFunc,
	}
	d := New(opts)
	defer d.EraseAll()

	testData := map[string]string{}

	// 100 hex digests (content-addressable-style keys).
	for i := 0; i < 100; i++ {
		testData[randStringBytes(64)] = randStringBytes(100)
	}

	// 100 short plain keys.
	for i := 0; i < 100; i++ {
		testData[randStringBytes(20)] = randStringBytes(100)
	}

	// 100 slash-separated keys of random depth (1-10 directories).
	for i := 0; i < 100; i++ {
		numsep := rand.Intn(10) + 1
		key := ""
		for j := 0; j < numsep; j++ {
			key += randStringBytes(10) + "/"
		}
		key += randStringBytes(40)
		testData[key] = randStringBytes(100)
	}

	for k, v := range testData {
		d.WriteString(k, v)
	}

	for k, v := range testData {
		readVal := d.ReadString(k)

		if v != readVal {
			t.Fatalf("read: expected %s, got %s", v, readVal)
		}
	}

}
431 |
--------------------------------------------------------------------------------
/compression.go:
--------------------------------------------------------------------------------
1 | package diskv
2 |
3 | import (
4 | "compress/flate"
5 | "compress/gzip"
6 | "compress/zlib"
7 | "io"
8 | )
9 |
// Compression is an interface that Diskv uses to implement compression of
// data. Writer takes a destination io.Writer and returns a WriteCloser that
// compresses all data written through it. Reader takes a source io.Reader and
// returns a ReadCloser that decompresses all data read through it. You may
// define these methods on your own type, or use one of the NewCompression
// helpers.
//
// Note that values are cached in compressed form, so the cost of
// decompression is paid on every Read.
type Compression interface {
	Writer(dst io.Writer) (io.WriteCloser, error)
	Reader(src io.Reader) (io.ReadCloser, error)
}
20 |
// NewGzipCompression returns a Gzip-based Compression using the default
// compression level (flate.DefaultCompression).
func NewGzipCompression() Compression {
	return NewGzipCompressionLevel(flate.DefaultCompression)
}
25 |
26 | // NewGzipCompressionLevel returns a Gzip-based Compression with the given level.
27 | func NewGzipCompressionLevel(level int) Compression {
28 | return &genericCompression{
29 | wf: func(w io.Writer) (io.WriteCloser, error) { return gzip.NewWriterLevel(w, level) },
30 | rf: func(r io.Reader) (io.ReadCloser, error) { return gzip.NewReader(r) },
31 | }
32 | }
33 |
// NewZlibCompression returns a Zlib-based Compression using the default
// compression level (flate.DefaultCompression).
func NewZlibCompression() Compression {
	return NewZlibCompressionLevel(flate.DefaultCompression)
}
38 |
// NewZlibCompressionLevel returns a Zlib-based Compression with the given
// level and no preset dictionary.
func NewZlibCompressionLevel(level int) Compression {
	return NewZlibCompressionLevelDict(level, nil)
}
43 |
44 | // NewZlibCompressionLevelDict returns a Zlib-based Compression with the given
45 | // level, based on the given dictionary.
46 | func NewZlibCompressionLevelDict(level int, dict []byte) Compression {
47 | return &genericCompression{
48 | func(w io.Writer) (io.WriteCloser, error) { return zlib.NewWriterLevelDict(w, level, dict) },
49 | func(r io.Reader) (io.ReadCloser, error) { return zlib.NewReaderDict(r, dict) },
50 | }
51 | }
52 |
// genericCompression implements Compression by delegating to the wrapped
// writer/reader factory functions.
type genericCompression struct {
	wf func(w io.Writer) (io.WriteCloser, error) // compressing-writer factory
	rf func(r io.Reader) (io.ReadCloser, error)  // decompressing-reader factory
}

// Writer implements Compression.
func (g *genericCompression) Writer(dst io.Writer) (io.WriteCloser, error) {
	return g.wf(dst)
}

// Reader implements Compression.
func (g *genericCompression) Reader(src io.Reader) (io.ReadCloser, error) {
	return g.rf(src)
}
65 |
--------------------------------------------------------------------------------
/compression_test.go:
--------------------------------------------------------------------------------
1 | package diskv
2 |
3 | import (
4 | "compress/flate"
5 | "fmt"
6 | "math/rand"
7 | "os"
8 | "testing"
9 | "time"
10 | )
11 |
// init seeds the global PRNG so random payloads differ between runs.
func init() {
	rand.Seed(time.Now().UnixNano())
}
15 |
16 | func testCompressionWith(t *testing.T, c Compression, name string) {
17 | d := New(Options{
18 | BasePath: "compression-test",
19 | CacheSizeMax: 0,
20 | Compression: c,
21 | })
22 | defer d.EraseAll()
23 |
24 | sz := 4096
25 | val := make([]byte, sz)
26 | for i := 0; i < sz; i++ {
27 | val[i] = byte('a' + rand.Intn(26)) // {a-z}; should compress some
28 | }
29 |
30 | key := "a"
31 | if err := d.Write(key, val); err != nil {
32 | t.Fatalf("write failed: %s", err)
33 | }
34 |
35 | targetFile := fmt.Sprintf("%s%c%s", d.BasePath, os.PathSeparator, key)
36 | fi, err := os.Stat(targetFile)
37 | if err != nil {
38 | t.Fatalf("%s: %s", targetFile, err)
39 | }
40 |
41 | if fi.Size() >= int64(sz) {
42 | t.Fatalf("%s: size=%d, expected smaller", targetFile, fi.Size())
43 | }
44 | t.Logf("%s compressed %d to %d", name, sz, fi.Size())
45 |
46 | readVal, err := d.Read(key)
47 | if len(readVal) != sz {
48 | t.Fatalf("read: expected size=%d, got size=%d", sz, len(readVal))
49 | }
50 |
51 | for i := 0; i < sz; i++ {
52 | if readVal[i] != val[i] {
53 | t.Fatalf("i=%d: expected %v, got %v", i, val[i], readVal[i])
54 | }
55 | }
56 | }
57 |
// TestGzipDefault round-trips with gzip at the default level.
func TestGzipDefault(t *testing.T) {
	testCompressionWith(t, NewGzipCompression(), "gzip")
}

// TestGzipBestCompression round-trips with gzip at maximum compression.
func TestGzipBestCompression(t *testing.T) {
	testCompressionWith(t, NewGzipCompressionLevel(flate.BestCompression), "gzip-max")
}

// TestGzipBestSpeed round-trips with gzip at maximum speed.
func TestGzipBestSpeed(t *testing.T) {
	testCompressionWith(t, NewGzipCompressionLevel(flate.BestSpeed), "gzip-min")
}

// TestZlib round-trips with zlib at the default level.
func TestZlib(t *testing.T) {
	testCompressionWith(t, NewZlibCompression(), "zlib")
}
73 |
--------------------------------------------------------------------------------
/diskv.go:
--------------------------------------------------------------------------------
1 | // Package diskv (disk-vee) is a simple, persistent, key-value store.
2 | // It stores all data flatly on the filesystem.
4 | package diskv
5 |
6 | import (
7 | "bytes"
8 | "errors"
9 | "fmt"
10 | "io"
11 | "io/ioutil"
12 | "os"
13 | "path/filepath"
14 | "strings"
15 | "sync"
16 | "syscall"
17 | )
18 |
// Defaults applied by New when the corresponding Options field is unset.
const (
	defaultBasePath             = "diskv"
	defaultFilePerm os.FileMode = 0666 // file mode, before umask
	defaultPathPerm os.FileMode = 0777 // directory mode, before umask
)
24 |
// PathKey represents a string key that has been transformed to
// a directory and file name where the content will eventually
// be stored.
type PathKey struct {
	Path     []string // directory components, relative to BasePath
	FileName string   // final on-disk file name
	// originalKey remembers the key this PathKey was derived from, so it
	// can be recovered without invoking the inverse transform.
	originalKey string
}
33 |
var (
	// Fallback transforms used when none are configured: every key maps
	// to a file of the same name directly in the base directory.
	defaultAdvancedTransform = func(s string) *PathKey { return &PathKey{Path: []string{}, FileName: s} }
	defaultInverseTransform  = func(pathKey *PathKey) string { return pathKey.FileName }

	errCanceled        = errors.New("canceled")
	errEmptyKey        = errors.New("empty key")
	errBadKey          = errors.New("bad key")
	errImportDirectory = errors.New("can't import a directory")
)
42 |
// TransformFunction transforms a key into a slice of strings, with each
// element in the slice representing a directory in the file path where the
// key's entry will eventually be stored.
//
// For example, if TransformFunc transforms "abcdef" to ["ab", "cde", "f"],
// the final location of the data file will be <BasePath>/ab/cde/f/abcdef.
type TransformFunction func(s string) []string

// AdvancedTransformFunction transforms a key into a PathKey.
//
// A PathKey contains a slice of strings, where each element in the slice
// represents a directory in the file path where the key's entry will eventually
// be stored, as well as the filename.
//
// For example, if AdvancedTransformFunc transforms "abcdef/file.txt" to the
// PathKey {Path: ["ab", "cde", "f"], FileName: "file.txt"}, the final location
// of the data file will be <BasePath>/ab/cde/f/file.txt.
//
// You must provide an InverseTransformFunction if you use an
// AdvancedTransformFunction.
type AdvancedTransformFunction func(s string) *PathKey

// InverseTransformFunction takes a PathKey and converts it back to a Diskv key.
// In effect, it's the opposite of an AdvancedTransformFunction.
type InverseTransformFunction func(pathKey *PathKey) string
68 |
// Options define a set of properties that dictate Diskv behavior.
// All values are optional.
type Options struct {
	BasePath          string                    // root directory for all data; defaults to "diskv"
	Transform         TransformFunction         // classic key-to-path mapping
	AdvancedTransform AdvancedTransformFunction // key-to-PathKey mapping; requires InverseTransform
	InverseTransform  InverseTransformFunction  // PathKey-to-key mapping, inverse of AdvancedTransform
	CacheSizeMax      uint64                    // maximum cache size, in bytes
	PathPerm          os.FileMode               // permissions for created directories; defaults to 0777
	FilePerm          os.FileMode               // permissions for created files; defaults to 0666
	// If TempDir is set, it will enable filesystem atomic writes by
	// writing temporary files to that location before being moved
	// to BasePath.
	// Note that TempDir MUST be on the same device/partition as
	// BasePath.
	TempDir string

	Index     Index        // optional ordered index of keys
	IndexLess LessFunction // ordering for Index; used only when Index is also set

	Compression Compression // optional compression applied to all writes/reads
}
91 |
// Diskv implements the Diskv interface. You shouldn't construct Diskv
// structures directly; instead, use the New constructor.
type Diskv struct {
	Options
	mu        sync.RWMutex      // guards cache and cacheSize
	cache     map[string][]byte // in-memory read-through cache, key -> value
	cacheSize uint64            // total size of currently cached values, in bytes
}
100 |
101 | // New returns an initialized Diskv structure, ready to use.
102 | // If the path identified by baseDir already contains data,
103 | // it will be accessible, but not yet cached.
104 | func New(o Options) *Diskv {
105 | if o.BasePath == "" {
106 | o.BasePath = defaultBasePath
107 | }
108 |
109 | if o.AdvancedTransform == nil {
110 | if o.Transform == nil {
111 | o.AdvancedTransform = defaultAdvancedTransform
112 | } else {
113 | o.AdvancedTransform = convertToAdvancedTransform(o.Transform)
114 | }
115 | if o.InverseTransform == nil {
116 | o.InverseTransform = defaultInverseTransform
117 | }
118 | } else {
119 | if o.InverseTransform == nil {
120 | panic("You must provide an InverseTransform function in advanced mode")
121 | }
122 | }
123 |
124 | if o.PathPerm == 0 {
125 | o.PathPerm = defaultPathPerm
126 | }
127 | if o.FilePerm == 0 {
128 | o.FilePerm = defaultFilePerm
129 | }
130 |
131 | d := &Diskv{
132 | Options: o,
133 | cache: map[string][]byte{},
134 | cacheSize: 0,
135 | }
136 |
137 | if d.Index != nil && d.IndexLess != nil {
138 | d.Index.Initialize(d.IndexLess, d.Keys(nil))
139 | }
140 |
141 | return d
142 | }
143 |
144 | // convertToAdvancedTransform takes a classic Transform function and
145 | // converts it to the new AdvancedTransform
146 | func convertToAdvancedTransform(oldFunc func(s string) []string) AdvancedTransformFunction {
147 | return func(s string) *PathKey {
148 | return &PathKey{Path: oldFunc(s), FileName: s}
149 | }
150 | }
151 |
// Write synchronously writes the key-value pair to disk, making it immediately
// available for reads. Write relies on the filesystem to perform an eventual
// sync to physical media. If you need stronger guarantees, see WriteStream.
func (d *Diskv) Write(key string, val []byte) error {
	// Delegate to WriteStream with sync=false (no explicit fsync).
	return d.WriteStream(key, bytes.NewReader(val), false)
}
158 |
// WriteString writes a string key-value pair to disk. It is a convenience
// wrapper around Write.
func (d *Diskv) WriteString(key string, val string) error {
	return d.Write(key, []byte(val))
}
163 |
164 | func (d *Diskv) transform(key string) (pathKey *PathKey) {
165 | pathKey = d.AdvancedTransform(key)
166 | pathKey.originalKey = key
167 | return pathKey
168 | }
169 |
170 | // WriteStream writes the data represented by the io.Reader to the disk, under
171 | // the provided key. If sync is true, WriteStream performs an explicit sync on
172 | // the file as soon as it's written.
173 | //
174 | // bytes.Buffer provides io.Reader semantics for basic data types.
175 | func (d *Diskv) WriteStream(key string, r io.Reader, sync bool) error {
176 | if len(key) <= 0 {
177 | return errEmptyKey
178 | }
179 |
180 | pathKey := d.transform(key)
181 |
182 | // Ensure keys cannot evaluate to paths that would not exist
183 | for _, pathPart := range pathKey.Path {
184 | if strings.ContainsRune(pathPart, os.PathSeparator) {
185 | return errBadKey
186 | }
187 | }
188 |
189 | if strings.ContainsRune(pathKey.FileName, os.PathSeparator) {
190 | return errBadKey
191 | }
192 |
193 | d.mu.Lock()
194 | defer d.mu.Unlock()
195 |
196 | return d.writeStreamWithLock(pathKey, r, sync)
197 | }
198 |
// createKeyFileWithLock either creates the key file directly, or
// creates a temporary file in TempDir if it is set. The caller must hold
// d.mu and is responsible for closing (and, in the TempDir case,
// renaming) the returned file.
func (d *Diskv) createKeyFileWithLock(pathKey *PathKey) (*os.File, error) {
	if d.TempDir != "" {
		// Atomic-write mode: stage the file in TempDir; the caller
		// renames it into its final location after a successful write.
		if err := os.MkdirAll(d.TempDir, d.PathPerm); err != nil {
			return nil, fmt.Errorf("temp mkdir: %s", err)
		}
		f, err := ioutil.TempFile(d.TempDir, "")
		if err != nil {
			return nil, fmt.Errorf("temp file: %s", err)
		}

		// TempFile chooses its own restrictive mode; apply the
		// configured FilePerm explicitly.
		if err := os.Chmod(f.Name(), d.FilePerm); err != nil {
			f.Close()           // error deliberately ignored
			os.Remove(f.Name()) // error deliberately ignored
			return nil, fmt.Errorf("chmod: %s", err)
		}
		return f, nil
	}

	mode := os.O_WRONLY | os.O_CREATE | os.O_TRUNC // overwrite if exists
	f, err := os.OpenFile(d.completeFilename(pathKey), mode, d.FilePerm)
	if err != nil {
		return nil, fmt.Errorf("open file: %s", err)
	}
	return f, nil
}
226 |
// writeStreamWithLock writes the contents of r to the file identified by
// pathKey. It does no input validation checking; the caller must hold
// d.mu. On any failure the partially-written file is removed, so no torn
// value is left on disk.
func (d *Diskv) writeStreamWithLock(pathKey *PathKey, r io.Reader, sync bool) error {
	if err := d.ensurePathWithLock(pathKey); err != nil {
		return fmt.Errorf("ensure path: %s", err)
	}

	f, err := d.createKeyFileWithLock(pathKey)
	if err != nil {
		return fmt.Errorf("create key file: %s", err)
	}

	// Wrap the file in the compression writer, if any; otherwise use a
	// no-op closer so the Close below is uniform for both cases.
	wc := io.WriteCloser(&nopWriteCloser{f})
	if d.Compression != nil {
		wc, err = d.Compression.Writer(f)
		if err != nil {
			f.Close()           // error deliberately ignored
			os.Remove(f.Name()) // error deliberately ignored
			return fmt.Errorf("compression writer: %s", err)
		}
	}

	if _, err := io.Copy(wc, r); err != nil {
		f.Close()           // error deliberately ignored
		os.Remove(f.Name()) // error deliberately ignored
		return fmt.Errorf("i/o copy: %s", err)
	}

	// Close the compressor first so any buffered compressed bytes reach
	// f before the optional sync below.
	if err := wc.Close(); err != nil {
		f.Close()           // error deliberately ignored
		os.Remove(f.Name()) // error deliberately ignored
		return fmt.Errorf("compression close: %s", err)
	}

	if sync {
		if err := f.Sync(); err != nil {
			f.Close()           // error deliberately ignored
			os.Remove(f.Name()) // error deliberately ignored
			return fmt.Errorf("file sync: %s", err)
		}
	}

	if err := f.Close(); err != nil {
		return fmt.Errorf("file close: %s", err)
	}

	// In TempDir (atomic-write) mode the data was staged elsewhere; move
	// it into its final location now that it is fully written.
	fullPath := d.completeFilename(pathKey)
	if f.Name() != fullPath {
		if err := os.Rename(f.Name(), fullPath); err != nil {
			os.Remove(f.Name()) // error deliberately ignored
			return fmt.Errorf("rename: %s", err)
		}
	}

	if d.Index != nil {
		d.Index.Insert(pathKey.originalKey)
	}

	d.bustCacheWithLock(pathKey.originalKey) // cache only on read

	return nil
}
288 |
// Import imports the source file into diskv under the destination key. If the
// destination key already exists, it's overwritten. If move is true, the
// source file is removed after a successful import.
func (d *Diskv) Import(srcFilename, dstKey string, move bool) (err error) {
	if dstKey == "" {
		return errEmptyKey
	}

	// Only regular files can be imported.
	if fi, err := os.Stat(srcFilename); err != nil {
		return err
	} else if fi.IsDir() {
		return errImportDirectory
	}

	dstPathKey := d.transform(dstKey)

	d.mu.Lock()
	defer d.mu.Unlock()

	if err := d.ensurePathWithLock(dstPathKey); err != nil {
		return fmt.Errorf("ensure path: %s", err)
	}

	// Fast path for move: try a rename, which avoids copying the data.
	if move {
		if err := syscall.Rename(srcFilename, d.completeFilename(dstPathKey)); err == nil {
			d.bustCacheWithLock(dstPathKey.originalKey)
			return nil
		} else if err != syscall.EXDEV {
			// Any error other than EXDEV (source and destination on
			// different devices, where rename cannot work) is fatal;
			// EXDEV falls through to the copy path below.
			return err
		}
	}

	f, err := os.Open(srcFilename)
	if err != nil {
		return err
	}
	defer f.Close()
	err = d.writeStreamWithLock(dstPathKey, f, false)
	if err == nil && move {
		err = os.Remove(srcFilename)
	}
	return err
}
333 |
// Read reads the key and returns the value.
// If the key is available in the cache, Read won't touch the disk.
// If the key is not in the cache, Read will have the side-effect of
// lazily caching the value.
func (d *Diskv) Read(key string) ([]byte, error) {
	rc, err := d.ReadStream(key, false)
	if err != nil {
		return []byte{}, err
	}
	defer rc.Close()
	// Draining the stream is what populates the cache (see siphon).
	return ioutil.ReadAll(rc)
}
346 |
// ReadString reads the key and returns a string value
// In case of error, an empty string is returned
func (d *Diskv) ReadString(key string) string {
	// Errors are deliberately swallowed: callers of ReadString cannot
	// distinguish a missing key from an empty value.
	value, _ := d.Read(key)
	return string(value)
}
353 |
// ReadStream reads the key and returns the value (data) as an io.ReadCloser.
// If the value is cached from a previous read, and direct is false,
// ReadStream will use the cached value. Otherwise, it will return a handle to
// the file on disk, and cache the data on read.
//
// If direct is true, ReadStream will lazily delete any cached value for the
// key, and return a direct handle to the file on disk.
//
// If compression is enabled, ReadStream taps into the io.Reader stream prior
// to decompression, and caches the compressed data.
func (d *Diskv) ReadStream(key string, direct bool) (io.ReadCloser, error) {

	pathKey := d.transform(key)
	d.mu.RLock()
	defer d.mu.RUnlock()

	if val, ok := d.cache[key]; ok {
		if !direct {
			// Serve from cache. Cached bytes are the on-disk (possibly
			// compressed) representation, so decompress on the way out.
			buf := bytes.NewReader(val)
			if d.Compression != nil {
				return d.Compression.Reader(buf)
			}
			return ioutil.NopCloser(buf), nil
		}

		// Direct read with a cached value: evict asynchronously, because
		// only the read lock is held here and eviction needs the write lock.
		go func() {
			d.mu.Lock()
			defer d.mu.Unlock()
			d.uncacheWithLock(key, uint64(len(val)))
		}()
	}

	return d.readWithRLock(pathKey)
}
388 |
389 | // read ignores the cache, and returns an io.ReadCloser representing the
390 | // decompressed data for the given key, streamed from the disk. Clients should
391 | // acquire a read lock on the Diskv and check the cache themselves before
392 | // calling read.
393 | func (d *Diskv) readWithRLock(pathKey *PathKey) (io.ReadCloser, error) {
394 | filename := d.completeFilename(pathKey)
395 |
396 | fi, err := os.Stat(filename)
397 | if err != nil {
398 | return nil, err
399 | }
400 | if fi.IsDir() {
401 | return nil, os.ErrNotExist
402 | }
403 |
404 | f, err := os.Open(filename)
405 | if err != nil {
406 | return nil, err
407 | }
408 |
409 | var r io.Reader
410 | if d.CacheSizeMax > 0 {
411 | r = newSiphon(f, d, pathKey.originalKey)
412 | } else {
413 | r = &closingReader{f}
414 | }
415 |
416 | var rc = io.ReadCloser(ioutil.NopCloser(r))
417 | if d.Compression != nil {
418 | rc, err = d.Compression.Reader(r)
419 | if err != nil {
420 | return nil, err
421 | }
422 | }
423 |
424 | return rc, nil
425 | }
426 |
427 | // closingReader provides a Reader that automatically closes the
428 | // embedded ReadCloser when it reaches EOF
429 | type closingReader struct {
430 | rc io.ReadCloser
431 | }
432 |
433 | func (cr closingReader) Read(p []byte) (int, error) {
434 | n, err := cr.rc.Read(p)
435 | if err == io.EOF {
436 | if closeErr := cr.rc.Close(); closeErr != nil {
437 | return n, closeErr // close must succeed for Read to succeed
438 | }
439 | }
440 | return n, err
441 | }
442 |
// siphon is like a TeeReader: it copies all data read through it to an
// internal buffer, and moves that buffer to the cache at EOF.
type siphon struct {
	f   *os.File      // underlying file being streamed
	d   *Diskv        // store whose cache receives the bytes at EOF
	key string        // cache key for the buffered data
	buf *bytes.Buffer // accumulates everything read so far
}

// newSiphon constructs a siphoning reader that represents the passed file.
// When a successful series of reads ends in an EOF, the siphon will write
// the buffered data to Diskv's cache under the given key.
func newSiphon(f *os.File, d *Diskv, key string) io.Reader {
	return &siphon{
		f:   f,
		d:   d,
		key: key,
		buf: &bytes.Buffer{},
	}
}
463 |
// Read implements the io.Reader interface for siphon.
// Each successful read is mirrored into the internal buffer; at EOF the
// accumulated bytes are handed to the cache and the underlying file closed.
func (s *siphon) Read(p []byte) (int, error) {
	n, err := s.f.Read(p)

	if err == nil {
		return s.buf.Write(p[0:n]) // Write must succeed for Read to succeed
	}

	if err == io.EOF {
		// NOTE(review): if a read ever returned n > 0 alongside io.EOF,
		// those final bytes would not reach the buffer — confirm the
		// underlying *os.File always reports (0, io.EOF) at end of file.
		s.d.cacheWithoutLock(s.key, s.buf.Bytes()) // cache may fail
		if closeErr := s.f.Close(); closeErr != nil {
			return n, closeErr // close must succeed for Read to succeed
		}
		return n, err
	}

	return n, err
}
482 |
483 | // Erase synchronously erases the given key from the disk and the cache.
484 | func (d *Diskv) Erase(key string) error {
485 | pathKey := d.transform(key)
486 | d.mu.Lock()
487 | defer d.mu.Unlock()
488 |
489 | d.bustCacheWithLock(key)
490 |
491 | // erase from index
492 | if d.Index != nil {
493 | d.Index.Delete(key)
494 | }
495 |
496 | // erase from disk
497 | filename := d.completeFilename(pathKey)
498 | if s, err := os.Stat(filename); err == nil {
499 | if s.IsDir() {
500 | return errBadKey
501 | }
502 | if err = os.Remove(filename); err != nil {
503 | return err
504 | }
505 | } else {
506 | // Return err as-is so caller can do os.IsNotExist(err).
507 | return err
508 | }
509 |
510 | // clean up and return
511 | d.pruneDirsWithLock(key)
512 | return nil
513 | }
514 |
515 | // EraseAll will delete all of the data from the store, both in the cache and on
516 | // the disk. Note that EraseAll doesn't distinguish diskv-related data from non-
517 | // diskv-related data. Care should be taken to always specify a diskv base
518 | // directory that is exclusively for diskv data.
519 | func (d *Diskv) EraseAll() error {
520 | d.mu.Lock()
521 | defer d.mu.Unlock()
522 | d.cache = make(map[string][]byte)
523 | d.cacheSize = 0
524 | if d.TempDir != "" {
525 | os.RemoveAll(d.TempDir) // errors ignored
526 | }
527 | return os.RemoveAll(d.BasePath)
528 | }
529 |
530 | // Has returns true if the given key exists.
531 | func (d *Diskv) Has(key string) bool {
532 | pathKey := d.transform(key)
533 | d.mu.Lock()
534 | defer d.mu.Unlock()
535 |
536 | if _, ok := d.cache[key]; ok {
537 | return true
538 | }
539 |
540 | filename := d.completeFilename(pathKey)
541 | s, err := os.Stat(filename)
542 | if err != nil {
543 | return false
544 | }
545 | if s.IsDir() {
546 | return false
547 | }
548 |
549 | return true
550 | }
551 |
// Keys returns a channel that will yield every key accessible by the store,
// in undefined order. If a cancel channel is provided, closing it will
// terminate and close the keys channel.
func (d *Diskv) Keys(cancel <-chan struct{}) <-chan string {
	// Enumerating with the empty prefix matches every key.
	return d.KeysPrefix("", cancel)
}
558 |
559 | // KeysPrefix returns a channel that will yield every key accessible by the
560 | // store with the given prefix, in undefined order. If a cancel channel is
561 | // provided, closing it will terminate and close the keys channel. If the
562 | // provided prefix is the empty string, all keys will be yielded.
563 | func (d *Diskv) KeysPrefix(prefix string, cancel <-chan struct{}) <-chan string {
564 | var prepath string
565 | if prefix == "" {
566 | prepath = d.BasePath
567 | } else {
568 | prefixKey := d.transform(prefix)
569 | prepath = d.pathFor(prefixKey)
570 | }
571 | c := make(chan string)
572 | go func() {
573 | filepath.Walk(prepath, d.walker(c, prefix, cancel))
574 | close(c)
575 | }()
576 | return c
577 | }
578 |
// walker returns a function which satisfies the filepath.WalkFunc interface.
// It sends every non-directory file entry down the channel c.
// Each visited path is converted back into a key via InverseTransform, and
// only keys matching the requested prefix are emitted; closing cancel aborts
// the walk with errCanceled.
func (d *Diskv) walker(c chan<- string, prefix string, cancel <-chan struct{}) filepath.WalkFunc {
	return func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		// Rebuild the PathKey from the walked path, relative to BasePath.
		relPath, _ := filepath.Rel(d.BasePath, path)
		dir, file := filepath.Split(relPath)
		pathSplit := strings.Split(dir, string(filepath.Separator))
		// Split leaves a trailing empty element because dir ends with a
		// separator; drop it.
		pathSplit = pathSplit[:len(pathSplit)-1]

		pathKey := &PathKey{
			Path:     pathSplit,
			FileName: file,
		}

		key := d.InverseTransform(pathKey)

		if info.IsDir() || !strings.HasPrefix(key, prefix) {
			return nil // "pass"
		}

		// Deliver the key, unless the consumer has cancelled the walk.
		select {
		case c <- key:
		case <-cancel:
			return errCanceled
		}

		return nil
	}
}
612 |
// pathFor returns the absolute path for location on the filesystem where the
// data for the given key will be stored.
// Only the directory portion is returned; see completeFilename for the file.
func (d *Diskv) pathFor(pathKey *PathKey) string {
	return filepath.Join(d.BasePath, filepath.Join(pathKey.Path...))
}
618 |
// ensurePathWithLock is a helper function that generates all necessary
// directories on the filesystem for the given key.
// Callers must hold d.mu.
func (d *Diskv) ensurePathWithLock(pathKey *PathKey) error {
	return os.MkdirAll(d.pathFor(pathKey), d.PathPerm)
}
624 |
// completeFilename returns the absolute path to the file for the given key:
// the key's directory (pathFor) joined with its file name.
func (d *Diskv) completeFilename(pathKey *PathKey) string {
	return filepath.Join(d.pathFor(pathKey), pathKey.FileName)
}
629 |
// cacheWithLock attempts to cache the given key-value pair in the store's
// cache. It can fail if the value is larger than the cache's maximum size.
// Callers must hold d.mu.
func (d *Diskv) cacheWithLock(key string, val []byte) error {
	// If the key already exists, delete it.
	d.bustCacheWithLock(key)

	valueSize := uint64(len(val))
	if err := d.ensureCacheSpaceWithLock(valueSize); err != nil {
		return fmt.Errorf("%s; not caching", err)
	}

	// be very strict about memory guarantees
	// (ensureCacheSpaceWithLock must have freed enough room; a violation
	// here is a programming error in the cache accounting, not a runtime
	// condition, hence the panic)
	if (d.cacheSize + valueSize) > d.CacheSizeMax {
		panic(fmt.Sprintf("failed to make room for value (%d/%d)", valueSize, d.CacheSizeMax))
	}

	d.cache[key] = val
	d.cacheSize += valueSize
	return nil
}
650 |
// cacheWithoutLock acquires the store's (write) mutex and calls cacheWithLock.
// Used by the siphon, which runs outside any lock held by the reader.
func (d *Diskv) cacheWithoutLock(key string, val []byte) error {
	d.mu.Lock()
	defer d.mu.Unlock()
	return d.cacheWithLock(key, val)
}
657 |
658 | func (d *Diskv) bustCacheWithLock(key string) {
659 | if val, ok := d.cache[key]; ok {
660 | d.uncacheWithLock(key, uint64(len(val)))
661 | }
662 | }
663 |
// uncacheWithLock evicts key and subtracts sz — the caller-supplied size of
// the evicted value — from the running cache-size total. Callers must hold
// d.mu and pass the exact size that was charged when the value was cached.
func (d *Diskv) uncacheWithLock(key string, sz uint64) {
	d.cacheSize -= sz
	delete(d.cache, key)
}
668 |
// pruneDirsWithLock deletes empty directories in the path walk leading to the
// key k. Typically this function is called after an Erase is made.
// It works from the deepest directory upward, stopping at the first directory
// that still has entries. Callers must hold d.mu.
func (d *Diskv) pruneDirsWithLock(key string) error {
	pathlist := d.transform(key).Path
	for i := range pathlist {
		// i = 0 is the deepest directory; each iteration moves one level up.
		dir := filepath.Join(d.BasePath, filepath.Join(pathlist[:len(pathlist)-i]...))

		// thanks to Steven Blenkinsop for this snippet
		switch fi, err := os.Stat(dir); true {
		case err != nil:
			return err
		case !fi.IsDir():
			panic(fmt.Sprintf("corrupt dirstate at %s", dir))
		}

		// Any remaining entry (file or subdirectory) means this level —
		// and therefore every level above it — must be kept.
		nlinks, err := filepath.Glob(filepath.Join(dir, "*"))
		if err != nil {
			return err
		} else if len(nlinks) > 0 {
			return nil // has subdirs -- do not prune
		}
		if err = os.Remove(dir); err != nil {
			return err
		}
	}

	return nil
}
697 |
// ensureCacheSpaceWithLock deletes entries from the cache in arbitrary order
// until the cache has at least valueSize bytes available.
// "Arbitrary" is literal: eviction follows Go's randomized map iteration
// order. Callers must hold d.mu.
func (d *Diskv) ensureCacheSpaceWithLock(valueSize uint64) error {
	if valueSize > d.CacheSizeMax {
		return fmt.Errorf("value size (%d bytes) too large for cache (%d bytes)", valueSize, d.CacheSizeMax)
	}

	safe := func() bool { return (d.cacheSize + valueSize) <= d.CacheSizeMax }

	for key, val := range d.cache {
		if safe() {
			break
		}

		d.uncacheWithLock(key, uint64(len(val)))
	}

	// Given the size check above, an empty cache always satisfies safe();
	// reaching this panic therefore indicates corrupted cache accounting.
	if !safe() {
		panic(fmt.Sprintf("%d bytes still won't fit in the cache! (max %d bytes)", valueSize, d.CacheSizeMax))
	}

	return nil
}
721 |
722 | // nopWriteCloser wraps an io.Writer and provides a no-op Close method to
723 | // satisfy the io.WriteCloser interface.
724 | type nopWriteCloser struct {
725 | io.Writer
726 | }
727 |
728 | func (wc *nopWriteCloser) Write(p []byte) (int, error) { return wc.Writer.Write(p) }
729 | func (wc *nopWriteCloser) Close() error { return nil }
730 |
--------------------------------------------------------------------------------
/examples/advanced-transform/advanced-transform.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 | "strings"
6 |
7 | "github.com/peterbourgon/diskv/v3"
8 | )
9 |
10 | func AdvancedTransformExample(key string) *diskv.PathKey {
11 | path := strings.Split(key, "/")
12 | last := len(path) - 1
13 | return &diskv.PathKey{
14 | Path: path[:last],
15 | FileName: path[last] + ".txt",
16 | }
17 | }
18 |
19 | // If you provide an AdvancedTransform, you must also provide its
20 | // inverse:
21 |
22 | func InverseTransformExample(pathKey *diskv.PathKey) (key string) {
23 | txt := pathKey.FileName[len(pathKey.FileName)-4:]
24 | if txt != ".txt" {
25 | panic("Invalid file found in storage folder!")
26 | }
27 | return strings.Join(pathKey.Path, "/") + pathKey.FileName[:len(pathKey.FileName)-4]
28 | }
29 |
// main demonstrates a diskv store configured with a paired advanced
// transform and inverse transform, writing and reading one key.
func main() {
	d := diskv.New(diskv.Options{
		BasePath:          "my-data-dir",
		AdvancedTransform: AdvancedTransformExample,
		InverseTransform:  InverseTransformExample,
		CacheSizeMax:      1024 * 1024,
	})
	// Write some text to the key "alpha/beta/gamma".
	key := "alpha/beta/gamma"
	d.WriteString(key, "¡Hola!") // will be stored in "/alpha/beta/gamma.txt"
	fmt.Println(d.ReadString("alpha/beta/gamma"))
}
42 |
--------------------------------------------------------------------------------
/examples/content-addressable-store/cas.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "crypto/md5"
5 | "fmt"
6 | "io"
7 |
8 | "github.com/peterbourgon/diskv/v3"
9 | )
10 |
const transformBlockSize = 2 // grouping of chars per directory depth

// blockTransform chops the leading characters of s into fixed-size chunks,
// one per directory level; a trailing remainder shorter than a full block is
// not included in the returned path.
func blockTransform(s string) []string {
	levels := len(s) / transformBlockSize
	path := make([]string, 0, levels)
	for i := 0; i < levels; i++ {
		path = append(path, s[i*transformBlockSize:(i+1)*transformBlockSize])
	}
	return path
}
24 |
// main demonstrates a content-addressable store: each value is written under
// the hex MD5 of its contents, then all keys are enumerated and read back.
func main() {
	d := diskv.New(diskv.Options{
		BasePath:     "data",
		Transform:    blockTransform,
		CacheSizeMax: 1024 * 1024, // 1MB
	})

	for _, valueStr := range []string{
		"I am the very model of a modern Major-General",
		"I've information vegetable, animal, and mineral",
		"I know the kings of England, and I quote the fights historical",
		"From Marathon to Waterloo, in order categorical",
		"I'm very well acquainted, too, with matters mathematical",
		"I understand equations, both the simple and quadratical",
		"About binomial theorem I'm teeming with a lot o' news",
		"With many cheerful facts about the square of the hypotenuse",
	} {
		// Write errors are ignored in this example.
		d.Write(md5sum(valueStr), []byte(valueStr))
	}

	var keyCount int
	for key := range d.Keys(nil) {
		val, err := d.Read(key)
		if err != nil {
			panic(fmt.Sprintf("key %s had no value", key))
		}
		fmt.Printf("%s: %s\n", key, val)
		keyCount++
	}
	fmt.Printf("%d total keys\n", keyCount)

	// d.EraseAll() // leave it commented out to see how data is kept on disk
}
58 |
// md5sum returns the lowercase hex encoding of the MD5 digest of s.
func md5sum(s string) string {
	digest := md5.Sum([]byte(s))
	return fmt.Sprintf("%x", digest)
}
64 |
--------------------------------------------------------------------------------
/examples/git-like-store/git-like-store.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | /* This example uses a more advanced transform function that simulates a bit
4 | how Git stores objects:
5 |
6 | * places hash-like keys under the objects directory
7 | * any other key is placed in the base directory. If the key
8 | * contains slashes, these are converted to subdirectories
9 |
10 | */
11 |
12 | import (
13 | "fmt"
14 | "regexp"
15 | "strings"
16 |
17 | "github.com/peterbourgon/diskv/v3"
18 | )
19 |
// hex40 matches strings consisting entirely of exactly 40 hex digits (e.g. a
// SHA-1 object name). The ^…$ anchors are essential: unanchored, the pattern
// also matched any key merely containing a 40-digit hex run (such as
// "refs/<40-hex>"), misclassifying it as an object hash.
var hex40 = regexp.MustCompile("^[0-9a-fA-F]{40}$")
21 |
// hexTransform routes keys classified as object hashes by hex40 into an
// "objects/<first two chars>/" directory, mirrors slash-separated keys as
// subdirectories, and places all remaining keys in the base directory.
func hexTransform(s string) *diskv.PathKey {
	if hex40.MatchString(s) {
		return &diskv.PathKey{Path: []string{"objects", s[0:2]},
			FileName: s,
		}
	}

	// Slash-separated keys become nested directories plus a file name.
	folders := strings.Split(s, "/")
	lfolders := len(folders)
	if lfolders > 1 {
		return &diskv.PathKey{Path: folders[:lfolders-1],
			FileName: folders[lfolders-1],
		}
	}

	// Plain keys live directly in the base directory.
	return &diskv.PathKey{Path: []string{},
		FileName: s,
	}
}
41 |
// hexInverseTransform reconstructs the original key from a PathKey produced
// by hexTransform: hash-like file names map back to themselves (their
// "objects/xx" path is dropped), and other entries rejoin their path
// components with "/".
func hexInverseTransform(pathKey *diskv.PathKey) string {
	if hex40.MatchString(pathKey.FileName) {
		return pathKey.FileName
	}

	if len(pathKey.Path) == 0 {
		return pathKey.FileName
	}

	return strings.Join(pathKey.Path, "/") + "/" + pathKey.FileName
}
53 |
// main demonstrates the git-like layout: one hash-shaped key stored under
// objects/, one slash-separated key stored as subdirectories, then a full
// key enumeration.
func main() {
	d := diskv.New(diskv.Options{
		BasePath:          "my-data-dir",
		AdvancedTransform: hexTransform,
		InverseTransform:  hexInverseTransform,
		CacheSizeMax:      1024 * 1024,
	})

	// Write some text to the key "alpha/beta/gamma".
	key := "1bd88421b055327fcc8660c76c4894c4ea4c95d7"
	d.WriteString(key, "¡Hola!") // will be stored in "/objects/1b/1bd88421b055327fcc8660c76c4894c4ea4c95d7"

	d.WriteString("refs/heads/master", "some text") // will be stored in "/refs/heads/master"

	fmt.Println("Enumerating All keys:")
	c := d.Keys(nil)

	for key := range c {
		value := d.ReadString(key)
		fmt.Printf("Key: %s, Value: %s\n", key, value)
	}
}
76 |
--------------------------------------------------------------------------------
/examples/super-simple-store/super-simple-store.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "fmt"
5 |
6 | "github.com/peterbourgon/diskv/v3"
7 | )
8 |
// main demonstrates the minimal diskv lifecycle: write a value, read it
// back, and erase it, panicking on any error.
func main() {
	d := diskv.New(diskv.Options{
		BasePath:     "my-diskv-data-directory",
		CacheSizeMax: 1024 * 1024, // 1MB
	})

	key := "alpha"
	if err := d.Write(key, []byte{'1', '2', '3'}); err != nil {
		panic(err)
	}

	value, err := d.Read(key)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%v\n", value)

	if err := d.Erase(key); err != nil {
		panic(err)
	}
}
30 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/peterbourgon/diskv/v3
2 |
3 | go 1.12
4 |
5 | require github.com/google/btree v1.0.0
6 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo=
2 | github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ=
3 |
--------------------------------------------------------------------------------
/import_test.go:
--------------------------------------------------------------------------------
1 | package diskv_test
2 |
3 | import (
4 | "bytes"
5 | "io/ioutil"
6 | "os"
7 |
8 | "github.com/peterbourgon/diskv/v3"
9 |
10 | "testing"
11 | )
12 |
// TestImportMove verifies that Import with move=true overwrites an existing
// key with the source file's contents and removes the source file.
func TestImportMove(t *testing.T) {
	b := []byte(`0123456789`)
	f, err := ioutil.TempFile("", "temp-test")
	if err != nil {
		t.Fatal(err)
	}
	if _, err := f.Write(b); err != nil {
		t.Fatal(err)
	}
	f.Close()

	d := diskv.New(diskv.Options{
		BasePath: "test-import-move",
	})
	defer d.EraseAll()

	key := "key"

	// Pre-populate the key so the test also covers the overwrite path.
	if err := d.Write(key, []byte(`TBD`)); err != nil {
		t.Fatal(err)
	}

	if err := d.Import(f.Name(), key, true); err != nil {
		t.Fatal(err)
	}

	// move=true must delete the source file...
	if _, err := os.Stat(f.Name()); err == nil || !os.IsNotExist(err) {
		t.Errorf("expected temp file to be gone, but err = %v", err)
	}

	// ...and the key must now hold the imported bytes.
	if !d.Has(key) {
		t.Errorf("%q not present", key)
	}

	if buf, err := d.Read(key); err != nil || bytes.Compare(b, buf) != 0 {
		t.Errorf("want %q, have %q (err = %v)", string(b), string(buf), err)
	}
}
51 |
// TestImportCopy verifies that Import with move=false copies the source file
// into the store and leaves the source file in place.
func TestImportCopy(t *testing.T) {
	b := []byte(`¡åéîòü!`)

	f, err := ioutil.TempFile("", "temp-test")
	if err != nil {
		t.Fatal(err)
	}
	if _, err := f.Write(b); err != nil {
		t.Fatal(err)
	}
	f.Close()

	d := diskv.New(diskv.Options{
		BasePath: "test-import-copy",
	})
	defer d.EraseAll()

	if err := d.Import(f.Name(), "key", false); err != nil {
		t.Fatal(err)
	}

	// move=false must leave the source file untouched.
	if _, err := os.Stat(f.Name()); err != nil {
		t.Errorf("expected temp file to remain, but got err = %v", err)
	}
}
77 |
--------------------------------------------------------------------------------
/index.go:
--------------------------------------------------------------------------------
1 | package diskv
2 |
3 | import (
4 | "sync"
5 |
6 | "github.com/google/btree"
7 | )
8 |
// Index is a generic interface for things that can
// provide an ordered list of keys.
type Index interface {
	// Initialize (re)builds the index from the stream of keys, ordered by less.
	Initialize(less LessFunction, keys <-chan string)
	// Insert adds key to the index.
	Insert(key string)
	// Delete removes key from the index.
	Delete(key string)
	// Keys returns up to n keys in order, starting after from.
	Keys(from string, n int) []string
}

// LessFunction is used to initialize an Index of keys in a specific order.
type LessFunction func(string, string) bool
20 |
// btreeString is a custom data type that satisfies the BTree Less interface,
// making the strings it wraps sortable by the BTree package.
type btreeString struct {
	s string       // the wrapped key
	l LessFunction // ordering used to compare against other items
}

// Less satisfies the BTree.Less interface using the btreeString's LessFunction.
// The type assertion means every item in a given tree must be a btreeString.
func (s btreeString) Less(i btree.Item) bool {
	return s.l(s.s, i.(btreeString).s)
}
32 |
// BTreeIndex is an implementation of the Index interface using google/btree.
// The zero value is not usable until Initialize is called; the other methods
// panic on an uninitialized index.
type BTreeIndex struct {
	sync.RWMutex
	LessFunction
	*btree.BTree
}
39 |
// Initialize populates the BTree tree with data from the keys channel,
// according to the passed less function. It's destructive to the BTreeIndex:
// any previous ordering function and tree contents are discarded.
func (i *BTreeIndex) Initialize(less LessFunction, keys <-chan string) {
	i.Lock()
	defer i.Unlock()
	i.LessFunction = less
	i.BTree = rebuild(less, keys)
}
48 |
// Insert inserts the given key (only) into the BTree tree.
// Panics if called before Initialize.
func (i *BTreeIndex) Insert(key string) {
	i.Lock()
	defer i.Unlock()
	if i.BTree == nil || i.LessFunction == nil {
		panic("uninitialized index")
	}
	i.BTree.ReplaceOrInsert(btreeString{s: key, l: i.LessFunction})
}
58 |
// Delete removes the given key (only) from the BTree tree.
// Panics if called before Initialize.
func (i *BTreeIndex) Delete(key string) {
	i.Lock()
	defer i.Unlock()
	if i.BTree == nil || i.LessFunction == nil {
		panic("uninitialized index")
	}
	i.BTree.Delete(btreeString{s: key, l: i.LessFunction})
}
68 |
// Keys yields a maximum of n keys in order. If the passed 'from' key is empty,
// Keys will return the first n keys. If the passed 'from' key is non-empty, the
// first key in the returned slice will be the key that immediately follows the
// passed key, in key order.
func (i *BTreeIndex) Keys(from string, n int) []string {
	i.RLock()
	defer i.RUnlock()

	if i.BTree == nil || i.LessFunction == nil {
		panic("uninitialized index")
	}

	if i.BTree.Len() <= 0 {
		return []string{}
	}

	btreeFrom := btreeString{s: from, l: i.LessFunction}
	skipFirst := true
	if len(from) <= 0 || !i.BTree.Has(btreeFrom) {
		// no such key, so fabricate an always-smallest item
		btreeFrom = btreeString{s: "", l: func(string, string) bool { return true }}
		skipFirst = false
	}

	keys := []string{}
	iterator := func(i btree.Item) bool {
		keys = append(keys, i.(btreeString).s)
		return len(keys) < n
	}
	i.BTree.AscendGreaterOrEqual(btreeFrom, iterator)

	// When iterating from an existing key, the key itself is the first item
	// yielded; drop it so results begin strictly after 'from'. In that case
	// at most n-1 keys are returned, which still honors "maximum of n".
	if skipFirst && len(keys) > 0 {
		keys = keys[1:]
	}

	return keys
}
106 |
// rebuild does the work of regenerating the index
// with the given keys, ordering entries via the item-embedded less function.
func rebuild(less LessFunction, keys <-chan string) *btree.BTree {
	// Degree 2 is the minimum btree branching factor.
	tree := btree.New(2)
	for key := range keys {
		tree.ReplaceOrInsert(btreeString{s: key, l: less})
	}
	return tree
}
116 |
--------------------------------------------------------------------------------
/index_test.go:
--------------------------------------------------------------------------------
1 | package diskv
2 |
3 | import (
4 | "bytes"
5 | "reflect"
6 | "testing"
7 | "time"
8 | )
9 |
// strLess reports whether a sorts before b in byte order.
func strLess(a, b string) bool {
	return a < b
}
11 |
// cmpStrings reports whether a and b hold the same elements in the same order.
func cmpStrings(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for i, s := range a {
		if b[i] != s {
			return false
		}
	}
	return true
}
23 |
24 | func (d *Diskv) isIndexed(key string) bool {
25 | if d.Index == nil {
26 | return false
27 | }
28 |
29 | for _, got := range d.Index.Keys("", 1000) {
30 | if got == key {
31 | return true
32 | }
33 | }
34 | return false
35 | }
36 |
// TestIndexOrder verifies that written keys appear in the index immediately
// and are yielded in the order defined by the IndexLess function.
func TestIndexOrder(t *testing.T) {
	d := New(Options{
		BasePath:     "index-test",
		CacheSizeMax: 1024,
		Index:        &BTreeIndex{},
		IndexLess:    strLess,
	})
	defer d.EraseAll()

	v := []byte{'1', '2', '3'}
	d.Write("a", v)
	if !d.isIndexed("a") {
		t.Fatalf("'a' not indexed after write")
	}
	d.Write("1", v)
	d.Write("m", v)
	d.Write("-", v)
	d.Write("A", v)

	// Byte order: punctuation < digits < uppercase < lowercase.
	expectedKeys := []string{"-", "1", "A", "a", "m"}
	keys := []string{}
	for _, key := range d.Index.Keys("", 100) {
		keys = append(keys, key)
	}

	if !cmpStrings(keys, expectedKeys) {
		t.Fatalf("got %s, expected %s", keys, expectedKeys)
	}
}
66 |
// TestIndexLoad verifies that a second store opened over the same BasePath
// indexes pre-existing on-disk data, and that its cache survives the data
// being erased from disk by the first store.
func TestIndexLoad(t *testing.T) {
	d1 := New(Options{
		BasePath:     "index-test",
		CacheSizeMax: 1024,
	})
	defer d1.EraseAll()

	val := []byte{'1', '2', '3'}
	keys := []string{"a", "b", "c", "d", "e", "f", "g"}
	for _, key := range keys {
		d1.Write(key, val)
	}

	d2 := New(Options{
		BasePath:     "index-test",
		CacheSizeMax: 1024,
		Index:        &BTreeIndex{},
		IndexLess:    strLess,
	})
	defer d2.EraseAll()

	// check d2 has properly loaded existing d1 data
	for _, key := range keys {
		if !d2.isIndexed(key) {
			t.Fatalf("key '%s' not indexed on secondary", key)
		}
	}

	// cache one
	if readValue, err := d2.Read(keys[0]); err != nil {
		t.Fatalf("%s", err)
	} else if bytes.Compare(val, readValue) != 0 {
		t.Fatalf("%s: got %s, expected %s", keys[0], readValue, val)
	}

	// make sure it got cached
	// (caching happens via the siphon, so poll briefly rather than assume
	// it is synchronous with Read returning)
	for i := 0; i < 10 && !d2.isCached(keys[0]); i++ {
		time.Sleep(10 * time.Millisecond)
	}
	if !d2.isCached(keys[0]) {
		t.Fatalf("key '%s' not cached", keys[0])
	}

	// kill the disk
	d1.EraseAll()

	// cached value should still be there in the second
	if readValue, err := d2.Read(keys[0]); err != nil {
		t.Fatalf("%s", err)
	} else if bytes.Compare(val, readValue) != 0 {
		t.Fatalf("%s: got %s, expected %s", keys[0], readValue, val)
	}

	// but not in the original
	if _, err := d1.Read(keys[0]); err == nil {
		t.Fatalf("expected error reading from flushed store")
	}
}
125 |
// TestIndexKeysEmptyFrom verifies that Keys("") returns all keys in sorted
// order, with the duplicate write of "b" collapsed to a single entry.
func TestIndexKeysEmptyFrom(t *testing.T) {
	d := New(Options{
		BasePath:     "index-test",
		CacheSizeMax: 1024,
		Index:        &BTreeIndex{},
		IndexLess:    strLess,
	})
	defer d.EraseAll()

	for _, k := range []string{"a", "c", "z", "b", "x", "b", "y"} {
		d.Write(k, []byte("1"))
	}

	want := []string{"a", "b", "c", "x", "y", "z"}
	have := d.Index.Keys("", 99)
	if !reflect.DeepEqual(want, have) {
		t.Errorf("want %v, have %v", want, have)
	}
}
145 |
// TestBadKeys verifies that a key containing a path separator is rejected
// with errBadKey by Write.
func TestBadKeys(t *testing.T) {
	d := New(Options{
		BasePath:     "index-test",
		CacheSizeMax: 1024,
		Index:        &BTreeIndex{},
		IndexLess:    strLess,
	})
	defer d.EraseAll()

	for _, k := range []string{"a/a"} {
		err := d.Write(k, []byte("1"))
		if err != errBadKey {
			t.Errorf("Expected bad key error, got: %v", err)
		}
	}
}
162 |
--------------------------------------------------------------------------------
/issues_test.go:
--------------------------------------------------------------------------------
1 | package diskv
2 |
3 | import (
4 | "bytes"
5 | "io/ioutil"
6 | "math/rand"
7 | "sync"
8 | "testing"
9 | "time"
10 | )
11 |
12 | // ReadStream from cache shouldn't panic on a nil dereference from a nonexistent
13 | // Compression :)
14 | func TestIssue2A(t *testing.T) {
15 | d := New(Options{
16 | BasePath: "test-issue-2a",
17 | CacheSizeMax: 1024,
18 | })
19 | defer d.EraseAll()
20 |
21 | input := "abcdefghijklmnopqrstuvwxy"
22 | key, writeBuf, sync := "a", bytes.NewBufferString(input), false
23 | if err := d.WriteStream(key, writeBuf, sync); err != nil {
24 | t.Fatal(err)
25 | }
26 |
27 | for i := 0; i < 2; i++ {
28 | began := time.Now()
29 | rc, err := d.ReadStream(key, false)
30 | if err != nil {
31 | t.Fatal(err)
32 | }
33 | buf, err := ioutil.ReadAll(rc)
34 | if err != nil {
35 | t.Fatal(err)
36 | }
37 | if !cmpBytes(buf, []byte(input)) {
38 | t.Fatalf("read #%d: '%s' != '%s'", i+1, string(buf), input)
39 | }
40 | rc.Close()
41 | t.Logf("read #%d in %s", i+1, time.Since(began))
42 | }
43 | }
44 |
45 | // ReadStream on a key that resolves to a directory should return an error.
46 | func TestIssue2B(t *testing.T) {
47 | blockTransform := func(s string) []string {
48 | transformBlockSize := 3
49 | sliceSize := len(s) / transformBlockSize
50 | pathSlice := make([]string, sliceSize)
51 | for i := 0; i < sliceSize; i++ {
52 | from, to := i*transformBlockSize, (i*transformBlockSize)+transformBlockSize
53 | pathSlice[i] = s[from:to]
54 | }
55 | return pathSlice
56 | }
57 |
58 | d := New(Options{
59 | BasePath: "test-issue-2b",
60 | Transform: blockTransform,
61 | CacheSizeMax: 0,
62 | })
63 | defer d.EraseAll()
64 |
65 | v := []byte{'1', '2', '3'}
66 | if err := d.Write("abcabc", v); err != nil {
67 | t.Fatal(err)
68 | }
69 |
70 | _, err := d.ReadStream("abc", false)
71 | if err == nil {
72 | t.Fatal("ReadStream('abc') should return error")
73 | }
74 | t.Logf("ReadStream('abc') returned error: %v", err)
75 | }
76 |
77 | // Ensure ReadStream with direct=true isn't racy.
78 | func TestIssue17(t *testing.T) {
79 | var (
80 | basePath = "test-data"
81 | )
82 |
83 | dWrite := New(Options{
84 | BasePath: basePath,
85 | CacheSizeMax: 0,
86 | })
87 | defer dWrite.EraseAll()
88 |
89 | dRead := New(Options{
90 | BasePath: basePath,
91 | CacheSizeMax: 50,
92 | })
93 |
94 | cases := map[string]string{
95 | "a": `1234567890`,
96 | "b": `2345678901`,
97 | "c": `3456789012`,
98 | "d": `4567890123`,
99 | "e": `5678901234`,
100 | }
101 |
102 | for k, v := range cases {
103 | if err := dWrite.Write(k, []byte(v)); err != nil {
104 | t.Fatalf("during write: %s", err)
105 | }
106 | dRead.Read(k) // ensure it's added to cache
107 | }
108 |
109 | var wg sync.WaitGroup
110 | start := make(chan struct{})
111 | for k, v := range cases {
112 | wg.Add(1)
113 | go func(k, v string) {
114 | <-start
115 | dRead.ReadStream(k, true)
116 | wg.Done()
117 | }(k, v)
118 | }
119 | close(start)
120 | wg.Wait()
121 | }
122 |
123 | // Test for issue #40, where acquiring two stream readers on the same k/v pair
124 | // caused the value to be written into the cache twice, messing up the
125 | // size calculations.
126 | func TestIssue40(t *testing.T) {
127 | var (
128 | basePath = "test-data"
129 | )
130 | // Simplest transform function: put all the data files into the base dir.
131 | flatTransform := func(s string) []string { return []string{} }
132 |
133 | // Initialize a new diskv store, rooted at "my-data-dir",
134 | // with a 100 byte cache.
135 | d := New(Options{
136 | BasePath: basePath,
137 | Transform: flatTransform,
138 | CacheSizeMax: 100,
139 | })
140 |
141 | defer d.EraseAll()
142 |
143 | // Write a 50 byte value, filling the cache half-way
144 | k1 := "key1"
145 | d1 := make([]byte, 50)
146 | rand.Read(d1)
147 | d.Write(k1, d1)
148 |
149 | // Get *two* read streams on it. Because the key is not yet in the cache,
150 | // and will not be in the cache until a stream is fully read, both
151 | // readers use the 'siphon' object, which always writes to the cache
152 | // after reading.
153 | s1, err := d.ReadStream(k1, false)
154 | if err != nil {
155 | t.Fatal(err)
156 | }
157 | s2, err := d.ReadStream(k1, false)
158 | if err != nil {
159 | t.Fatal(err)
160 | }
161 | // When each stream is drained, the underlying siphon will write
162 | // the value into the cache's map and increment the cache size.
163 | // This means we will have 1 entry in the cache map
164 | // ("key1" mapping to a 50 byte slice) but the cache size will be 100,
165 | // because the buggy code does not check if an entry already exists
166 | // in the map.
167 | // s1 drains:
168 | // cache[k] = v
169 | // cacheSize += len(v)
170 | // s2 drains:
171 | // cache[k] = v /* overwrites existing */
172 | // cacheSize += len(v) /* blindly adds to the cache size */
173 | ioutil.ReadAll(s1)
174 | ioutil.ReadAll(s2)
175 |
176 | // Now write a different k/v pair, with a 60 byte array.
177 | k2 := "key2"
178 | d2 := make([]byte, 60)
179 | rand.Read(d2)
180 | d.Write(k2, d2)
181 | // The act of reading the k/v pair back out causes it to be cached.
182 | // Because the cache is only 100 bytes, it needs to delete existing
183 | // entries to make room.
184 | // If the cache is buggy, it will delete the single 50-byte entry
185 | // from the cache map & decrement cacheSize by 50... but because
186 | // cacheSize was improperly incremented twice earlier, this will
187 | // leave us with no entries in the cacheMap but with cacheSize==50.
188 | // Since CacheSizeMax-cacheSize (100-50) is less than 60, there
189 | // is no room in the cache for this entry and it panics.
190 | d.Read(k2)
191 | }
192 |
--------------------------------------------------------------------------------
/keys_test.go:
--------------------------------------------------------------------------------
1 | package diskv
2 |
3 | import (
4 | "reflect"
5 | "runtime"
6 | "strings"
7 | "testing"
8 | )
9 |
var (
	// keysTestData maps fixed-width 8-character keys (sharing several
	// common prefixes) to arbitrary string values; it is the fixture for
	// the Keys/KeysPrefix tests below.
	keysTestData = map[string]string{
		"ab01cd01": "When we started building CoreOS",
		"ab01cd02": "we looked at all the various components available to us",
		"ab01cd03": "re-using the best tools",
		"ef01gh04": "and building the ones that did not exist",
		"ef02gh05": "We believe strongly in the Unix philosophy",
		"xxxxxxxx": "tools should be independently useful",
	}

	// prefixes enumerates prefix queries against keysTestData, covering
	// matches at every depth plus several prefixes that match nothing.
	prefixes = []string{
		"", // all
		"a",
		"ab",
		"ab0",
		"ab01",
		"ab01cd0",
		"ab01cd01",
		"ab01cd01x", // none
		"b",         // none
		"b0",        // none
		"0",         // none
		"01",        // none
		"e",
		"ef",
		"efx", // none
		"ef01gh0",
		"ef01gh04",
		"ef01gh05",
		"ef01gh06", // none
	}
)
42 |
43 | func TestKeysFlat(t *testing.T) {
44 | transform := func(s string) []string {
45 | if s == "" {
46 | t.Fatalf(`transform should not be called with ""`)
47 | }
48 | return []string{}
49 | }
50 | d := New(Options{
51 | BasePath: "test-data",
52 | Transform: transform,
53 | })
54 | defer d.EraseAll()
55 |
56 | for k, v := range keysTestData {
57 | d.Write(k, []byte(v))
58 | }
59 |
60 | checkKeys(t, d.Keys(nil), keysTestData)
61 | }
62 |
63 | func TestKeysNested(t *testing.T) {
64 | d := New(Options{
65 | BasePath: "test-data",
66 | Transform: blockTransform(2),
67 | })
68 | defer d.EraseAll()
69 |
70 | for k, v := range keysTestData {
71 | d.Write(k, []byte(v))
72 | }
73 |
74 | checkKeys(t, d.Keys(nil), keysTestData)
75 | }
76 |
77 | func TestKeysPrefixFlat(t *testing.T) {
78 | d := New(Options{
79 | BasePath: "test-data",
80 | })
81 | defer d.EraseAll()
82 |
83 | for k, v := range keysTestData {
84 | d.Write(k, []byte(v))
85 | }
86 |
87 | for _, prefix := range prefixes {
88 | checkKeys(t, d.KeysPrefix(prefix, nil), filterPrefix(keysTestData, prefix))
89 | }
90 | }
91 |
92 | func TestKeysPrefixNested(t *testing.T) {
93 | d := New(Options{
94 | BasePath: "test-data",
95 | Transform: blockTransform(2),
96 | })
97 | defer d.EraseAll()
98 |
99 | for k, v := range keysTestData {
100 | d.Write(k, []byte(v))
101 | }
102 |
103 | for _, prefix := range prefixes {
104 | checkKeys(t, d.KeysPrefix(prefix, nil), filterPrefix(keysTestData, prefix))
105 | }
106 | }
107 |
108 | func TestKeysCancel(t *testing.T) {
109 | d := New(Options{
110 | BasePath: "test-data",
111 | })
112 | defer d.EraseAll()
113 |
114 | for k, v := range keysTestData {
115 | d.Write(k, []byte(v))
116 | }
117 |
118 | var (
119 | cancel = make(chan struct{})
120 | received = 0
121 | cancelAfter = len(keysTestData) / 2
122 | )
123 |
124 | for key := range d.Keys(cancel) {
125 | received++
126 |
127 | if received >= cancelAfter {
128 | close(cancel)
129 | runtime.Gosched() // allow walker to detect cancel
130 | }
131 |
132 | t.Logf("received %d: %q", received, key)
133 | }
134 |
135 | if want, have := cancelAfter, received; want != have {
136 | t.Errorf("want %d, have %d", want, have)
137 | }
138 | }
139 |
140 | func checkKeys(t *testing.T, c <-chan string, want map[string]string) {
141 | for k := range c {
142 | if _, ok := want[k]; !ok {
143 | t.Errorf("%q yielded but not expected", k)
144 | continue
145 | }
146 |
147 | delete(want, k)
148 | t.Logf("%q yielded OK", k)
149 | }
150 |
151 | if len(want) != 0 {
152 | t.Errorf("%d expected key(s) not yielded: %s", len(want), strings.Join(flattenKeys(want), ", "))
153 | }
154 | }
155 |
// blockTransform returns a transform function that splits a key into
// consecutive blockSize-character path components, discarding any trailing
// remainder shorter than blockSize.
func blockTransform(blockSize int) func(string) []string {
	return func(s string) []string {
		count := len(s) / blockSize
		parts := make([]string, 0, count)
		for i := 0; i < count; i++ {
			start := i * blockSize
			parts = append(parts, s[start:start+blockSize])
		}
		return parts
	}
}
169 |
// filterPrefix returns the subset of in whose keys begin with prefix.
// The empty prefix matches every key.
func filterPrefix(in map[string]string, prefix string) map[string]string {
	out := map[string]string{}
	for key, value := range in {
		if !strings.HasPrefix(key, prefix) {
			continue
		}
		out[key] = value
	}
	return out
}
179 |
180 | func TestFilterPrefix(t *testing.T) {
181 | input := map[string]string{
182 | "all": "",
183 | "and": "",
184 | "at": "",
185 | "available": "",
186 | "best": "",
187 | "building": "",
188 | "components": "",
189 | "coreos": "",
190 | "did": "",
191 | "exist": "",
192 | "looked": "",
193 | "not": "",
194 | "ones": "",
195 | "re-using": "",
196 | "started": "",
197 | "that": "",
198 | "the": "",
199 | "to": "",
200 | "tools": "",
201 | "us": "",
202 | "various": "",
203 | "we": "",
204 | "when": "",
205 | }
206 |
207 | for prefix, want := range map[string]map[string]string{
208 | "a": map[string]string{"all": "", "and": "", "at": "", "available": ""},
209 | "al": map[string]string{"all": ""},
210 | "all": map[string]string{"all": ""},
211 | "alll": map[string]string{},
212 | "c": map[string]string{"components": "", "coreos": ""},
213 | "co": map[string]string{"components": "", "coreos": ""},
214 | "com": map[string]string{"components": ""},
215 | } {
216 | have := filterPrefix(input, prefix)
217 | if !reflect.DeepEqual(want, have) {
218 | t.Errorf("%q: want %v, have %v", prefix, flattenKeys(want), flattenKeys(have))
219 | }
220 | }
221 | }
222 |
// flattenKeys returns m's keys as a slice, in unspecified (map-iteration)
// order.
func flattenKeys(m map[string]string) []string {
	keys := make([]string, 0, len(m))
	for key := range m {
		keys = append(keys, key)
	}
	return keys
}
230 |
--------------------------------------------------------------------------------
/speed_test.go:
--------------------------------------------------------------------------------
1 | package diskv
2 |
3 | import (
4 | "fmt"
5 | "math/rand"
6 | "testing"
7 | )
8 |
// shuffle permutes keys in place, swapping each position with the slot
// chosen by a random permutation of the indices.
func shuffle(keys []string) {
	perm := rand.Perm(len(keys))
	for i, j := range perm {
		keys[i], keys[j] = keys[j], keys[i]
	}
}
15 |
// genValue returns a size-byte value of random lowercase ASCII letters.
func genValue(size int) []byte {
	v := make([]byte, size)
	for i := range v {
		v[i] = byte(97 + rand.Int()%26) // 'a'..'z'
	}
	return v
}
23 |
// keyCount is the number of distinct keys used by the benchmarks.
const keyCount = 1000

// genKeys returns the decimal strings "0" through "999" as benchmark keys.
func genKeys() []string {
	keys := make([]string, keyCount)
	for i := range keys {
		keys[i] = fmt.Sprintf("%d", i)
	}
	return keys
}
35 |
36 | func (d *Diskv) load(keys []string, val []byte) {
37 | for _, key := range keys {
38 | d.Write(key, val)
39 | }
40 | }
41 |
42 | func benchRead(b *testing.B, size, cachesz int) {
43 | b.StopTimer()
44 | d := New(Options{
45 | BasePath: "speed-test",
46 | CacheSizeMax: uint64(cachesz),
47 | })
48 | defer d.EraseAll()
49 |
50 | keys := genKeys()
51 | value := genValue(size)
52 | d.load(keys, value)
53 | shuffle(keys)
54 | b.SetBytes(int64(size))
55 |
56 | b.StartTimer()
57 | for i := 0; i < b.N; i++ {
58 | _, _ = d.Read(keys[i%len(keys)])
59 | }
60 | b.StopTimer()
61 | }
62 |
63 | func benchWrite(b *testing.B, size int, withIndex bool) {
64 | b.StopTimer()
65 |
66 | options := Options{
67 | BasePath: "speed-test",
68 | CacheSizeMax: 0,
69 | }
70 | if withIndex {
71 | options.Index = &BTreeIndex{}
72 | options.IndexLess = strLess
73 | }
74 |
75 | d := New(options)
76 | defer d.EraseAll()
77 | keys := genKeys()
78 | value := genValue(size)
79 | shuffle(keys)
80 | b.SetBytes(int64(size))
81 |
82 | b.StartTimer()
83 | for i := 0; i < b.N; i++ {
84 | d.Write(keys[i%len(keys)], value)
85 | }
86 | b.StopTimer()
87 | }
88 |
// Write benchmarks: value sizes from 32B to 10KB, without index maintenance.

func BenchmarkWrite__32B_NoIndex(b *testing.B) {
	benchWrite(b, 32, false)
}

func BenchmarkWrite__1KB_NoIndex(b *testing.B) {
	benchWrite(b, 1024, false)
}

func BenchmarkWrite__4KB_NoIndex(b *testing.B) {
	benchWrite(b, 4096, false)
}

func BenchmarkWrite_10KB_NoIndex(b *testing.B) {
	benchWrite(b, 10240, false)
}

// Write benchmarks: same sizes, with BTree index maintenance on every write.

func BenchmarkWrite__32B_WithIndex(b *testing.B) {
	benchWrite(b, 32, true)
}

func BenchmarkWrite__1KB_WithIndex(b *testing.B) {
	benchWrite(b, 1024, true)
}

func BenchmarkWrite__4KB_WithIndex(b *testing.B) {
	benchWrite(b, 4096, true)
}

func BenchmarkWrite_10KB_WithIndex(b *testing.B) {
	benchWrite(b, 10240, true)
}

// Read benchmarks: cache disabled, so every Read hits the disk.

func BenchmarkRead__32B_NoCache(b *testing.B) {
	benchRead(b, 32, 0)
}

func BenchmarkRead__1KB_NoCache(b *testing.B) {
	benchRead(b, 1024, 0)
}

func BenchmarkRead__4KB_NoCache(b *testing.B) {
	benchRead(b, 4096, 0)
}

func BenchmarkRead_10KB_NoCache(b *testing.B) {
	benchRead(b, 10240, 0)
}

// Read benchmarks: cache sized at twice the total data volume, so
// steady-state reads are served from memory.

func BenchmarkRead__32B_WithCache(b *testing.B) {
	benchRead(b, 32, keyCount*32*2)
}

func BenchmarkRead__1KB_WithCache(b *testing.B) {
	benchRead(b, 1024, keyCount*1024*2)
}

func BenchmarkRead__4KB_WithCache(b *testing.B) {
	benchRead(b, 4096, keyCount*4096*2)
}
148 |
149 | func BenchmarkRead_10KB_WithCache(b *testing.B) {
150 | benchRead(b, 10240, keyCount*4096*2)
151 | }
152 |
--------------------------------------------------------------------------------
/stream_test.go:
--------------------------------------------------------------------------------
1 | package diskv
2 |
3 | import (
4 | "bytes"
5 | "io/ioutil"
6 | "testing"
7 | )
8 |
9 | func TestBasicStreamCaching(t *testing.T) {
10 | d := New(Options{
11 | BasePath: "test-data",
12 | CacheSizeMax: 1024,
13 | })
14 | defer d.EraseAll()
15 |
16 | input := "a1b2c3"
17 | key, writeBuf, sync := "a", bytes.NewBufferString(input), true
18 | if err := d.WriteStream(key, writeBuf, sync); err != nil {
19 | t.Fatal(err)
20 | }
21 |
22 | if d.isCached(key) {
23 | t.Fatalf("'%s' cached, but shouldn't be (yet)", key)
24 | }
25 |
26 | rc, err := d.ReadStream(key, false)
27 | if err != nil {
28 | t.Fatal(err)
29 | }
30 |
31 | readBuf, err := ioutil.ReadAll(rc)
32 | if err != nil {
33 | t.Fatal(err)
34 | }
35 |
36 | if !cmpBytes(readBuf, []byte(input)) {
37 | t.Fatalf("'%s' != '%s'", string(readBuf), input)
38 | }
39 |
40 | if !d.isCached(key) {
41 | t.Fatalf("'%s' isn't cached, but should be", key)
42 | }
43 | }
44 |
45 | func TestReadStreamDirect(t *testing.T) {
46 | var (
47 | basePath = "test-data"
48 | )
49 | dWrite := New(Options{
50 | BasePath: basePath,
51 | CacheSizeMax: 0,
52 | })
53 | defer dWrite.EraseAll()
54 | dRead := New(Options{
55 | BasePath: basePath,
56 | CacheSizeMax: 1024,
57 | })
58 |
59 | // Write
60 | key, val1, val2 := "a", []byte(`1234567890`), []byte(`aaaaaaaaaa`)
61 | if err := dWrite.Write(key, val1); err != nil {
62 | t.Fatalf("during first write: %s", err)
63 | }
64 |
65 | // First, caching read.
66 | val, err := dRead.Read(key)
67 | if err != nil {
68 | t.Fatalf("during initial read: %s", err)
69 | }
70 | t.Logf("read 1: %s => %s", key, string(val))
71 | if !cmpBytes(val1, val) {
72 | t.Errorf("expected %q, got %q", string(val1), string(val))
73 | }
74 | if !dRead.isCached(key) {
75 | t.Errorf("%q should be cached, but isn't", key)
76 | }
77 |
78 | // Write a different value.
79 | if err := dWrite.Write(key, val2); err != nil {
80 | t.Fatalf("during second write: %s", err)
81 | }
82 |
83 | // Second read, should hit cache and get the old value.
84 | val, err = dRead.Read(key)
85 | if err != nil {
86 | t.Fatalf("during second (cache-hit) read: %s", err)
87 | }
88 | t.Logf("read 2: %s => %s", key, string(val))
89 | if !cmpBytes(val1, val) {
90 | t.Errorf("expected %q, got %q", string(val1), string(val))
91 | }
92 |
93 | // Third, direct read, should get the updated value.
94 | rc, err := dRead.ReadStream(key, true)
95 | if err != nil {
96 | t.Fatalf("during third (direct) read, ReadStream: %s", err)
97 | }
98 | defer rc.Close()
99 | val, err = ioutil.ReadAll(rc)
100 | if err != nil {
101 | t.Fatalf("during third (direct) read, ReadAll: %s", err)
102 | }
103 | t.Logf("read 3: %s => %s", key, string(val))
104 | if !cmpBytes(val2, val) {
105 | t.Errorf("expected %q, got %q", string(val1), string(val))
106 | }
107 |
108 | // Fourth read, should hit cache and get the new value.
109 | val, err = dRead.Read(key)
110 | if err != nil {
111 | t.Fatalf("during fourth (cache-hit) read: %s", err)
112 | }
113 | t.Logf("read 4: %s => %s", key, string(val))
114 | if !cmpBytes(val2, val) {
115 | t.Errorf("expected %q, got %q", string(val1), string(val))
116 | }
117 | }
118 |
--------------------------------------------------------------------------------