├── LICENSE
├── README.md
├── basic_test.go
├── compression.go
├── compression_test.go
├── diskv.go
├── examples
│   ├── advanced-transform
│   │   └── advanced-transform.go
│   ├── content-addressable-store
│   │   └── cas.go
│   ├── git-like-store
│   │   └── git-like-store.go
│   └── super-simple-store
│       └── super-simple-store.go
├── go.mod
├── go.sum
├── import_test.go
├── index.go
├── index_test.go
├── issues_test.go
├── keys_test.go
├── speed_test.go
└── stream_test.go

/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2011-2012 Peter Bourgon
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining a copy
4 | of this software and associated documentation files (the "Software"), to deal
5 | in the Software without restriction, including without limitation the rights
6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7 | copies of the Software, and to permit persons to whom the Software is
8 | furnished to do so, subject to the following conditions:
9 |
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 |
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.
20 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # What is diskv?
2 |
3 | Diskv (disk-vee) is a simple, persistent key-value store written in the Go
4 | language. It starts with an incredibly simple API for storing arbitrary data on
5 | a filesystem by key, and builds several layers of performance-enhancing
6 | abstraction on top. The end result is a conceptually simple, but highly
7 | performant, disk-backed storage system.
8 |
9 | [![Build Status][1]][2]
10 |
11 | [1]: https://drone.io/github.com/peterbourgon/diskv/status.png
12 | [2]: https://drone.io/github.com/peterbourgon/diskv/latest
13 |
14 |
15 | # Installing
16 |
17 | Install [Go 1][3], either [from source][4] or [with a prepackaged binary][5].
18 | Then,
19 |
20 | ```bash
21 | $ go get github.com/peterbourgon/diskv/v3
22 | ```
23 |
24 | [3]: http://golang.org
25 | [4]: http://golang.org/doc/install/source
26 | [5]: http://golang.org/doc/install
27 |
28 |
29 | # Usage
30 |
31 | ```go
32 | package main
33 |
34 | import (
35 | 	"fmt"
36 | 	"github.com/peterbourgon/diskv/v3"
37 | )
38 |
39 | func main() {
40 | 	// Simplest transform function: put all the data files into the base dir.
41 | 	flatTransform := func(s string) []string { return []string{} }
42 |
43 | 	// Initialize a new diskv store, rooted at "my-data-dir", with a 1MB cache.
44 | 	d := diskv.New(diskv.Options{
45 | 		BasePath:     "my-data-dir",
46 | 		Transform:    flatTransform,
47 | 		CacheSizeMax: 1024 * 1024,
48 | 	})
49 |
50 | 	// Write three bytes to the key "alpha".
51 | 	key := "alpha"
52 | 	d.Write(key, []byte{'1', '2', '3'})
53 |
54 | 	// Read the value back out of the store.
55 | 	value, _ := d.Read(key)
56 | 	fmt.Printf("%v\n", value)
57 |
58 | 	// Erase the key+value from the store (and the disk).
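	// (Erase, like Write above, returns an error; it's ignored here for brevity.)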
59 | 	d.Erase(key)
60 | }
61 | ```
62 |
63 | More complex examples can be found in the "examples" subdirectory.
64 |
65 |
66 | # Theory
67 |
68 | ## Basic idea
69 |
70 | At its core, diskv is a map of a key (`string`) to arbitrary data (`[]byte`).
71 | The data is written to a single file on disk, with the same name as the key.
72 | The key determines where that file will be stored, via a user-provided
73 | `TransformFunc`, which takes a key and returns a slice (`[]string`)
74 | corresponding to a path list where the key file will be stored. The simplest
75 | TransformFunc,
76 |
77 | ```go
78 | func SimpleTransform(key string) []string {
79 | 	return []string{}
80 | }
81 | ```
82 |
83 | will place all keys in the same base directory. The design is inspired by
84 | [Redis diskstore][6]; a TransformFunc which emulates the default diskstore
85 | behavior is available in the content-addressable-storage example.
86 |
87 | [6]: http://groups.google.com/group/redis-db/browse_thread/thread/d444bc786689bde9?pli=1
88 |
89 | **Note** that your TransformFunc should ensure that one valid key doesn't
90 | transform to a path prefix of another valid key. That is, it shouldn't be
91 | possible to construct valid keys that resolve to directory names. As a concrete
92 | example, if your TransformFunc splits on every 3 characters, then
93 |
94 | ```go
95 | d.Write("abcabc", val) // OK: written to /abc/abc/abcabc
96 | d.Write("abc", val)    // Error: attempted write to /abc/abc, but it's a directory
97 | ```
98 |
99 | This will be addressed in an upcoming version of diskv.
100 |
101 | Probably the most important design principle behind diskv is that your data is
102 | always flatly available on the disk. diskv will never do anything that would
103 | prevent you from accessing, copying, backing up, or otherwise interacting with
104 | your data via common UNIX commandline tools.
105 |
106 | ## Advanced path transformation
107 |
108 | If you need more control over the file name written to disk, or if you want to
109 | support slashes or other special characters in your keys, you can use the
110 | AdvancedTransform property. You must supply a function that returns a PathKey
111 | structure: a breakdown of a directory path and a file name. The strings
112 | returned must be free of path separators and other special characters:
113 |
114 | ```go
115 | func AdvancedTransformExample(key string) *diskv.PathKey {
116 | 	path := strings.Split(key, "/")
117 | 	last := len(path) - 1
118 | 	return &diskv.PathKey{
119 | 		Path:     path[:last],
120 | 		FileName: path[last] + ".txt",
121 | 	}
122 | }
123 |
124 | // If you provide an AdvancedTransform, you must also provide its
125 | // inverse, which must reconstruct the original key:
126 |
127 | func InverseTransformExample(pathKey *diskv.PathKey) (key string) {
128 | 	txt := pathKey.FileName[len(pathKey.FileName)-4:]
129 | 	if txt != ".txt" {
130 | 		panic("Invalid file found in storage folder!")
131 | 	}
132 | 	return strings.Join(append(pathKey.Path, pathKey.FileName[:len(pathKey.FileName)-4]), "/")
133 | }
134 |
135 | func main() {
136 | 	d := diskv.New(diskv.Options{
137 | 		BasePath:          "my-data-dir",
138 | 		AdvancedTransform: AdvancedTransformExample,
139 | 		InverseTransform:  InverseTransformExample,
140 | 		CacheSizeMax:      1024 * 1024,
141 | 	})
142 | 	// Write some text to the key "alpha/beta/gamma".
143 | 	key := "alpha/beta/gamma"
144 | 	d.WriteString(key, "¡Hola!") // will be stored in "/alpha/beta/gamma.txt"
145 | 	fmt.Println(d.ReadString("alpha/beta/gamma"))
146 | }
147 | ```
148 |
149 |
150 | ## Adding a cache
151 |
152 | An in-memory caching layer is provided by combining the basic disk-backed
153 | store with a simple map structure, which is kept up-to-date as keys are
154 | written and read. Since Go maps are not threadsafe, the cache is guarded
155 | by a RWMutex to provide safe concurrent access.
156 |
157 | ## Adding order
158 |
159 | diskv is a key-value store and therefore inherently unordered. An ordering
160 | system can be injected into the store by passing something which satisfies the
161 | diskv.Index interface. (A default implementation, using Google's
162 | [btree][7] package, is provided.) diskv keeps an index of the keys, ordered by
163 | a user-provided Less function, which can be queried.
164 |
165 | [7]: https://github.com/google/btree
166 |
167 | ## Adding compression
168 |
169 | Something which implements the diskv.Compression interface may be passed
170 | during store creation, so that all Writes and Reads are filtered through
171 | a compression/decompression pipeline. Several default implementations,
172 | using stdlib compression algorithms, are provided. Note that data is cached
173 | compressed; the cost of decompression is borne with each Read.
174 |
175 | ## Streaming
176 |
177 | diskv also provides ReadStream and WriteStream methods, which allow very
178 | large data to be handled efficiently.
179 |
180 |
181 | # Future plans
182 |
183 |  * Plenty of robust testing: huge datasets, etc.
184 |  * More thorough benchmarking
185 |  * Your suggestions for use-cases I haven't thought of
186 |
187 |
188 | # Credits and contributions
189 |
190 | Original idea, design and implementation: [Peter Bourgon](https://github.com/peterbourgon)
191 | Other collaborations: [Javier Peletier](https://github.com/jpeletier) ([Epic Labs](https://www.epiclabs.io))
192 |
--------------------------------------------------------------------------------
/basic_test.go:
--------------------------------------------------------------------------------
1 | package diskv
2 |
3 | import (
4 | 	"bytes"
5 | 	"errors"
6 | 	"math/rand"
7 | 	"regexp"
8 | 	"strings"
9 | 	"testing"
10 | 	"time"
11 | )
12 |
13 | func cmpBytes(a, b []byte) bool {
14 | 	if len(a) != len(b) {
15 | 		return false
16 | 	}
17 | 	for i := 0; i < len(a); i++ {
18 | 		if a[i] != b[i] {
19 | 			return false
20 | 		}
21 | 	}
22 | 	return true
23 | }
24 |
25 | func (d *Diskv) isCached(key string) bool {
26 | 	d.mu.RLock()
27 | 	defer d.mu.RUnlock()
28 | 	_, ok := d.cache[key]
29 | 	return ok
30 | }
31 |
32 | func TestWriteReadErase(t *testing.T) {
33 | 	d := New(Options{
34 | 		BasePath:     "test-data",
35 | 		CacheSizeMax: 1024,
36 | 	})
37 | 	defer d.EraseAll()
38 | 	k, v := "a", []byte{'b'}
39 | 	if err := d.Write(k, v); err != nil {
40 | 		t.Fatalf("write: %s", err)
41 | 	}
42 | 	if readVal, err := d.Read(k); err != nil {
43 | 		t.Fatalf("read: %s", err)
44 | 	} else if bytes.Compare(v, readVal) != 0 {
45 | 		t.Fatalf("read: expected %s, got %s", v, readVal)
46 | 	}
47 | 	if err := d.Erase(k); err != nil {
48 | 		t.Fatalf("erase: %s", err)
49 | 	}
50 | }
51 |
52 | func TestWRECache(t *testing.T) {
53 | 	d := New(Options{
54 | 		BasePath:     "test-data",
55 | 		CacheSizeMax: 1024,
56 | 	})
57 | 	defer d.EraseAll()
58 | 	k, v := "xxx", []byte{' ', ' ', ' '}
59 | 	if d.isCached(k) {
60 | 		t.Fatalf("key cached before Write and Read")
61 | 	}
62 | 	if err := d.Write(k,
v); err != nil { 63 | t.Fatalf("write: %s", err) 64 | } 65 | if d.isCached(k) { 66 | t.Fatalf("key cached before Read") 67 | } 68 | if readVal, err := d.Read(k); err != nil { 69 | t.Fatalf("read: %s", err) 70 | } else if bytes.Compare(v, readVal) != 0 { 71 | t.Fatalf("read: expected %s, got %s", v, readVal) 72 | } 73 | for i := 0; i < 10 && !d.isCached(k); i++ { 74 | time.Sleep(10 * time.Millisecond) 75 | } 76 | if !d.isCached(k) { 77 | t.Fatalf("key not cached after Read") 78 | } 79 | if err := d.Erase(k); err != nil { 80 | t.Fatalf("erase: %s", err) 81 | } 82 | if d.isCached(k) { 83 | t.Fatalf("key cached after Erase") 84 | } 85 | } 86 | 87 | func TestStrings(t *testing.T) { 88 | d := New(Options{ 89 | BasePath: "test-data", 90 | CacheSizeMax: 1024, 91 | }) 92 | defer d.EraseAll() 93 | 94 | keys := map[string]bool{"a": false, "b": false, "c": false, "d": false} 95 | v := []byte{'1'} 96 | for k := range keys { 97 | if err := d.Write(k, v); err != nil { 98 | t.Fatalf("write: %s: %s", k, err) 99 | } 100 | } 101 | 102 | for k := range d.Keys(nil) { 103 | if _, present := keys[k]; present { 104 | t.Logf("got: %s", k) 105 | keys[k] = true 106 | } else { 107 | t.Fatalf("strings() returns unknown key: %s", k) 108 | } 109 | } 110 | 111 | for k, found := range keys { 112 | if !found { 113 | t.Errorf("never got %s", k) 114 | } 115 | } 116 | } 117 | 118 | func TestZeroByteCache(t *testing.T) { 119 | d := New(Options{ 120 | BasePath: "test-data", 121 | CacheSizeMax: 0, 122 | }) 123 | defer d.EraseAll() 124 | 125 | k, v := "a", []byte{'1', '2', '3'} 126 | if err := d.Write(k, v); err != nil { 127 | t.Fatalf("Write: %s", err) 128 | } 129 | 130 | if d.isCached(k) { 131 | t.Fatalf("key cached, expected not-cached") 132 | } 133 | 134 | if _, err := d.Read(k); err != nil { 135 | t.Fatalf("Read: %s", err) 136 | } 137 | 138 | if d.isCached(k) { 139 | t.Fatalf("key cached, expected not-cached") 140 | } 141 | } 142 | 143 | func TestOneByteCache(t *testing.T) { 144 | d := New(Options{ 145 | BasePath: "test-data", 146 | CacheSizeMax: 1, 147 | }) 148 | defer d.EraseAll() 149 | 150 | k1, k2, v1, v2 := "a", "b", []byte{'1'}, []byte{'1', '2'} 151 | if err := d.Write(k1, v1); err != nil { 152 | t.Fatal(err) 153 | } 154 | 155 | if v, err := d.Read(k1); err != nil { 156 | t.Fatal(err) 157 | } else if !cmpBytes(v, v1) { 158 | t.Fatalf("Read: expected %s, got %s", string(v1), string(v)) 159 | } 160 | 161 | for i := 0; i < 10 && !d.isCached(k1); i++ { 162 | time.Sleep(10 * time.Millisecond) 163 | } 164 | if !d.isCached(k1) { 165 | t.Fatalf("expected 1-byte value to be cached, but it wasn't") 166 | } 167 | 168 | if err := d.Write(k2, v2); err != nil { 169 | t.Fatal(err) 170 | } 171 | if _, err := d.Read(k2); err != nil { 172 | t.Fatalf("--> %s", err) 173 | } 174 | 175 | for i := 0; i < 10 && (!d.isCached(k1) || d.isCached(k2)); i++ { 176 | time.Sleep(10 * time.Millisecond) // just wait for lazy-cache 177 | } 178 | if !d.isCached(k1) { 179 | t.Fatalf("1-byte value was uncached for no reason") 180 | } 181 | 182 | if d.isCached(k2) { 183 | t.Fatalf("2-byte value was cached, but cache max size is 1") 184 | } 185 | } 186 | 187 | func TestStaleCache(t *testing.T) { 188 | d := New(Options{ 189 | BasePath: "test-data", 190 | CacheSizeMax: 1, 191 | }) 192 | defer d.EraseAll() 193 | 194 | k, first, second := "a", "first", "second" 195 | if err := d.Write(k, []byte(first)); err != nil { 196 | t.Fatal(err) 197 | } 198 | 199 | v, err := d.Read(k) 200 | if err != nil { 201 | t.Fatal(err) 202 | } 203 | if string(v) != first { 204 | 
t.Errorf("expected '%s', got '%s'", first, v) 205 | } 206 | 207 | if err := d.Write(k, []byte(second)); err != nil { 208 | t.Fatal(err) 209 | } 210 | 211 | v, err = d.Read(k) 212 | if err != nil { 213 | t.Fatal(err) 214 | } 215 | 216 | if string(v) != second { 217 | t.Errorf("expected '%s', got '%s'", second, v) 218 | } 219 | } 220 | 221 | func TestHas(t *testing.T) { 222 | d := New(Options{ 223 | BasePath: "test-data", 224 | CacheSizeMax: 1024, 225 | }) 226 | defer d.EraseAll() 227 | 228 | for k, v := range map[string]string{ 229 | "a": "1", 230 | "foo": "2", 231 | "012345": "3", 232 | } { 233 | d.Write(k, []byte(v)) 234 | } 235 | 236 | d.Read("foo") // cache one of them 237 | if !d.isCached("foo") { 238 | t.Errorf("'foo' didn't get cached") 239 | } 240 | 241 | for _, tuple := range []struct { 242 | key string 243 | expected bool 244 | }{ 245 | {"a", true}, 246 | {"b", false}, 247 | {"foo", true}, 248 | {"bar", false}, 249 | {"01234", false}, 250 | {"012345", true}, 251 | {"0123456", false}, 252 | } { 253 | if expected, got := tuple.expected, d.Has(tuple.key); expected != got { 254 | t.Errorf("Has(%s): expected %v, got %v", tuple.key, expected, got) 255 | } 256 | } 257 | } 258 | 259 | type BrokenReader struct{} 260 | 261 | func (BrokenReader) Read(p []byte) (n int, err error) { 262 | return 0, errors.New("failed to read") 263 | } 264 | 265 | func TestRemovesIncompleteFiles(t *testing.T) { 266 | opts := Options{ 267 | BasePath: "test-data", 268 | CacheSizeMax: 1024, 269 | } 270 | d := New(opts) 271 | defer d.EraseAll() 272 | 273 | key, stream, sync := "key", BrokenReader{}, false 274 | 275 | if err := d.WriteStream(key, stream, sync); err == nil { 276 | t.Fatalf("Expected i/o copy error, none received.") 277 | } 278 | 279 | if _, err := d.Read(key); err == nil { 280 | t.Fatal("Could read the key, but it shouldn't exist") 281 | } 282 | } 283 | 284 | func TestTempDir(t *testing.T) { 285 | opts := Options{ 286 | BasePath: "test-data", 287 | TempDir: "test-data-temp", 288 | CacheSizeMax: 1024, 289 | } 290 | d := New(opts) 291 | defer d.EraseAll() 292 | 293 | k, v := "a", []byte{'b'} 294 | if err := d.Write(k, v); err != nil { 295 | t.Fatalf("write: %s", err) 296 | } 297 | if readVal, err := d.Read(k); err != nil { 298 | t.Fatalf("read: %s", err) 299 | } else if bytes.Compare(v, readVal) != 0 { 300 | t.Fatalf("read: expected %s, got %s", v, readVal) 301 | } 302 | if err := d.Erase(k); err != nil { 303 | t.Fatalf("erase: %s", err) 304 | } 305 | } 306 | 307 | type CrashingReader struct{} 308 | 309 | func (CrashingReader) Read(p []byte) (n int, err error) { 310 | panic("System has crashed while reading the stream") 311 | } 312 | 313 | func TestAtomicWrite(t *testing.T) { 314 | opts := Options{ 315 | BasePath: "test-data", 316 | // Test would fail if TempDir is not set here. 
317 | TempDir: "test-data-temp", 318 | CacheSizeMax: 1024, 319 | } 320 | d := New(opts) 321 | defer d.EraseAll() 322 | 323 | key := "key" 324 | func() { 325 | defer func() { 326 | recover() // Ignore panicking error 327 | }() 328 | 329 | stream := CrashingReader{} 330 | d.WriteStream(key, stream, false) 331 | }() 332 | 333 | if d.Has(key) { 334 | t.Fatal("Has key, but it shouldn't exist") 335 | } 336 | if _, ok := <-d.Keys(nil); ok { 337 | t.Fatal("Store isn't empty") 338 | } 339 | } 340 | 341 | const letterBytes = "abcdef0123456789" 342 | 343 | func randStringBytes(n int) string { 344 | b := make([]byte, n) 345 | for i := range b { 346 | b[i] = letterBytes[rand.Intn(len(letterBytes))] 347 | } 348 | return string(b) 349 | } 350 | 351 | func TestHybridStore(t *testing.T) { 352 | regex := regexp.MustCompile("[0-9a-fA-F]{64}") 353 | 354 | transformFunc := func(s string) *PathKey { 355 | 356 | if regex.MatchString(s) { 357 | return &PathKey{Path: []string{"objects", s[0:2]}, 358 | FileName: s, 359 | } 360 | } 361 | 362 | folders := strings.Split(s, "/") 363 | lfolders := len(folders) 364 | if lfolders > 1 { 365 | return &PathKey{Path: folders[:lfolders-1], 366 | FileName: folders[lfolders-1], 367 | } 368 | } 369 | 370 | return &PathKey{Path: []string{}, 371 | FileName: s, 372 | } 373 | } 374 | 375 | inverseTransformFunc := func(pathKey *PathKey) string { 376 | 377 | if regex.MatchString(pathKey.FileName) { 378 | return pathKey.FileName 379 | 380 | } 381 | 382 | if len(pathKey.Path) == 0 { 383 | return pathKey.FileName 384 | } 385 | 386 | return strings.Join(pathKey.Path, "/") + "/" + pathKey.FileName 387 | 388 | } 389 | opts := Options{ 390 | BasePath: "test-data", 391 | CacheSizeMax: 1024, 392 | AdvancedTransform: transformFunc, 393 | InverseTransform: inverseTransformFunc, 394 | } 395 | d := New(opts) 396 | defer d.EraseAll() 397 | 398 | testData := map[string]string{} 399 | 400 | for i := 0; i < 100; i++ { 401 | testData[randStringBytes(64)] = randStringBytes(100) 402 | } 403 | 404 | for i := 0; i < 100; i++ { 405 | testData[randStringBytes(20)] = randStringBytes(100) 406 | } 407 | 408 | for i := 0; i < 100; i++ { 409 | numsep := rand.Intn(10) + 1 410 | key := "" 411 | for j := 0; j < numsep; j++ { 412 | key += randStringBytes(10) + "/" 413 | } 414 | key += randStringBytes(40) 415 | testData[key] = randStringBytes(100) 416 | } 417 | 418 | for k, v := range testData { 419 | d.WriteString(k, v) 420 | } 421 | 422 | for k, v := range testData { 423 | readVal := d.ReadString(k) 424 | 425 | if v != readVal { 426 | t.Fatalf("read: expected %s, got %s", v, readVal) 427 | } 428 | } 429 | 430 | } 431 | -------------------------------------------------------------------------------- /compression.go: -------------------------------------------------------------------------------- 1 | package diskv 2 | 3 | import ( 4 | "compress/flate" 5 | "compress/gzip" 6 | "compress/zlib" 7 | "io" 8 | ) 9 | 10 | // Compression is an interface that Diskv uses to implement compression of 11 | // data. Writer takes a destination io.Writer and returns a WriteCloser that 12 | // compresses all data written through it. Reader takes a source io.Reader and 13 | // returns a ReadCloser that decompresses all data read through it. You may 14 | // define these methods on your own type, or use one of the NewCompression 15 | // helpers. 
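//
// For illustration, a minimal sketch of enabling compression on a store
// (option values here are hypothetical):
//
//	d := New(Options{
//		BasePath:     "my-data-dir",
//		Compression:  NewGzipCompression(),
//		CacheSizeMax: 1024 * 1024,
//	})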
16 | type Compression interface {
17 | 	Writer(dst io.Writer) (io.WriteCloser, error)
18 | 	Reader(src io.Reader) (io.ReadCloser, error)
19 | }
20 |
21 | // NewGzipCompression returns a Gzip-based Compression.
22 | func NewGzipCompression() Compression {
23 | 	return NewGzipCompressionLevel(flate.DefaultCompression)
24 | }
25 |
26 | // NewGzipCompressionLevel returns a Gzip-based Compression with the given level.
27 | func NewGzipCompressionLevel(level int) Compression {
28 | 	return &genericCompression{
29 | 		wf: func(w io.Writer) (io.WriteCloser, error) { return gzip.NewWriterLevel(w, level) },
30 | 		rf: func(r io.Reader) (io.ReadCloser, error) { return gzip.NewReader(r) },
31 | 	}
32 | }
33 |
34 | // NewZlibCompression returns a Zlib-based Compression.
35 | func NewZlibCompression() Compression {
36 | 	return NewZlibCompressionLevel(flate.DefaultCompression)
37 | }
38 |
39 | // NewZlibCompressionLevel returns a Zlib-based Compression with the given level.
40 | func NewZlibCompressionLevel(level int) Compression {
41 | 	return NewZlibCompressionLevelDict(level, nil)
42 | }
43 |
44 | // NewZlibCompressionLevelDict returns a Zlib-based Compression with the given
45 | // level, based on the given dictionary.
46 | func NewZlibCompressionLevelDict(level int, dict []byte) Compression {
47 | 	return &genericCompression{
48 | 		func(w io.Writer) (io.WriteCloser, error) { return zlib.NewWriterLevelDict(w, level, dict) },
49 | 		func(r io.Reader) (io.ReadCloser, error) { return zlib.NewReaderDict(r, dict) },
50 | 	}
51 | }
52 |
53 | type genericCompression struct {
54 | 	wf func(w io.Writer) (io.WriteCloser, error)
55 | 	rf func(r io.Reader) (io.ReadCloser, error)
56 | }
57 |
58 | func (g *genericCompression) Writer(dst io.Writer) (io.WriteCloser, error) {
59 | 	return g.wf(dst)
60 | }
61 |
62 | func (g *genericCompression) Reader(src io.Reader) (io.ReadCloser, error) {
63 | 	return g.rf(src)
64 | }
65 |
--------------------------------------------------------------------------------
/compression_test.go:
--------------------------------------------------------------------------------
1 | package diskv
2 |
3 | import (
4 | 	"compress/flate"
5 | 	"fmt"
6 | 	"math/rand"
7 | 	"os"
8 | 	"testing"
9 | 	"time"
10 | )
11 |
12 | func init() {
13 | 	rand.Seed(time.Now().UnixNano())
14 | }
15 |
16 | func testCompressionWith(t *testing.T, c Compression, name string) {
17 | 	d := New(Options{
18 | 		BasePath:     "compression-test",
19 | 		CacheSizeMax: 0,
20 | 		Compression:  c,
21 | 	})
22 | 	defer d.EraseAll()
23 |
24 | 	sz := 4096
25 | 	val := make([]byte, sz)
26 | 	for i := 0; i < sz; i++ {
27 | 		val[i] = byte('a' + rand.Intn(26)) // {a-z}; should compress some
28 | 	}
29 |
30 | 	key := "a"
31 | 	if err := d.Write(key, val); err != nil {
32 | 		t.Fatalf("write failed: %s", err)
33 | 	}
34 |
35 | 	targetFile := fmt.Sprintf("%s%c%s", d.BasePath, os.PathSeparator, key)
36 | 	fi, err := os.Stat(targetFile)
37 | 	if err != nil {
38 | 		t.Fatalf("%s: %s", targetFile, err)
39 | 	}
40 |
41 | 	if fi.Size() >= int64(sz) {
42 | 		t.Fatalf("%s: size=%d, expected smaller", targetFile, fi.Size())
43 | 	}
44 | 	t.Logf("%s compressed %d to %d", name, sz, fi.Size())
45 |
46 | 	readVal, err := d.Read(key)
47 | 	if err != nil || len(readVal) != sz {
48 | 		t.Fatalf("read: err=%v, expected size=%d, got size=%d", err, sz, len(readVal))
49 | 	}
50 |
51 | 	for i := 0; i < sz; i++ {
52 | 		if readVal[i] != val[i] {
53 | 			t.Fatalf("i=%d: expected %v, got %v", i, val[i], readVal[i])
54 | 		}
55 | 	}
56 | }
57 |
58 | func TestGzipDefault(t *testing.T) {
59 | 	testCompressionWith(t, NewGzipCompression(), "gzip")
60 | }
61 |
62 |
func TestGzipBestCompression(t *testing.T) { 63 | testCompressionWith(t, NewGzipCompressionLevel(flate.BestCompression), "gzip-max") 64 | } 65 | 66 | func TestGzipBestSpeed(t *testing.T) { 67 | testCompressionWith(t, NewGzipCompressionLevel(flate.BestSpeed), "gzip-min") 68 | } 69 | 70 | func TestZlib(t *testing.T) { 71 | testCompressionWith(t, NewZlibCompression(), "zlib") 72 | } 73 | -------------------------------------------------------------------------------- /diskv.go: -------------------------------------------------------------------------------- 1 | // Diskv (disk-vee) is a simple, persistent, key-value store. 2 | // It stores all data flatly on the filesystem. 3 | 4 | package diskv 5 | 6 | import ( 7 | "bytes" 8 | "errors" 9 | "fmt" 10 | "io" 11 | "io/ioutil" 12 | "os" 13 | "path/filepath" 14 | "strings" 15 | "sync" 16 | "syscall" 17 | ) 18 | 19 | const ( 20 | defaultBasePath = "diskv" 21 | defaultFilePerm os.FileMode = 0666 22 | defaultPathPerm os.FileMode = 0777 23 | ) 24 | 25 | // PathKey represents a string key that has been transformed to 26 | // a directory and file name where the content will eventually 27 | // be stored 28 | type PathKey struct { 29 | Path []string 30 | FileName string 31 | originalKey string 32 | } 33 | 34 | var ( 35 | defaultAdvancedTransform = func(s string) *PathKey { return &PathKey{Path: []string{}, FileName: s} } 36 | defaultInverseTransform = func(pathKey *PathKey) string { return pathKey.FileName } 37 | errCanceled = errors.New("canceled") 38 | errEmptyKey = errors.New("empty key") 39 | errBadKey = errors.New("bad key") 40 | errImportDirectory = errors.New("can't import a directory") 41 | ) 42 | 43 | // TransformFunction transforms a key into a slice of strings, with each 44 | // element in the slice representing a directory in the file path where the 45 | // key's entry will eventually be stored. 46 | // 47 | // For example, if TransformFunc transforms "abcdef" to ["ab", "cde", "f"], 48 | // the final location of the data file will be /ab/cde/f/abcdef 49 | type TransformFunction func(s string) []string 50 | 51 | // AdvancedTransformFunction transforms a key into a PathKey. 52 | // 53 | // A PathKey contains a slice of strings, where each element in the slice 54 | // represents a directory in the file path where the key's entry will eventually 55 | // be stored, as well as the filename. 56 | // 57 | // For example, if AdvancedTransformFunc transforms "abcdef/file.txt" to the 58 | // PathKey {Path: ["ab", "cde", "f"], FileName: "file.txt"}, the final location 59 | // of the data file will be /ab/cde/f/file.txt. 60 | // 61 | // You must provide an InverseTransformFunction if you use an 62 | // AdvancedTransformFunction. 63 | type AdvancedTransformFunction func(s string) *PathKey 64 | 65 | // InverseTransformFunction takes a PathKey and converts it back to a Diskv key. 66 | // In effect, it's the opposite of an AdvancedTransformFunction. 67 | type InverseTransformFunction func(pathKey *PathKey) string 68 | 69 | // Options define a set of properties that dictate Diskv behavior. 70 | // All values are optional. 71 | type Options struct { 72 | BasePath string 73 | Transform TransformFunction 74 | AdvancedTransform AdvancedTransformFunction 75 | InverseTransform InverseTransformFunction 76 | CacheSizeMax uint64 // bytes 77 | PathPerm os.FileMode 78 | FilePerm os.FileMode 79 | // If TempDir is set, it will enable filesystem atomic writes by 80 | // writing temporary files to that location before being moved 81 | // to BasePath. 
82 | // Note that TempDir MUST be on the same device/partition as 83 | // BasePath. 84 | TempDir string 85 | 86 | Index Index 87 | IndexLess LessFunction 88 | 89 | Compression Compression 90 | } 91 | 92 | // Diskv implements the Diskv interface. You shouldn't construct Diskv 93 | // structures directly; instead, use the New constructor. 94 | type Diskv struct { 95 | Options 96 | mu sync.RWMutex 97 | cache map[string][]byte 98 | cacheSize uint64 99 | } 100 | 101 | // New returns an initialized Diskv structure, ready to use. 102 | // If the path identified by baseDir already contains data, 103 | // it will be accessible, but not yet cached. 104 | func New(o Options) *Diskv { 105 | if o.BasePath == "" { 106 | o.BasePath = defaultBasePath 107 | } 108 | 109 | if o.AdvancedTransform == nil { 110 | if o.Transform == nil { 111 | o.AdvancedTransform = defaultAdvancedTransform 112 | } else { 113 | o.AdvancedTransform = convertToAdvancedTransform(o.Transform) 114 | } 115 | if o.InverseTransform == nil { 116 | o.InverseTransform = defaultInverseTransform 117 | } 118 | } else { 119 | if o.InverseTransform == nil { 120 | panic("You must provide an InverseTransform function in advanced mode") 121 | } 122 | } 123 | 124 | if o.PathPerm == 0 { 125 | o.PathPerm = defaultPathPerm 126 | } 127 | if o.FilePerm == 0 { 128 | o.FilePerm = defaultFilePerm 129 | } 130 | 131 | d := &Diskv{ 132 | Options: o, 133 | cache: map[string][]byte{}, 134 | cacheSize: 0, 135 | } 136 | 137 | if d.Index != nil && d.IndexLess != nil { 138 | d.Index.Initialize(d.IndexLess, d.Keys(nil)) 139 | } 140 | 141 | return d 142 | } 143 | 144 | // convertToAdvancedTransform takes a classic Transform function and 145 | // converts it to the new AdvancedTransform 146 | func convertToAdvancedTransform(oldFunc func(s string) []string) AdvancedTransformFunction { 147 | return func(s string) *PathKey { 148 | return &PathKey{Path: oldFunc(s), FileName: s} 149 | } 150 | } 151 | 152 | // Write synchronously writes the key-value pair to disk, making it immediately 153 | // available for reads. Write relies on the filesystem to perform an eventual 154 | // sync to physical media. If you need stronger guarantees, see WriteStream. 155 | func (d *Diskv) Write(key string, val []byte) error { 156 | return d.WriteStream(key, bytes.NewReader(val), false) 157 | } 158 | 159 | // WriteString writes a string key-value pair to disk 160 | func (d *Diskv) WriteString(key string, val string) error { 161 | return d.Write(key, []byte(val)) 162 | } 163 | 164 | func (d *Diskv) transform(key string) (pathKey *PathKey) { 165 | pathKey = d.AdvancedTransform(key) 166 | pathKey.originalKey = key 167 | return pathKey 168 | } 169 | 170 | // WriteStream writes the data represented by the io.Reader to the disk, under 171 | // the provided key. If sync is true, WriteStream performs an explicit sync on 172 | // the file as soon as it's written. 173 | // 174 | // bytes.Buffer provides io.Reader semantics for basic data types. 
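//
// A brief usage sketch (the file name is hypothetical, and error handling is
// elided):
//
//	f, _ := os.Open("big-input.dat")
//	defer f.Close()
//	err := d.WriteStream("big-key", f, true) // sync=true forces an fsync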
175 | func (d *Diskv) WriteStream(key string, r io.Reader, sync bool) error { 176 | if len(key) <= 0 { 177 | return errEmptyKey 178 | } 179 | 180 | pathKey := d.transform(key) 181 | 182 | // Ensure keys cannot evaluate to paths that would not exist 183 | for _, pathPart := range pathKey.Path { 184 | if strings.ContainsRune(pathPart, os.PathSeparator) { 185 | return errBadKey 186 | } 187 | } 188 | 189 | if strings.ContainsRune(pathKey.FileName, os.PathSeparator) { 190 | return errBadKey 191 | } 192 | 193 | d.mu.Lock() 194 | defer d.mu.Unlock() 195 | 196 | return d.writeStreamWithLock(pathKey, r, sync) 197 | } 198 | 199 | // createKeyFileWithLock either creates the key file directly, or 200 | // creates a temporary file in TempDir if it is set. 201 | func (d *Diskv) createKeyFileWithLock(pathKey *PathKey) (*os.File, error) { 202 | if d.TempDir != "" { 203 | if err := os.MkdirAll(d.TempDir, d.PathPerm); err != nil { 204 | return nil, fmt.Errorf("temp mkdir: %s", err) 205 | } 206 | f, err := ioutil.TempFile(d.TempDir, "") 207 | if err != nil { 208 | return nil, fmt.Errorf("temp file: %s", err) 209 | } 210 | 211 | if err := os.Chmod(f.Name(), d.FilePerm); err != nil { 212 | f.Close() // error deliberately ignored 213 | os.Remove(f.Name()) // error deliberately ignored 214 | return nil, fmt.Errorf("chmod: %s", err) 215 | } 216 | return f, nil 217 | } 218 | 219 | mode := os.O_WRONLY | os.O_CREATE | os.O_TRUNC // overwrite if exists 220 | f, err := os.OpenFile(d.completeFilename(pathKey), mode, d.FilePerm) 221 | if err != nil { 222 | return nil, fmt.Errorf("open file: %s", err) 223 | } 224 | return f, nil 225 | } 226 | 227 | // writeStream does no input validation checking. 228 | func (d *Diskv) writeStreamWithLock(pathKey *PathKey, r io.Reader, sync bool) error { 229 | if err := d.ensurePathWithLock(pathKey); err != nil { 230 | return fmt.Errorf("ensure path: %s", err) 231 | } 232 | 233 | f, err := d.createKeyFileWithLock(pathKey) 234 | if err != nil { 235 | return fmt.Errorf("create key file: %s", err) 236 | } 237 | 238 | wc := io.WriteCloser(&nopWriteCloser{f}) 239 | if d.Compression != nil { 240 | wc, err = d.Compression.Writer(f) 241 | if err != nil { 242 | f.Close() // error deliberately ignored 243 | os.Remove(f.Name()) // error deliberately ignored 244 | return fmt.Errorf("compression writer: %s", err) 245 | } 246 | } 247 | 248 | if _, err := io.Copy(wc, r); err != nil { 249 | f.Close() // error deliberately ignored 250 | os.Remove(f.Name()) // error deliberately ignored 251 | return fmt.Errorf("i/o copy: %s", err) 252 | } 253 | 254 | if err := wc.Close(); err != nil { 255 | f.Close() // error deliberately ignored 256 | os.Remove(f.Name()) // error deliberately ignored 257 | return fmt.Errorf("compression close: %s", err) 258 | } 259 | 260 | if sync { 261 | if err := f.Sync(); err != nil { 262 | f.Close() // error deliberately ignored 263 | os.Remove(f.Name()) // error deliberately ignored 264 | return fmt.Errorf("file sync: %s", err) 265 | } 266 | } 267 | 268 | if err := f.Close(); err != nil { 269 | return fmt.Errorf("file close: %s", err) 270 | } 271 | 272 | fullPath := d.completeFilename(pathKey) 273 | if f.Name() != fullPath { 274 | if err := os.Rename(f.Name(), fullPath); err != nil { 275 | os.Remove(f.Name()) // error deliberately ignored 276 | return fmt.Errorf("rename: %s", err) 277 | } 278 | } 279 | 280 | if d.Index != nil { 281 | d.Index.Insert(pathKey.originalKey) 282 | } 283 | 284 | d.bustCacheWithLock(pathKey.originalKey) // cache only on read 285 | 286 | return nil 287 | } 
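
// Taken together, createKeyFileWithLock and writeStreamWithLock give atomic
// replacement of a key's file when TempDir is set: the data is staged in a
// temporary file and moved into place with a single os.Rename, which is atomic
// as long as TempDir and BasePath are on the same filesystem. A sketch of
// opting in (directory names are illustrative):
//
//	d := New(Options{
//		BasePath: "my-data-dir",
//		TempDir:  "my-data-tmp", // same device/partition as BasePath
//	})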
288 |
289 | // Import imports the source file into diskv under the destination key. If the
290 | // destination key already exists, it's overwritten. If move is true, the
291 | // source file is removed after a successful import.
292 | func (d *Diskv) Import(srcFilename, dstKey string, move bool) (err error) {
293 | 	if dstKey == "" {
294 | 		return errEmptyKey
295 | 	}
296 |
297 | 	if fi, err := os.Stat(srcFilename); err != nil {
298 | 		return err
299 | 	} else if fi.IsDir() {
300 | 		return errImportDirectory
301 | 	}
302 |
303 | 	dstPathKey := d.transform(dstKey)
304 |
305 | 	d.mu.Lock()
306 | 	defer d.mu.Unlock()
307 |
308 | 	if err := d.ensurePathWithLock(dstPathKey); err != nil {
309 | 		return fmt.Errorf("ensure path: %s", err)
310 | 	}
311 |
312 | 	if move {
313 | 		if err := syscall.Rename(srcFilename, d.completeFilename(dstPathKey)); err == nil {
314 | 			d.bustCacheWithLock(dstPathKey.originalKey)
315 | 			return nil
316 | 		} else if err != syscall.EXDEV {
317 | 			// Rename failed for a reason other than a cross-device (EXDEV) move; give up. On EXDEV, fall back to copying below.
318 | 			return err
319 | 		}
320 | 	}
321 |
322 | 	f, err := os.Open(srcFilename)
323 | 	if err != nil {
324 | 		return err
325 | 	}
326 | 	defer f.Close()
327 | 	err = d.writeStreamWithLock(dstPathKey, f, false)
328 | 	if err == nil && move {
329 | 		err = os.Remove(srcFilename)
330 | 	}
331 | 	return err
332 | }
333 |
334 | // Read reads the key and returns the value.
335 | // If the key is available in the cache, Read won't touch the disk.
336 | // If the key is not in the cache, Read will have the side-effect of
337 | // lazily caching the value.
338 | func (d *Diskv) Read(key string) ([]byte, error) {
339 | 	rc, err := d.ReadStream(key, false)
340 | 	if err != nil {
341 | 		return []byte{}, err
342 | 	}
343 | 	defer rc.Close()
344 | 	return ioutil.ReadAll(rc)
345 | }
346 |
347 | // ReadString reads the key and returns a string value.
348 | // In case of error, an empty string is returned.
349 | func (d *Diskv) ReadString(key string) string {
350 | 	value, _ := d.Read(key)
351 | 	return string(value)
352 | }
353 |
354 | // ReadStream reads the key and returns the value (data) as an io.ReadCloser.
355 | // If the value is cached from a previous read, and direct is false,
356 | // ReadStream will use the cached value. Otherwise, it will return a handle to
357 | // the file on disk, and cache the data on read.
358 | //
359 | // If direct is true, ReadStream will lazily delete any cached value for the
360 | // key, and return a direct handle to the file on disk.
361 | //
362 | // If compression is enabled, ReadStream taps into the io.Reader stream prior
363 | // to decompression, and caches the compressed data.
364 | func (d *Diskv) ReadStream(key string, direct bool) (io.ReadCloser, error) {
365 |
366 | 	pathKey := d.transform(key)
367 | 	d.mu.RLock()
368 | 	defer d.mu.RUnlock()
369 |
370 | 	if val, ok := d.cache[key]; ok {
371 | 		if !direct {
372 | 			buf := bytes.NewReader(val)
373 | 			if d.Compression != nil {
374 | 				return d.Compression.Reader(buf)
375 | 			}
376 | 			return ioutil.NopCloser(buf), nil
377 | 		}
378 |
379 | 		go func() {
380 | 			d.mu.Lock()
381 | 			defer d.mu.Unlock()
382 | 			d.uncacheWithLock(key, uint64(len(val)))
383 | 		}()
384 | 	}
385 |
386 | 	return d.readWithRLock(pathKey)
387 | }
388 |
389 | // read ignores the cache, and returns an io.ReadCloser representing the
390 | // decompressed data for the given key, streamed from the disk. Clients should
391 | // acquire a read lock on the Diskv and check the cache themselves before
392 | // calling read.
393 | func (d *Diskv) readWithRLock(pathKey *PathKey) (io.ReadCloser, error) { 394 | filename := d.completeFilename(pathKey) 395 | 396 | fi, err := os.Stat(filename) 397 | if err != nil { 398 | return nil, err 399 | } 400 | if fi.IsDir() { 401 | return nil, os.ErrNotExist 402 | } 403 | 404 | f, err := os.Open(filename) 405 | if err != nil { 406 | return nil, err 407 | } 408 | 409 | var r io.Reader 410 | if d.CacheSizeMax > 0 { 411 | r = newSiphon(f, d, pathKey.originalKey) 412 | } else { 413 | r = &closingReader{f} 414 | } 415 | 416 | var rc = io.ReadCloser(ioutil.NopCloser(r)) 417 | if d.Compression != nil { 418 | rc, err = d.Compression.Reader(r) 419 | if err != nil { 420 | return nil, err 421 | } 422 | } 423 | 424 | return rc, nil 425 | } 426 | 427 | // closingReader provides a Reader that automatically closes the 428 | // embedded ReadCloser when it reaches EOF 429 | type closingReader struct { 430 | rc io.ReadCloser 431 | } 432 | 433 | func (cr closingReader) Read(p []byte) (int, error) { 434 | n, err := cr.rc.Read(p) 435 | if err == io.EOF { 436 | if closeErr := cr.rc.Close(); closeErr != nil { 437 | return n, closeErr // close must succeed for Read to succeed 438 | } 439 | } 440 | return n, err 441 | } 442 | 443 | // siphon is like a TeeReader: it copies all data read through it to an 444 | // internal buffer, and moves that buffer to the cache at EOF. 445 | type siphon struct { 446 | f *os.File 447 | d *Diskv 448 | key string 449 | buf *bytes.Buffer 450 | } 451 | 452 | // newSiphon constructs a siphoning reader that represents the passed file. 453 | // When a successful series of reads ends in an EOF, the siphon will write 454 | // the buffered data to Diskv's cache under the given key. 455 | func newSiphon(f *os.File, d *Diskv, key string) io.Reader { 456 | return &siphon{ 457 | f: f, 458 | d: d, 459 | key: key, 460 | buf: &bytes.Buffer{}, 461 | } 462 | } 463 | 464 | // Read implements the io.Reader interface for siphon. 465 | func (s *siphon) Read(p []byte) (int, error) { 466 | n, err := s.f.Read(p) 467 | 468 | if err == nil { 469 | return s.buf.Write(p[0:n]) // Write must succeed for Read to succeed 470 | } 471 | 472 | if err == io.EOF { 473 | s.d.cacheWithoutLock(s.key, s.buf.Bytes()) // cache may fail 474 | if closeErr := s.f.Close(); closeErr != nil { 475 | return n, closeErr // close must succeed for Read to succeed 476 | } 477 | return n, err 478 | } 479 | 480 | return n, err 481 | } 482 | 483 | // Erase synchronously erases the given key from the disk and the cache. 484 | func (d *Diskv) Erase(key string) error { 485 | pathKey := d.transform(key) 486 | d.mu.Lock() 487 | defer d.mu.Unlock() 488 | 489 | d.bustCacheWithLock(key) 490 | 491 | // erase from index 492 | if d.Index != nil { 493 | d.Index.Delete(key) 494 | } 495 | 496 | // erase from disk 497 | filename := d.completeFilename(pathKey) 498 | if s, err := os.Stat(filename); err == nil { 499 | if s.IsDir() { 500 | return errBadKey 501 | } 502 | if err = os.Remove(filename); err != nil { 503 | return err 504 | } 505 | } else { 506 | // Return err as-is so caller can do os.IsNotExist(err). 507 | return err 508 | } 509 | 510 | // clean up and return 511 | d.pruneDirsWithLock(key) 512 | return nil 513 | } 514 | 515 | // EraseAll will delete all of the data from the store, both in the cache and on 516 | // the disk. Note that EraseAll doesn't distinguish diskv-related data from non- 517 | // diskv-related data. Care should be taken to always specify a diskv base 518 | // directory that is exclusively for diskv data. 
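//
// A cautious pattern, for illustration (the directory layout is hypothetical):
//
//	d := New(Options{BasePath: filepath.Join(appDir, "diskv-data")})
//	// ...
//	d.EraseAll() // removes appDir/diskv-data and everything under it, nothing else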
519 | func (d *Diskv) EraseAll() error { 520 | d.mu.Lock() 521 | defer d.mu.Unlock() 522 | d.cache = make(map[string][]byte) 523 | d.cacheSize = 0 524 | if d.TempDir != "" { 525 | os.RemoveAll(d.TempDir) // errors ignored 526 | } 527 | return os.RemoveAll(d.BasePath) 528 | } 529 | 530 | // Has returns true if the given key exists. 531 | func (d *Diskv) Has(key string) bool { 532 | pathKey := d.transform(key) 533 | d.mu.Lock() 534 | defer d.mu.Unlock() 535 | 536 | if _, ok := d.cache[key]; ok { 537 | return true 538 | } 539 | 540 | filename := d.completeFilename(pathKey) 541 | s, err := os.Stat(filename) 542 | if err != nil { 543 | return false 544 | } 545 | if s.IsDir() { 546 | return false 547 | } 548 | 549 | return true 550 | } 551 | 552 | // Keys returns a channel that will yield every key accessible by the store, 553 | // in undefined order. If a cancel channel is provided, closing it will 554 | // terminate and close the keys channel. 555 | func (d *Diskv) Keys(cancel <-chan struct{}) <-chan string { 556 | return d.KeysPrefix("", cancel) 557 | } 558 | 559 | // KeysPrefix returns a channel that will yield every key accessible by the 560 | // store with the given prefix, in undefined order. If a cancel channel is 561 | // provided, closing it will terminate and close the keys channel. If the 562 | // provided prefix is the empty string, all keys will be yielded. 563 | func (d *Diskv) KeysPrefix(prefix string, cancel <-chan struct{}) <-chan string { 564 | var prepath string 565 | if prefix == "" { 566 | prepath = d.BasePath 567 | } else { 568 | prefixKey := d.transform(prefix) 569 | prepath = d.pathFor(prefixKey) 570 | } 571 | c := make(chan string) 572 | go func() { 573 | filepath.Walk(prepath, d.walker(c, prefix, cancel)) 574 | close(c) 575 | }() 576 | return c 577 | } 578 | 579 | // walker returns a function which satisfies the filepath.WalkFunc interface. 580 | // It sends every non-directory file entry down the channel c. 581 | func (d *Diskv) walker(c chan<- string, prefix string, cancel <-chan struct{}) filepath.WalkFunc { 582 | return func(path string, info os.FileInfo, err error) error { 583 | if err != nil { 584 | return err 585 | } 586 | 587 | relPath, _ := filepath.Rel(d.BasePath, path) 588 | dir, file := filepath.Split(relPath) 589 | pathSplit := strings.Split(dir, string(filepath.Separator)) 590 | pathSplit = pathSplit[:len(pathSplit)-1] 591 | 592 | pathKey := &PathKey{ 593 | Path: pathSplit, 594 | FileName: file, 595 | } 596 | 597 | key := d.InverseTransform(pathKey) 598 | 599 | if info.IsDir() || !strings.HasPrefix(key, prefix) { 600 | return nil // "pass" 601 | } 602 | 603 | select { 604 | case c <- key: 605 | case <-cancel: 606 | return errCanceled 607 | } 608 | 609 | return nil 610 | } 611 | } 612 | 613 | // pathFor returns the absolute path for location on the filesystem where the 614 | // data for the given key will be stored. 615 | func (d *Diskv) pathFor(pathKey *PathKey) string { 616 | return filepath.Join(d.BasePath, filepath.Join(pathKey.Path...)) 617 | } 618 | 619 | // ensurePathWithLock is a helper function that generates all necessary 620 | // directories on the filesystem for the given key. 621 | func (d *Diskv) ensurePathWithLock(pathKey *PathKey) error { 622 | return os.MkdirAll(d.pathFor(pathKey), d.PathPerm) 623 | } 624 | 625 | // completeFilename returns the absolute path to the file for the given key. 
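//
// For example, with BasePath "/data" and PathKey{Path: []string{"ab", "cde"},
// FileName: "abcdef"}, it returns "/data/ab/cde/abcdef".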
626 | func (d *Diskv) completeFilename(pathKey *PathKey) string { 627 | return filepath.Join(d.pathFor(pathKey), pathKey.FileName) 628 | } 629 | 630 | // cacheWithLock attempts to cache the given key-value pair in the store's 631 | // cache. It can fail if the value is larger than the cache's maximum size. 632 | func (d *Diskv) cacheWithLock(key string, val []byte) error { 633 | // If the key already exists, delete it. 634 | d.bustCacheWithLock(key) 635 | 636 | valueSize := uint64(len(val)) 637 | if err := d.ensureCacheSpaceWithLock(valueSize); err != nil { 638 | return fmt.Errorf("%s; not caching", err) 639 | } 640 | 641 | // be very strict about memory guarantees 642 | if (d.cacheSize + valueSize) > d.CacheSizeMax { 643 | panic(fmt.Sprintf("failed to make room for value (%d/%d)", valueSize, d.CacheSizeMax)) 644 | } 645 | 646 | d.cache[key] = val 647 | d.cacheSize += valueSize 648 | return nil 649 | } 650 | 651 | // cacheWithoutLock acquires the store's (write) mutex and calls cacheWithLock. 652 | func (d *Diskv) cacheWithoutLock(key string, val []byte) error { 653 | d.mu.Lock() 654 | defer d.mu.Unlock() 655 | return d.cacheWithLock(key, val) 656 | } 657 | 658 | func (d *Diskv) bustCacheWithLock(key string) { 659 | if val, ok := d.cache[key]; ok { 660 | d.uncacheWithLock(key, uint64(len(val))) 661 | } 662 | } 663 | 664 | func (d *Diskv) uncacheWithLock(key string, sz uint64) { 665 | d.cacheSize -= sz 666 | delete(d.cache, key) 667 | } 668 | 669 | // pruneDirsWithLock deletes empty directories in the path walk leading to the 670 | // key k. Typically this function is called after an Erase is made. 671 | func (d *Diskv) pruneDirsWithLock(key string) error { 672 | pathlist := d.transform(key).Path 673 | for i := range pathlist { 674 | dir := filepath.Join(d.BasePath, filepath.Join(pathlist[:len(pathlist)-i]...)) 675 | 676 | // thanks to Steven Blenkinsop for this snippet 677 | switch fi, err := os.Stat(dir); true { 678 | case err != nil: 679 | return err 680 | case !fi.IsDir(): 681 | panic(fmt.Sprintf("corrupt dirstate at %s", dir)) 682 | } 683 | 684 | nlinks, err := filepath.Glob(filepath.Join(dir, "*")) 685 | if err != nil { 686 | return err 687 | } else if len(nlinks) > 0 { 688 | return nil // has subdirs -- do not prune 689 | } 690 | if err = os.Remove(dir); err != nil { 691 | return err 692 | } 693 | } 694 | 695 | return nil 696 | } 697 | 698 | // ensureCacheSpaceWithLock deletes entries from the cache in arbitrary order 699 | // until the cache has at least valueSize bytes available. 700 | func (d *Diskv) ensureCacheSpaceWithLock(valueSize uint64) error { 701 | if valueSize > d.CacheSizeMax { 702 | return fmt.Errorf("value size (%d bytes) too large for cache (%d bytes)", valueSize, d.CacheSizeMax) 703 | } 704 | 705 | safe := func() bool { return (d.cacheSize + valueSize) <= d.CacheSizeMax } 706 | 707 | for key, val := range d.cache { 708 | if safe() { 709 | break 710 | } 711 | 712 | d.uncacheWithLock(key, uint64(len(val))) 713 | } 714 | 715 | if !safe() { 716 | panic(fmt.Sprintf("%d bytes still won't fit in the cache! (max %d bytes)", valueSize, d.CacheSizeMax)) 717 | } 718 | 719 | return nil 720 | } 721 | 722 | // nopWriteCloser wraps an io.Writer and provides a no-op Close method to 723 | // satisfy the io.WriteCloser interface. 
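//
// It is used by writeStreamWithLock when no Compression is configured, so that
// closing the generic WriteCloser does not close the underlying *os.File; the
// file is synced (optionally) and closed explicitly afterwards.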
724 | type nopWriteCloser struct {
725 | 	io.Writer
726 | }
727 |
728 | func (wc *nopWriteCloser) Write(p []byte) (int, error) { return wc.Writer.Write(p) }
729 | func (wc *nopWriteCloser) Close() error                { return nil }
730 |
--------------------------------------------------------------------------------
/examples/advanced-transform/advanced-transform.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | 	"fmt"
5 | 	"strings"
6 |
7 | 	"github.com/peterbourgon/diskv/v3"
8 | )
9 |
10 | func AdvancedTransformExample(key string) *diskv.PathKey {
11 | 	path := strings.Split(key, "/")
12 | 	last := len(path) - 1
13 | 	return &diskv.PathKey{
14 | 		Path:     path[:last],
15 | 		FileName: path[last] + ".txt",
16 | 	}
17 | }
18 |
19 | // If you provide an AdvancedTransform, you must also provide its
20 | // inverse, which must reconstruct the original key:
21 |
22 | func InverseTransformExample(pathKey *diskv.PathKey) (key string) {
23 | 	txt := pathKey.FileName[len(pathKey.FileName)-4:]
24 | 	if txt != ".txt" {
25 | 		panic("Invalid file found in storage folder!")
26 | 	}
27 | 	return strings.Join(append(pathKey.Path, pathKey.FileName[:len(pathKey.FileName)-4]), "/")
28 | }
29 |
30 | func main() {
31 | 	d := diskv.New(diskv.Options{
32 | 		BasePath:          "my-data-dir",
33 | 		AdvancedTransform: AdvancedTransformExample,
34 | 		InverseTransform:  InverseTransformExample,
35 | 		CacheSizeMax:      1024 * 1024,
36 | 	})
37 | 	// Write some text to the key "alpha/beta/gamma".
38 | 	key := "alpha/beta/gamma"
39 | 	d.WriteString(key, "¡Hola!") // will be stored in "/alpha/beta/gamma.txt"
40 | 	fmt.Println(d.ReadString("alpha/beta/gamma"))
41 | }
42 |
--------------------------------------------------------------------------------
/examples/content-addressable-store/cas.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | 	"crypto/md5"
5 | 	"fmt"
6 | 	"io"
7 |
8 | 	"github.com/peterbourgon/diskv/v3"
9 | )
10 |
11 | const transformBlockSize = 2 // grouping of chars per directory depth
12 |
13 | func blockTransform(s string) []string {
14 | 	var (
15 | 		sliceSize = len(s) / transformBlockSize
16 | 		pathSlice = make([]string, sliceSize)
17 | 	)
18 | 	for i := 0; i < sliceSize; i++ {
19 | 		from, to := i*transformBlockSize, (i*transformBlockSize)+transformBlockSize
20 | 		pathSlice[i] = s[from:to]
21 | 	}
22 | 	return pathSlice
23 | }
24 |
25 | func main() {
26 | 	d := diskv.New(diskv.Options{
27 | 		BasePath:     "data",
28 | 		Transform:    blockTransform,
29 | 		CacheSizeMax: 1024 * 1024, // 1MB
30 | 	})
31 |
32 | 	for _, valueStr := range []string{
33 | 		"I am the very model of a modern Major-General",
34 | 		"I've information vegetable, animal, and mineral",
35 | 		"I know the kings of England, and I quote the fights historical",
36 | 		"From Marathon to Waterloo, in order categorical",
37 | 		"I'm very well acquainted, too, with matters mathematical",
38 | 		"I understand equations, both the simple and quadratical",
39 | 		"About binomial theorem I'm teeming with a lot o' news",
40 | 		"With many cheerful facts about the square of the hypotenuse",
41 | 	} {
42 | 		d.Write(md5sum(valueStr), []byte(valueStr))
43 | 	}
44 |
45 | 	var keyCount int
46 | 	for key := range d.Keys(nil) {
47 | 		val, err := d.Read(key)
48 | 		if err != nil {
49 | 			panic(fmt.Sprintf("key %s had no value", key))
50 | 		}
51 | 		fmt.Printf("%s: %s\n", key, val)
52 | 		keyCount++
53 | 	}
54 | 	fmt.Printf("%d total keys\n", keyCount)
55 |
56 | 	// d.EraseAll() // leave it commented out to see how data is kept on disk
57 | }
58 |
59 | func md5sum(s string)
string {
60 | 	h := md5.New()
61 | 	io.WriteString(h, s)
62 | 	return fmt.Sprintf("%x", h.Sum(nil))
63 | }
64 |
--------------------------------------------------------------------------------
/examples/git-like-store/git-like-store.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | /* This example uses a more advanced transform function that loosely imitates
4 | how Git stores its objects:
5 |
6 | * hash-like keys are placed under the objects directory
7 | * any other key is placed in the base directory; if the key
8 |   contains slashes, these are converted to subdirectories
9 |
10 | */
11 |
12 | import (
13 | 	"fmt"
14 | 	"regexp"
15 | 	"strings"
16 |
17 | 	"github.com/peterbourgon/diskv/v3"
18 | )
19 |
20 | var hex40 = regexp.MustCompile("[0-9a-fA-F]{40}")
21 |
22 | func hexTransform(s string) *diskv.PathKey {
23 | 	if hex40.MatchString(s) {
24 | 		return &diskv.PathKey{Path: []string{"objects", s[0:2]},
25 | 			FileName: s,
26 | 		}
27 | 	}
28 |
29 | 	folders := strings.Split(s, "/")
30 | 	lfolders := len(folders)
31 | 	if lfolders > 1 {
32 | 		return &diskv.PathKey{Path: folders[:lfolders-1],
33 | 			FileName: folders[lfolders-1],
34 | 		}
35 | 	}
36 |
37 | 	return &diskv.PathKey{Path: []string{},
38 | 		FileName: s,
39 | 	}
40 | }
41 |
42 | func hexInverseTransform(pathKey *diskv.PathKey) string {
43 | 	if hex40.MatchString(pathKey.FileName) {
44 | 		return pathKey.FileName
45 | 	}
46 |
47 | 	if len(pathKey.Path) == 0 {
48 | 		return pathKey.FileName
49 | 	}
50 |
51 | 	return strings.Join(pathKey.Path, "/") + "/" + pathKey.FileName
52 | }
53 |
54 | func main() {
55 | 	d := diskv.New(diskv.Options{
56 | 		BasePath:          "my-data-dir",
57 | 		AdvancedTransform: hexTransform,
58 | 		InverseTransform:  hexInverseTransform,
59 | 		CacheSizeMax:      1024 * 1024,
60 | 	})
61 |
62 | 	// Write some text to a hash-like key.
63 | key := "1bd88421b055327fcc8660c76c4894c4ea4c95d7" 64 | d.WriteString(key, "¡Hola!") // will be stored in "/objects/1b/1bd88421b055327fcc8660c76c4894c4ea4c95d7" 65 | 66 | d.WriteString("refs/heads/master", "some text") // will be stored in "/refs/heads/master" 67 | 68 | fmt.Println("Enumerating All keys:") 69 | c := d.Keys(nil) 70 | 71 | for key := range c { 72 | value := d.ReadString(key) 73 | fmt.Printf("Key: %s, Value: %s\n", key, value) 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /examples/super-simple-store/super-simple-store.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/peterbourgon/diskv/v3" 7 | ) 8 | 9 | func main() { 10 | d := diskv.New(diskv.Options{ 11 | BasePath: "my-diskv-data-directory", 12 | CacheSizeMax: 1024 * 1024, // 1MB 13 | }) 14 | 15 | key := "alpha" 16 | if err := d.Write(key, []byte{'1', '2', '3'}); err != nil { 17 | panic(err) 18 | } 19 | 20 | value, err := d.Read(key) 21 | if err != nil { 22 | panic(err) 23 | } 24 | fmt.Printf("%v\n", value) 25 | 26 | if err := d.Erase(key); err != nil { 27 | panic(err) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/peterbourgon/diskv/v3 2 | 3 | go 1.12 4 | 5 | require github.com/google/btree v1.0.0 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/google/btree v1.0.0 h1:0udJVsspx3VBr5FwtLhQQtuAsVc79tTq0ocGIPAU6qo= 2 | github.com/google/btree v1.0.0/go.mod h1:lNA+9X1NB3Zf8V7Ke586lFgjr2dZNuvo3lPJSGZ5JPQ= 3 | -------------------------------------------------------------------------------- /import_test.go: -------------------------------------------------------------------------------- 1 | package diskv_test 2 | 3 | import ( 4 | "bytes" 5 | "io/ioutil" 6 | "os" 7 | 8 | "github.com/peterbourgon/diskv/v3" 9 | 10 | "testing" 11 | ) 12 | 13 | func TestImportMove(t *testing.T) { 14 | b := []byte(`0123456789`) 15 | f, err := ioutil.TempFile("", "temp-test") 16 | if err != nil { 17 | t.Fatal(err) 18 | } 19 | if _, err := f.Write(b); err != nil { 20 | t.Fatal(err) 21 | } 22 | f.Close() 23 | 24 | d := diskv.New(diskv.Options{ 25 | BasePath: "test-import-move", 26 | }) 27 | defer d.EraseAll() 28 | 29 | key := "key" 30 | 31 | if err := d.Write(key, []byte(`TBD`)); err != nil { 32 | t.Fatal(err) 33 | } 34 | 35 | if err := d.Import(f.Name(), key, true); err != nil { 36 | t.Fatal(err) 37 | } 38 | 39 | if _, err := os.Stat(f.Name()); err == nil || !os.IsNotExist(err) { 40 | t.Errorf("expected temp file to be gone, but err = %v", err) 41 | } 42 | 43 | if !d.Has(key) { 44 | t.Errorf("%q not present", key) 45 | } 46 | 47 | if buf, err := d.Read(key); err != nil || bytes.Compare(b, buf) != 0 { 48 | t.Errorf("want %q, have %q (err = %v)", string(b), string(buf), err) 49 | } 50 | } 51 | 52 | func TestImportCopy(t *testing.T) { 53 | b := []byte(`¡åéîòü!`) 54 | 55 | f, err := ioutil.TempFile("", "temp-test") 56 | if err != nil { 57 | t.Fatal(err) 58 | } 59 | if _, err := f.Write(b); err != nil { 60 | t.Fatal(err) 61 | } 62 | f.Close() 63 | 64 | d := diskv.New(diskv.Options{ 65 | BasePath: "test-import-copy", 66 | }) 67 | defer d.EraseAll() 68 | 69 | if err := d.Import(f.Name(), "key", false); err 
!= nil { 70 | t.Fatal(err) 71 | } 72 | 73 | if _, err := os.Stat(f.Name()); err != nil { 74 | t.Errorf("expected temp file to remain, but got err = %v", err) 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /index.go: -------------------------------------------------------------------------------- 1 | package diskv 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/google/btree" 7 | ) 8 | 9 | // Index is a generic interface for things that can 10 | // provide an ordered list of keys. 11 | type Index interface { 12 | Initialize(less LessFunction, keys <-chan string) 13 | Insert(key string) 14 | Delete(key string) 15 | Keys(from string, n int) []string 16 | } 17 | 18 | // LessFunction is used to initialize an Index of keys in a specific order. 19 | type LessFunction func(string, string) bool 20 | 21 | // btreeString is a custom data type that satisfies the BTree Less interface, 22 | // making the strings it wraps sortable by the BTree package. 23 | type btreeString struct { 24 | s string 25 | l LessFunction 26 | } 27 | 28 | // Less satisfies the BTree.Less interface using the btreeString's LessFunction. 29 | func (s btreeString) Less(i btree.Item) bool { 30 | return s.l(s.s, i.(btreeString).s) 31 | } 32 | 33 | // BTreeIndex is an implementation of the Index interface using google/btree. 34 | type BTreeIndex struct { 35 | sync.RWMutex 36 | LessFunction 37 | *btree.BTree 38 | } 39 | 40 | // Initialize populates the BTree tree with data from the keys channel, 41 | // according to the passed less function. It's destructive to the BTreeIndex. 42 | func (i *BTreeIndex) Initialize(less LessFunction, keys <-chan string) { 43 | i.Lock() 44 | defer i.Unlock() 45 | i.LessFunction = less 46 | i.BTree = rebuild(less, keys) 47 | } 48 | 49 | // Insert inserts the given key (only) into the BTree tree. 50 | func (i *BTreeIndex) Insert(key string) { 51 | i.Lock() 52 | defer i.Unlock() 53 | if i.BTree == nil || i.LessFunction == nil { 54 | panic("uninitialized index") 55 | } 56 | i.BTree.ReplaceOrInsert(btreeString{s: key, l: i.LessFunction}) 57 | } 58 | 59 | // Delete removes the given key (only) from the BTree tree. 60 | func (i *BTreeIndex) Delete(key string) { 61 | i.Lock() 62 | defer i.Unlock() 63 | if i.BTree == nil || i.LessFunction == nil { 64 | panic("uninitialized index") 65 | } 66 | i.BTree.Delete(btreeString{s: key, l: i.LessFunction}) 67 | } 68 | 69 | // Keys yields a maximum of n keys in order. If the passed 'from' key is empty, 70 | // Keys will return the first n keys. If the passed 'from' key is non-empty, the 71 | // first key in the returned slice will be the key that immediately follows the 72 | // passed key, in key order. 
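//
// For example, with indexed keys ["a", "b", "c"] (a sketch, per the
// implementation below):
//
//	idx.Keys("", 2)  // ["a", "b"]
//	idx.Keys("a", 3) // ["b", "c"] -- the matched 'from' key counts toward
//	                 // n before it is dropped from the result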
73 | func (i *BTreeIndex) Keys(from string, n int) []string { 74 | i.RLock() 75 | defer i.RUnlock() 76 | 77 | if i.BTree == nil || i.LessFunction == nil { 78 | panic("uninitialized index") 79 | } 80 | 81 | if i.BTree.Len() <= 0 { 82 | return []string{} 83 | } 84 | 85 | btreeFrom := btreeString{s: from, l: i.LessFunction} 86 | skipFirst := true 87 | if len(from) <= 0 || !i.BTree.Has(btreeFrom) { 88 | // no such key, so fabricate an always-smallest item 89 | btreeFrom = btreeString{s: "", l: func(string, string) bool { return true }} 90 | skipFirst = false 91 | } 92 | 93 | keys := []string{} 94 | iterator := func(i btree.Item) bool { 95 | keys = append(keys, i.(btreeString).s) 96 | return len(keys) < n 97 | } 98 | i.BTree.AscendGreaterOrEqual(btreeFrom, iterator) 99 | 100 | if skipFirst && len(keys) > 0 { 101 | keys = keys[1:] 102 | } 103 | 104 | return keys 105 | } 106 | 107 | // rebuild does the work of regenerating the index 108 | // with the given keys. 109 | func rebuild(less LessFunction, keys <-chan string) *btree.BTree { 110 | tree := btree.New(2) 111 | for key := range keys { 112 | tree.ReplaceOrInsert(btreeString{s: key, l: less}) 113 | } 114 | return tree 115 | } 116 | -------------------------------------------------------------------------------- /index_test.go: -------------------------------------------------------------------------------- 1 | package diskv 2 | 3 | import ( 4 | "bytes" 5 | "reflect" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | func strLess(a, b string) bool { return a < b } 11 | 12 | func cmpStrings(a, b []string) bool { 13 | if len(a) != len(b) { 14 | return false 15 | } 16 | for i := 0; i < len(a); i++ { 17 | if a[i] != b[i] { 18 | return false 19 | } 20 | } 21 | return true 22 | } 23 | 24 | func (d *Diskv) isIndexed(key string) bool { 25 | if d.Index == nil { 26 | return false 27 | } 28 | 29 | for _, got := range d.Index.Keys("", 1000) { 30 | if got == key { 31 | return true 32 | } 33 | } 34 | return false 35 | } 36 | 37 | func TestIndexOrder(t *testing.T) { 38 | d := New(Options{ 39 | BasePath: "index-test", 40 | CacheSizeMax: 1024, 41 | Index: &BTreeIndex{}, 42 | IndexLess: strLess, 43 | }) 44 | defer d.EraseAll() 45 | 46 | v := []byte{'1', '2', '3'} 47 | d.Write("a", v) 48 | if !d.isIndexed("a") { 49 | t.Fatalf("'a' not indexed after write") 50 | } 51 | d.Write("1", v) 52 | d.Write("m", v) 53 | d.Write("-", v) 54 | d.Write("A", v) 55 | 56 | expectedKeys := []string{"-", "1", "A", "a", "m"} 57 | keys := []string{} 58 | for _, key := range d.Index.Keys("", 100) { 59 | keys = append(keys, key) 60 | } 61 | 62 | if !cmpStrings(keys, expectedKeys) { 63 | t.Fatalf("got %s, expected %s", keys, expectedKeys) 64 | } 65 | } 66 | 67 | func TestIndexLoad(t *testing.T) { 68 | d1 := New(Options{ 69 | BasePath: "index-test", 70 | CacheSizeMax: 1024, 71 | }) 72 | defer d1.EraseAll() 73 | 74 | val := []byte{'1', '2', '3'} 75 | keys := []string{"a", "b", "c", "d", "e", "f", "g"} 76 | for _, key := range keys { 77 | d1.Write(key, val) 78 | } 79 | 80 | d2 := New(Options{ 81 | BasePath: "index-test", 82 | CacheSizeMax: 1024, 83 | Index: &BTreeIndex{}, 84 | IndexLess: strLess, 85 | }) 86 | defer d2.EraseAll() 87 | 88 | // check d2 has properly loaded existing d1 data 89 | for _, key := range keys { 90 | if !d2.isIndexed(key) { 91 | t.Fatalf("key '%s' not indexed on secondary", key) 92 | } 93 | } 94 | 95 | // cache one 96 | if readValue, err := d2.Read(keys[0]); err != nil { 97 | t.Fatalf("%s", err) 98 | } else if bytes.Compare(val, readValue) != 0 { 99 | t.Fatalf("%s: got %s, 
expected %s", keys[0], readValue, val) 100 | } 101 | 102 | // make sure it got cached 103 | for i := 0; i < 10 && !d2.isCached(keys[0]); i++ { 104 | time.Sleep(10 * time.Millisecond) 105 | } 106 | if !d2.isCached(keys[0]) { 107 | t.Fatalf("key '%s' not cached", keys[0]) 108 | } 109 | 110 | // kill the disk 111 | d1.EraseAll() 112 | 113 | // cached value should still be there in the second 114 | if readValue, err := d2.Read(keys[0]); err != nil { 115 | t.Fatalf("%s", err) 116 | } else if bytes.Compare(val, readValue) != 0 { 117 | t.Fatalf("%s: got %s, expected %s", keys[0], readValue, val) 118 | } 119 | 120 | // but not in the original 121 | if _, err := d1.Read(keys[0]); err == nil { 122 | t.Fatalf("expected error reading from flushed store") 123 | } 124 | } 125 | 126 | func TestIndexKeysEmptyFrom(t *testing.T) { 127 | d := New(Options{ 128 | BasePath: "index-test", 129 | CacheSizeMax: 1024, 130 | Index: &BTreeIndex{}, 131 | IndexLess: strLess, 132 | }) 133 | defer d.EraseAll() 134 | 135 | for _, k := range []string{"a", "c", "z", "b", "x", "b", "y"} { 136 | d.Write(k, []byte("1")) 137 | } 138 | 139 | want := []string{"a", "b", "c", "x", "y", "z"} 140 | have := d.Index.Keys("", 99) 141 | if !reflect.DeepEqual(want, have) { 142 | t.Errorf("want %v, have %v", want, have) 143 | } 144 | } 145 | 146 | func TestBadKeys(t *testing.T) { 147 | d := New(Options{ 148 | BasePath: "index-test", 149 | CacheSizeMax: 1024, 150 | Index: &BTreeIndex{}, 151 | IndexLess: strLess, 152 | }) 153 | defer d.EraseAll() 154 | 155 | for _, k := range []string{"a/a"} { 156 | err := d.Write(k, []byte("1")) 157 | if err != errBadKey { 158 | t.Errorf("Expected bad key error, got: %v", err) 159 | } 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /issues_test.go: -------------------------------------------------------------------------------- 1 | package diskv 2 | 3 | import ( 4 | "bytes" 5 | "io/ioutil" 6 | "math/rand" 7 | "sync" 8 | "testing" 9 | "time" 10 | ) 11 | 12 | // ReadStream from cache shouldn't panic on a nil dereference from a nonexistent 13 | // Compression :) 14 | func TestIssue2A(t *testing.T) { 15 | d := New(Options{ 16 | BasePath: "test-issue-2a", 17 | CacheSizeMax: 1024, 18 | }) 19 | defer d.EraseAll() 20 | 21 | input := "abcdefghijklmnopqrstuvwxy" 22 | key, writeBuf, sync := "a", bytes.NewBufferString(input), false 23 | if err := d.WriteStream(key, writeBuf, sync); err != nil { 24 | t.Fatal(err) 25 | } 26 | 27 | for i := 0; i < 2; i++ { 28 | began := time.Now() 29 | rc, err := d.ReadStream(key, false) 30 | if err != nil { 31 | t.Fatal(err) 32 | } 33 | buf, err := ioutil.ReadAll(rc) 34 | if err != nil { 35 | t.Fatal(err) 36 | } 37 | if !cmpBytes(buf, []byte(input)) { 38 | t.Fatalf("read #%d: '%s' != '%s'", i+1, string(buf), input) 39 | } 40 | rc.Close() 41 | t.Logf("read #%d in %s", i+1, time.Since(began)) 42 | } 43 | } 44 | 45 | // ReadStream on a key that resolves to a directory should return an error. 
46 | func TestIssue2B(t *testing.T) { 47 | blockTransform := func(s string) []string { 48 | transformBlockSize := 3 49 | sliceSize := len(s) / transformBlockSize 50 | pathSlice := make([]string, sliceSize) 51 | for i := 0; i < sliceSize; i++ { 52 | from, to := i*transformBlockSize, (i*transformBlockSize)+transformBlockSize 53 | pathSlice[i] = s[from:to] 54 | } 55 | return pathSlice 56 | } 57 | 58 | d := New(Options{ 59 | BasePath: "test-issue-2b", 60 | Transform: blockTransform, 61 | CacheSizeMax: 0, 62 | }) 63 | defer d.EraseAll() 64 | 65 | v := []byte{'1', '2', '3'} 66 | if err := d.Write("abcabc", v); err != nil { 67 | t.Fatal(err) 68 | } 69 | 70 | _, err := d.ReadStream("abc", false) 71 | if err == nil { 72 | t.Fatal("ReadStream('abc') should return error") 73 | } 74 | t.Logf("ReadStream('abc') returned error: %v", err) 75 | } 76 | 77 | // Ensure ReadStream with direct=true isn't racy. 78 | func TestIssue17(t *testing.T) { 79 | var ( 80 | basePath = "test-data" 81 | ) 82 | 83 | dWrite := New(Options{ 84 | BasePath: basePath, 85 | CacheSizeMax: 0, 86 | }) 87 | defer dWrite.EraseAll() 88 | 89 | dRead := New(Options{ 90 | BasePath: basePath, 91 | CacheSizeMax: 50, 92 | }) 93 | 94 | cases := map[string]string{ 95 | "a": `1234567890`, 96 | "b": `2345678901`, 97 | "c": `3456789012`, 98 | "d": `4567890123`, 99 | "e": `5678901234`, 100 | } 101 | 102 | for k, v := range cases { 103 | if err := dWrite.Write(k, []byte(v)); err != nil { 104 | t.Fatalf("during write: %s", err) 105 | } 106 | dRead.Read(k) // ensure it's added to cache 107 | } 108 | 109 | var wg sync.WaitGroup 110 | start := make(chan struct{}) 111 | for k, v := range cases { 112 | wg.Add(1) 113 | go func(k, v string) { 114 | <-start 115 | dRead.ReadStream(k, true) 116 | wg.Done() 117 | }(k, v) 118 | } 119 | close(start) 120 | wg.Wait() 121 | } 122 | 123 | // Test for issue #40, where acquiring two stream readers on the same k/v pair 124 | // caused the value to be written into the cache twice, messing up the 125 | // size calculations. 126 | func TestIssue40(t *testing.T) { 127 | var ( 128 | basePath = "test-data" 129 | ) 130 | // Simplest transform function: put all the data files into the base dir. 131 | flatTransform := func(s string) []string { return []string{} } 132 | 133 | // Initialize a new diskv store, rooted at basePath, 134 | // with a 100 byte cache. 135 | d := New(Options{ 136 | BasePath: basePath, 137 | Transform: flatTransform, 138 | CacheSizeMax: 100, 139 | }) 140 | 141 | defer d.EraseAll() 142 | 143 | // Write a 50 byte value, filling the cache half-way. 144 | k1 := "key1" 145 | d1 := make([]byte, 50) 146 | rand.Read(d1) 147 | d.Write(k1, d1) 148 | 149 | // Get *two* read streams on it. Because the key is not yet in the cache, 150 | // and will not be in the cache until a stream is fully read, both 151 | // readers use the 'siphon' object, which always writes to the cache 152 | // after reading. 153 | s1, err := d.ReadStream(k1, false) 154 | if err != nil { 155 | t.Fatal(err) 156 | } 157 | s2, err := d.ReadStream(k1, false) 158 | if err != nil { 159 | t.Fatal(err) 160 | } 161 | // When each stream is drained, the underlying siphon will write 162 | // the value into the cache's map and increment the cache size. 163 | // This means we will have 1 entry in the cache map 164 | // ("key1" mapping to a 50 byte slice) but the cache size will be 100, 165 | // because the buggy code does not check if an entry already exists 166 | // in the map. 
167 | // s1 drains: 168 | // cache[k] = v 169 | // cacheSize += len(v) 170 | // s2 drains: 171 | // cache[k] = v /* overwrites existing */ 172 | // cacheSize += len(v) /* blindly adds to the cache size */ 173 | ioutil.ReadAll(s1) 174 | ioutil.ReadAll(s2) 175 | 176 | // Now write a different k/v pair, with a 60 byte array. 177 | k2 := "key2" 178 | d2 := make([]byte, 60) 179 | rand.Read(d2) 180 | d.Write(k2, d2) 181 | // The act of reading the k/v pair back out causes it to be cached. 182 | // Because the cache is only 100 bytes, it needs to delete existing 183 | // entries to make room. 184 | // If the cache is buggy, it will delete the single 50-byte entry 185 | // from the cache map & decrement cacheSize by 50... but because 186 | // cacheSize was improperly incremented twice earlier, this will 187 | // leave us with no entries in the cacheMap but with cacheSize==50. 188 | // Since CacheSizeMax-cacheSize (100-50) is less than 60, there 189 | // is no room in the cache for this entry and it panics. 190 | d.Read(k2) 191 | } 192 | -------------------------------------------------------------------------------- /keys_test.go: -------------------------------------------------------------------------------- 1 | package diskv 2 | 3 | import ( 4 | "reflect" 5 | "runtime" 6 | "strings" 7 | "testing" 8 | ) 9 | 10 | var ( 11 | keysTestData = map[string]string{ 12 | "ab01cd01": "When we started building CoreOS", 13 | "ab01cd02": "we looked at all the various components available to us", 14 | "ab01cd03": "re-using the best tools", 15 | "ef01gh04": "and building the ones that did not exist", 16 | "ef02gh05": "We believe strongly in the Unix philosophy", 17 | "xxxxxxxx": "tools should be independently useful", 18 | } 19 | 20 | prefixes = []string{ 21 | "", // all 22 | "a", 23 | "ab", 24 | "ab0", 25 | "ab01", 26 | "ab01cd0", 27 | "ab01cd01", 28 | "ab01cd01x", // none 29 | "b", // none 30 | "b0", // none 31 | "0", // none 32 | "01", // none 33 | "e", 34 | "ef", 35 | "efx", // none 36 | "ef01gh0", 37 | "ef01gh04", 38 | "ef01gh05", 39 | "ef01gh06", // none 40 | } 41 | ) 42 | 43 | func TestKeysFlat(t *testing.T) { 44 | transform := func(s string) []string { 45 | if s == "" { 46 | t.Fatalf(`transform should not be called with ""`) 47 | } 48 | return []string{} 49 | } 50 | d := New(Options{ 51 | BasePath: "test-data", 52 | Transform: transform, 53 | }) 54 | defer d.EraseAll() 55 | 56 | for k, v := range keysTestData { 57 | d.Write(k, []byte(v)) 58 | } 59 | 60 | checkKeys(t, d.Keys(nil), keysTestData) 61 | } 62 | 63 | func TestKeysNested(t *testing.T) { 64 | d := New(Options{ 65 | BasePath: "test-data", 66 | Transform: blockTransform(2), 67 | }) 68 | defer d.EraseAll() 69 | 70 | for k, v := range keysTestData { 71 | d.Write(k, []byte(v)) 72 | } 73 | 74 | checkKeys(t, d.Keys(nil), keysTestData) 75 | } 76 | 77 | func TestKeysPrefixFlat(t *testing.T) { 78 | d := New(Options{ 79 | BasePath: "test-data", 80 | }) 81 | defer d.EraseAll() 82 | 83 | for k, v := range keysTestData { 84 | d.Write(k, []byte(v)) 85 | } 86 | 87 | for _, prefix := range prefixes { 88 | checkKeys(t, d.KeysPrefix(prefix, nil), filterPrefix(keysTestData, prefix)) 89 | } 90 | } 91 | 92 | func TestKeysPrefixNested(t *testing.T) { 93 | d := New(Options{ 94 | BasePath: "test-data", 95 | Transform: blockTransform(2), 96 | }) 97 | defer d.EraseAll() 98 | 99 | for k, v := range keysTestData { 100 | d.Write(k, []byte(v)) 101 | } 102 | 103 | for _, prefix := range prefixes { 104 | checkKeys(t, d.KeysPrefix(prefix, nil), filterPrefix(keysTestData, 
prefix)) 105 | } 106 | } 107 | 108 | func TestKeysCancel(t *testing.T) { 109 | d := New(Options{ 110 | BasePath: "test-data", 111 | }) 112 | defer d.EraseAll() 113 | 114 | for k, v := range keysTestData { 115 | d.Write(k, []byte(v)) 116 | } 117 | 118 | var ( 119 | cancel = make(chan struct{}) 120 | received = 0 121 | cancelAfter = len(keysTestData) / 2 122 | ) 123 | 124 | for key := range d.Keys(cancel) { 125 | received++ 126 | 127 | if received >= cancelAfter { 128 | close(cancel) 129 | runtime.Gosched() // allow walker to detect cancel 130 | } 131 | 132 | t.Logf("received %d: %q", received, key) 133 | } 134 | 135 | if want, have := cancelAfter, received; want != have { 136 | t.Errorf("want %d, have %d", want, have) 137 | } 138 | } 139 | 140 | func checkKeys(t *testing.T, c <-chan string, want map[string]string) { 141 | for k := range c { 142 | if _, ok := want[k]; !ok { 143 | t.Errorf("%q yielded but not expected", k) 144 | continue 145 | } 146 | 147 | delete(want, k) 148 | t.Logf("%q yielded OK", k) 149 | } 150 | 151 | if len(want) != 0 { 152 | t.Errorf("%d expected key(s) not yielded: %s", len(want), strings.Join(flattenKeys(want), ", ")) 153 | } 154 | } 155 | 156 | func blockTransform(blockSize int) func(string) []string { 157 | return func(s string) []string { 158 | var ( 159 | sliceSize = len(s) / blockSize 160 | pathSlice = make([]string, sliceSize) 161 | ) 162 | for i := 0; i < sliceSize; i++ { 163 | from, to := i*blockSize, (i*blockSize)+blockSize 164 | pathSlice[i] = s[from:to] 165 | } 166 | return pathSlice 167 | } 168 | } 169 | 170 | func filterPrefix(in map[string]string, prefix string) map[string]string { 171 | out := map[string]string{} 172 | for k, v := range in { 173 | if strings.HasPrefix(k, prefix) { 174 | out[k] = v 175 | } 176 | } 177 | return out 178 | } 179 | 180 | func TestFilterPrefix(t *testing.T) { 181 | input := map[string]string{ 182 | "all": "", 183 | "and": "", 184 | "at": "", 185 | "available": "", 186 | "best": "", 187 | "building": "", 188 | "components": "", 189 | "coreos": "", 190 | "did": "", 191 | "exist": "", 192 | "looked": "", 193 | "not": "", 194 | "ones": "", 195 | "re-using": "", 196 | "started": "", 197 | "that": "", 198 | "the": "", 199 | "to": "", 200 | "tools": "", 201 | "us": "", 202 | "various": "", 203 | "we": "", 204 | "when": "", 205 | } 206 | 207 | for prefix, want := range map[string]map[string]string{ 208 | "a": map[string]string{"all": "", "and": "", "at": "", "available": ""}, 209 | "al": map[string]string{"all": ""}, 210 | "all": map[string]string{"all": ""}, 211 | "alll": map[string]string{}, 212 | "c": map[string]string{"components": "", "coreos": ""}, 213 | "co": map[string]string{"components": "", "coreos": ""}, 214 | "com": map[string]string{"components": ""}, 215 | } { 216 | have := filterPrefix(input, prefix) 217 | if !reflect.DeepEqual(want, have) { 218 | t.Errorf("%q: want %v, have %v", prefix, flattenKeys(want), flattenKeys(have)) 219 | } 220 | } 221 | } 222 | 223 | func flattenKeys(m map[string]string) []string { 224 | a := make([]string, 0, len(m)) 225 | for k := range m { 226 | a = append(a, k) 227 | } 228 | return a 229 | } 230 | -------------------------------------------------------------------------------- /speed_test.go: -------------------------------------------------------------------------------- 1 | package diskv 2 | 3 | import ( 4 | "fmt" 5 | "math/rand" 6 | "testing" 7 | ) 8 | 9 | func shuffle(keys []string) { 10 | ints := rand.Perm(len(keys)) 11 | for i := range keys { 12 | keys[i], keys[ints[i]] = 
keys[ints[i]], keys[i] 13 | } 14 | } 15 | 16 | func genValue(size int) []byte { 17 | v := make([]byte, size) 18 | for i := 0; i < size; i++ { 19 | v[i] = uint8((rand.Int() % 26) + 97) // a-z 20 | } 21 | return v 22 | } 23 | 24 | const ( 25 | keyCount = 1000 26 | ) 27 | 28 | func genKeys() []string { 29 | keys := make([]string, keyCount) 30 | for i := 0; i < keyCount; i++ { 31 | keys[i] = fmt.Sprintf("%d", i) 32 | } 33 | return keys 34 | } 35 | 36 | func (d *Diskv) load(keys []string, val []byte) { 37 | for _, key := range keys { 38 | d.Write(key, val) 39 | } 40 | } 41 | 42 | func benchRead(b *testing.B, size, cachesz int) { 43 | b.StopTimer() 44 | d := New(Options{ 45 | BasePath: "speed-test", 46 | CacheSizeMax: uint64(cachesz), 47 | }) 48 | defer d.EraseAll() 49 | 50 | keys := genKeys() 51 | value := genValue(size) 52 | d.load(keys, value) 53 | shuffle(keys) 54 | b.SetBytes(int64(size)) 55 | 56 | b.StartTimer() 57 | for i := 0; i < b.N; i++ { 58 | _, _ = d.Read(keys[i%len(keys)]) 59 | } 60 | b.StopTimer() 61 | } 62 | 63 | func benchWrite(b *testing.B, size int, withIndex bool) { 64 | b.StopTimer() 65 | 66 | options := Options{ 67 | BasePath: "speed-test", 68 | CacheSizeMax: 0, 69 | } 70 | if withIndex { 71 | options.Index = &BTreeIndex{} 72 | options.IndexLess = strLess 73 | } 74 | 75 | d := New(options) 76 | defer d.EraseAll() 77 | keys := genKeys() 78 | value := genValue(size) 79 | shuffle(keys) 80 | b.SetBytes(int64(size)) 81 | 82 | b.StartTimer() 83 | for i := 0; i < b.N; i++ { 84 | d.Write(keys[i%len(keys)], value) 85 | } 86 | b.StopTimer() 87 | } 88 | 89 | func BenchmarkWrite__32B_NoIndex(b *testing.B) { 90 | benchWrite(b, 32, false) 91 | } 92 | 93 | func BenchmarkWrite__1KB_NoIndex(b *testing.B) { 94 | benchWrite(b, 1024, false) 95 | } 96 | 97 | func BenchmarkWrite__4KB_NoIndex(b *testing.B) { 98 | benchWrite(b, 4096, false) 99 | } 100 | 101 | func BenchmarkWrite_10KB_NoIndex(b *testing.B) { 102 | benchWrite(b, 10240, false) 103 | } 104 | 105 | func BenchmarkWrite__32B_WithIndex(b *testing.B) { 106 | benchWrite(b, 32, true) 107 | } 108 | 109 | func BenchmarkWrite__1KB_WithIndex(b *testing.B) { 110 | benchWrite(b, 1024, true) 111 | } 112 | 113 | func BenchmarkWrite__4KB_WithIndex(b *testing.B) { 114 | benchWrite(b, 4096, true) 115 | } 116 | 117 | func BenchmarkWrite_10KB_WithIndex(b *testing.B) { 118 | benchWrite(b, 10240, true) 119 | } 120 | 121 | func BenchmarkRead__32B_NoCache(b *testing.B) { 122 | benchRead(b, 32, 0) 123 | } 124 | 125 | func BenchmarkRead__1KB_NoCache(b *testing.B) { 126 | benchRead(b, 1024, 0) 127 | } 128 | 129 | func BenchmarkRead__4KB_NoCache(b *testing.B) { 130 | benchRead(b, 4096, 0) 131 | } 132 | 133 | func BenchmarkRead_10KB_NoCache(b *testing.B) { 134 | benchRead(b, 10240, 0) 135 | } 136 | 137 | func BenchmarkRead__32B_WithCache(b *testing.B) { 138 | benchRead(b, 32, keyCount*32*2) 139 | } 140 | 141 | func BenchmarkRead__1KB_WithCache(b *testing.B) { 142 | benchRead(b, 1024, keyCount*1024*2) 143 | } 144 | 145 | func BenchmarkRead__4KB_WithCache(b *testing.B) { 146 | benchRead(b, 4096, keyCount*4096*2) 147 | } 148 | 149 | func BenchmarkRead_10KB_WithCache(b *testing.B) { 150 | benchRead(b, 10240, keyCount*10240*2) 151 | } 152 | -------------------------------------------------------------------------------- /stream_test.go: -------------------------------------------------------------------------------- 1 | package diskv 2 | 3 | import ( 4 | "bytes" 5 | "io/ioutil" 6 | "testing" 7 | ) 8 | 9 | func TestBasicStreamCaching(t *testing.T) { 10 | d := 
New(Options{ 11 | BasePath: "test-data", 12 | CacheSizeMax: 1024, 13 | }) 14 | defer d.EraseAll() 15 | 16 | input := "a1b2c3" 17 | key, writeBuf, sync := "a", bytes.NewBufferString(input), true 18 | if err := d.WriteStream(key, writeBuf, sync); err != nil { 19 | t.Fatal(err) 20 | } 21 | 22 | if d.isCached(key) { 23 | t.Fatalf("'%s' cached, but shouldn't be (yet)", key) 24 | } 25 | 26 | rc, err := d.ReadStream(key, false) 27 | if err != nil { 28 | t.Fatal(err) 29 | } 30 | 31 | readBuf, err := ioutil.ReadAll(rc) 32 | if err != nil { 33 | t.Fatal(err) 34 | } 35 | 36 | if !cmpBytes(readBuf, []byte(input)) { 37 | t.Fatalf("'%s' != '%s'", string(readBuf), input) 38 | } 39 | 40 | if !d.isCached(key) { 41 | t.Fatalf("'%s' isn't cached, but should be", key) 42 | } 43 | } 44 | 45 | func TestReadStreamDirect(t *testing.T) { 46 | var ( 47 | basePath = "test-data" 48 | ) 49 | dWrite := New(Options{ 50 | BasePath: basePath, 51 | CacheSizeMax: 0, 52 | }) 53 | defer dWrite.EraseAll() 54 | dRead := New(Options{ 55 | BasePath: basePath, 56 | CacheSizeMax: 1024, 57 | }) 58 | 59 | // Write 60 | key, val1, val2 := "a", []byte(`1234567890`), []byte(`aaaaaaaaaa`) 61 | if err := dWrite.Write(key, val1); err != nil { 62 | t.Fatalf("during first write: %s", err) 63 | } 64 | 65 | // First, caching read. 66 | val, err := dRead.Read(key) 67 | if err != nil { 68 | t.Fatalf("during initial read: %s", err) 69 | } 70 | t.Logf("read 1: %s => %s", key, string(val)) 71 | if !cmpBytes(val1, val) { 72 | t.Errorf("expected %q, got %q", string(val1), string(val)) 73 | } 74 | if !dRead.isCached(key) { 75 | t.Errorf("%q should be cached, but isn't", key) 76 | } 77 | 78 | // Write a different value. 79 | if err := dWrite.Write(key, val2); err != nil { 80 | t.Fatalf("during second write: %s", err) 81 | } 82 | 83 | // Second read, should hit cache and get the old value. 84 | val, err = dRead.Read(key) 85 | if err != nil { 86 | t.Fatalf("during second (cache-hit) read: %s", err) 87 | } 88 | t.Logf("read 2: %s => %s", key, string(val)) 89 | if !cmpBytes(val1, val) { 90 | t.Errorf("expected %q, got %q", string(val1), string(val)) 91 | } 92 | 93 | // Third, direct read, should get the updated value. 94 | rc, err := dRead.ReadStream(key, true) 95 | if err != nil { 96 | t.Fatalf("during third (direct) read, ReadStream: %s", err) 97 | } 98 | defer rc.Close() 99 | val, err = ioutil.ReadAll(rc) 100 | if err != nil { 101 | t.Fatalf("during third (direct) read, ReadAll: %s", err) 102 | } 103 | t.Logf("read 3: %s => %s", key, string(val)) 104 | if !cmpBytes(val2, val) { 105 | t.Errorf("expected %q, got %q", string(val2), string(val)) 106 | } 107 | 108 | // Fourth read, should hit cache and get the new value. 109 | val, err = dRead.Read(key) 110 | if err != nil { 111 | t.Fatalf("during fourth (cache-hit) read: %s", err) 112 | } 113 | t.Logf("read 4: %s => %s", key, string(val)) 114 | if !cmpBytes(val2, val) { 115 | t.Errorf("expected %q, got %q", string(val2), string(val)) 116 | } 117 | } 118 | --------------------------------------------------------------------------------