├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── buffer.go ├── buffer_test.go ├── chains.go ├── dto.pb.go ├── dto.proto ├── folder_util.go ├── go.mod ├── go.sum ├── lmdb.go ├── main_test.go ├── reader.go ├── reader_async.go ├── util.go ├── writer.go └── writer_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # Test binary, build with `go test -c` 8 | *.test 9 | 10 | # Output of the go coverage tool, specifically when used with LiteIDE 11 | *.out 12 | 13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 14 | .glide/ 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # This is a weird way of telling Travis to use the fast container-based test 2 | # runner instead of the slow VM-based runner. 3 | sudo: false 4 | 5 | language: go 6 | 7 | # Only the last two Go releases are supported by the Go team with security 8 | # updates. Any older versions be considered deprecated. Don't bother testing 9 | # with them. 10 | go: 11 | - 1.11 12 | 13 | # Only clone the most recent commit. 14 | git: 15 | depth: 1 16 | 17 | env: 18 | - GO111MODULE=on 19 | 20 | install: true 21 | 22 | notifications: 23 | email: false 24 | 25 | before_script: 26 | - go build ./... 27 | 28 | # script always runs to completion (set +e). If we have linter issues AND a 29 | # failing test, we want to see both. Configure golangci-lint with a 30 | # .golangci.yml file at the top level of your repo. 31 | script: 32 | - go test -v -race ./... # Run all the tests with the race detector enabled -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2017, Rinat Abdullin 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cellar 2 | 3 | [![Build Status](https://travis-ci.com/abdullin/cellar.svg?branch=master)](https://travis-ci.com/carapace/cellar) 4 | 5 | Cellar is the append-only storage backend in Go designed for the analytical 6 | workloads. It replaces [geyser-net](https://github.com/abdullin/geyser-net). 7 | 8 | Core features: 9 | 10 | - events are automatically split into the chunks; 11 | - chunks are encrypted (LZ4) and compressed; 12 | - designed for batching operations (high throughput); 13 | - supports single writer and multiple concurrent readers; 14 | - store secondary indexes, lookups in the metadata DB. 15 | 16 | This storage takes ideas from the [Message Vault](https://github.com/abdullin/messageVault), 17 | which was based on the ideas of Kafka and append-only storage in [Lokad.CQRS](https://github.com/abdullin/lokad-cqrs) 18 | 19 | Analytical pipeline on top of this library was deployed at 20 | HappyPancake to run real-time aggregation and long-term data analysis 21 | on the largest social website in Sweden. You can read more about it in 22 | [Real-time Analytics with Go and LMDB](https://abdullin.com/bitgn/real-time-analytics/). 23 | 24 | # Contributors 25 | 26 | In the alphabetical order: 27 | 28 | - [Karel L. Kubat](https://github.com/KaiserKarel) 29 | - [Rinat Abdullin](https://github.com/abdullin) 30 | 31 | Don't hesitate to send a PR to include your profile. 32 | 33 | # Design 34 | 35 | Cellar stores data in a very simple manner: 36 | 37 | - LMDB database is used for keeping metadata (including user-defined); 38 | - a single pre-allocated file is used to buffer all writes; 39 | - when buffer fills, it is compressed, encrypted and added to the chunk list. 40 | 41 | # Writing 42 | 43 | You can have **only one writer at a time**. This writer has two operations: 44 | 45 | - `Append` - adds new bytes to the buffer, but doesn't flush it. 46 | - `Checkpoint` - performs all the flushing and saves the checkpoints. 47 | 48 | The store is optimized for throughput. You can efficiently execute 49 | thousands of appends followed by a single call to `Checkpoint`. 50 | 51 | Whenever a buffer is about to overflow (exceed the predefined max 52 | size), it will be "sealed" into an immutable chunk (compressed, 53 | encrypted and added to the chunk table) and replaced by a new buffer. 54 | 55 | See tests in `writer_test.go` for sample usage patters (for both 56 | writing and reading). 57 | 58 | # Reading 59 | 60 | At any point in time **multiple readers could be created** via 61 | `NewReader(folder, encryptionKey)`. You can optionally configure 62 | reader after creation by setting `StartPos` or `EndPos` to constrain 63 | reading to a part of the database. 64 | 65 | 66 | Readers have following operations available: 67 | 68 | - `Scan` - reads the database by executing the passed function against 69 | each record; 70 | - `ReadDb` - executes LMDB transaction against the metadata database 71 | (used to read lookup tables or indexes stored by the 72 | custom writing logic); 73 | - `ScanAsync` - launches reading in a goroutine and returns a buffered 74 | channel that will be filled up with records. 75 | 76 | Unit tests in `writer_test.go` feature use of readers as well. 77 | 78 | Note, that the reader tries to help you in achieving maximum 79 | throughput. While reading events from the chunk, it will decrypt and 80 | unpack the entire file in one go, allocating a memory buffer. All 81 | individual event reads will be performed against this buffer. 82 | 83 | # Example: Incremental Reporting 84 | 85 | This library was used as a building block for capturing millions and 86 | billions of events and then running reports on them. Consider a 87 | following example of building an incremental reporting pipeline. 88 | 89 | There is an external append-only storage with billions of events and a 90 | few terabytes of data (events are compressed separately with an 91 | equivalent of Snappy). It is located on a remote storage (cloud or a 92 | NAS). It is required to run custom reports on this data, refreshing 93 | them every hour. 94 | 95 | Cellar storage could be used to serve as a local cache on a dedicated 96 | reporting machine (e.g. you can find an instance with 32GB of RAM, 97 | Intel Xeon and 500GB of NNVMe SSD under 100 EUR per month). Since 98 | Cellar storage compresses events in chunks, high compression ratio 99 | could be achieved. For instance, protobuf messages tend to get 100 | compression of 2-10 in chunks. 101 | 102 | A solution might include an equivalent of a cron job that will execute 103 | following apps in sequence: 104 | 105 | - import job - a golang console that reads the last retrieved offset 106 | from the cellar, requests any new data from the remote storage and 107 | stores it locally in raw format; 108 | - compaction job - a golang console that incrementally pumps data from 109 | the "raw" cellar storage to another (using checkpoints to determine 110 | the location), while compacting and filtering events to keep only 111 | the ones needed for reporting; 112 | - report jobs - apps that perform a full scan on the compacted data, 113 | building reports in memory and then dumping them into the TSV (or 114 | whatever is format is used by your data processing framework). 115 | 116 | All these steps usually execute fast even on large datasets, since (1) 117 | and (2) are incremental and operate only on the fresh data. (3) can 118 | require full DB, however it works with the optimized and compacted 119 | data, hence it will be fast as well. To get the most performance, you 120 | might need to structure your messages for very fast reads without 121 | unnecessary memory allocations or CPU work (e.g. using something like 122 | FlatBuffers instead of JSON or ProtoBuf). 123 | 124 | Note, that the compaction job is optional. However, on fairly large 125 | datasets, it might make sense to optimize messages for very fast 126 | reads, while discarding all the unnecessary information. Should the 127 | job requirements change, you'll need to update the compaction logic, 128 | discard the compacted store and re-process all the raw data from the 129 | start. 130 | 131 | # License 132 | 133 | 3-clause BSD license. 134 | -------------------------------------------------------------------------------- /buffer.go: -------------------------------------------------------------------------------- 1 | package cellar 2 | 3 | import ( 4 | "bufio" 5 | "crypto/cipher" 6 | "io" 7 | "log" 8 | "os" 9 | "path" 10 | 11 | "github.com/pierrec/lz4" 12 | "github.com/pkg/errors" 13 | ) 14 | 15 | type Buffer struct { 16 | fileName string 17 | maxBytes int64 18 | startPos int64 19 | 20 | records int64 21 | pos int64 22 | 23 | writer *bufio.Writer 24 | stream *os.File 25 | } 26 | 27 | func openBuffer(d *BufferDto, folder string) (*Buffer, error) { 28 | 29 | if len(d.FileName) == 0 { 30 | return nil, errors.New("empty filename") 31 | } 32 | 33 | fullPath := path.Join(folder, d.FileName) 34 | 35 | f, err := os.OpenFile(fullPath, os.O_CREATE|os.O_RDWR, 0644) 36 | if err != nil { 37 | return nil, errors.Wrap(err, "Open file") 38 | } 39 | f.Truncate(int64(d.MaxBytes)) 40 | if _, err := f.Seek(int64(d.Pos), io.SeekStart); err != nil { 41 | return nil, errors.Wrap(err, "Seek") 42 | } 43 | 44 | b := &Buffer{ 45 | fileName: d.FileName, 46 | startPos: d.StartPos, 47 | maxBytes: d.MaxBytes, 48 | pos: d.Pos, 49 | records: d.Records, 50 | stream: f, 51 | writer: bufio.NewWriter(f), 52 | } 53 | return b, nil 54 | } 55 | 56 | func (b *Buffer) getState() *BufferDto { 57 | return &BufferDto{ 58 | FileName: b.fileName, 59 | MaxBytes: b.maxBytes, 60 | StartPos: b.startPos, 61 | Pos: b.pos, 62 | Records: b.records, 63 | } 64 | } 65 | 66 | func (b *Buffer) fits(bytes int64) bool { 67 | return (b.pos + bytes) <= b.maxBytes 68 | } 69 | 70 | func (b *Buffer) writeBytes(bs []byte) error { 71 | if _, err := b.writer.Write(bs); err != nil { 72 | return errors.Wrap(err, "Write") 73 | } 74 | b.pos += int64(len(bs)) 75 | return nil 76 | } 77 | 78 | func (b *Buffer) endRecord() { 79 | b.records++ 80 | } 81 | 82 | func (b *Buffer) flush() error { 83 | if err := b.writer.Flush(); err != nil { 84 | return errors.Wrap(err, "Flush") 85 | } 86 | return nil 87 | } 88 | 89 | func (b *Buffer) close() error { 90 | if b.stream == nil { 91 | return nil 92 | } 93 | var err error 94 | if err = b.stream.Close(); err != nil { 95 | return errors.Wrap(err, "stream.Close") 96 | } 97 | b.stream = nil 98 | return nil 99 | } 100 | 101 | func (b *Buffer) compress(key []byte) (dto *ChunkDto, err error) { 102 | 103 | loc := b.stream.Name() + ".lz4" 104 | 105 | if err = b.writer.Flush(); err != nil { 106 | log.Panicf("Failed to flush buffer: %s", err) 107 | } 108 | if err = b.stream.Sync(); err != nil { 109 | log.Panicf("Failed to Fsync buffer: %s", err) 110 | } 111 | 112 | if _, err = b.stream.Seek(0, io.SeekStart); err != nil { 113 | log.Panicf("Failed to seek to 0 in buffer: %s", err) 114 | } 115 | 116 | // create chunk file 117 | var chunkFile *os.File 118 | if chunkFile, err = os.Create(loc); err != nil { 119 | return nil, errors.Wrap(err, "os.Create") 120 | } 121 | 122 | defer func() { 123 | if err := chunkFile.Sync(); err != nil { 124 | panic("Failed to sync") 125 | } 126 | if err := chunkFile.Close(); err != nil { 127 | panic("Failed to close") 128 | } 129 | }() 130 | 131 | // buffer writes to file 132 | buffer := bufio.NewWriter(chunkFile) 133 | 134 | defer buffer.Flush() 135 | 136 | // encrypt before buffering 137 | var encryptor *cipher.StreamWriter 138 | if encryptor, err = chainEncryptor(key, buffer); err != nil { 139 | log.Panicf("Failed to chain encryptor for %s: %s", loc, err) 140 | } 141 | 142 | defer encryptor.Close() 143 | 144 | // compress before encrypting 145 | 146 | var zw *lz4.Writer 147 | if zw, err = chainCompressor(encryptor); err != nil { 148 | log.Panicf("Failed to chain compressor: %s", err) 149 | } 150 | 151 | // copy chunk to the chain 152 | if _, err = io.CopyN(zw, b.stream, b.pos); err != nil { 153 | return nil, errors.Wrap(err, "CopyN") 154 | } 155 | 156 | zw.Close() 157 | chunkFile.Sync() 158 | b.close() 159 | 160 | var size int64 161 | if size, err = chunkFile.Seek(0, io.SeekEnd); err != nil { 162 | return nil, errors.Wrap(err, "Seek") 163 | } 164 | 165 | dto = &ChunkDto{ 166 | FileName: b.fileName + ".lz4", 167 | Records: b.records, 168 | UncompressedByteSize: b.pos, 169 | StartPos: b.startPos, 170 | CompressedDiskSize: size, 171 | } 172 | return dto, nil 173 | } 174 | -------------------------------------------------------------------------------- /buffer_test.go: -------------------------------------------------------------------------------- 1 | package cellar 2 | 3 | import ( 4 | "os" 5 | "path" 6 | "testing" 7 | ) 8 | 9 | func TestWrites(t *testing.T) { 10 | 11 | folder := getFolder() 12 | 13 | b := &BufferDto{ 14 | FileName: "temp", 15 | MaxBytes: 100000, 16 | } 17 | 18 | var ( 19 | buf *Buffer 20 | err error 21 | ) 22 | 23 | if buf, err = openBuffer(b, folder); err != nil { 24 | panic(err) 25 | } 26 | 27 | buf.writeBytes(makeSlice(1)) 28 | 29 | assertExists(t, path.Join(folder, "temp")) 30 | 31 | assertPos(t, buf, 1) 32 | 33 | assert(t, buf.writeBytes(make([]byte, 10)), "writeBytes") 34 | assertPos(t, buf, 11) 35 | 36 | assert(t, buf.flush(), "flush") 37 | 38 | assertPos(t, buf, 11) 39 | 40 | err = buf.writeBytes(make([]byte, 10)) 41 | assert(t, err, "writeBytes") 42 | 43 | assertPos(t, buf, 21) 44 | } 45 | 46 | func assert(t *testing.T, err error, op string) { 47 | if err != nil { 48 | t.Fatalf("Failed %s: %s", op, err) 49 | } 50 | } 51 | 52 | func TestExist(t *testing.T) { 53 | 54 | folder := getFolder() 55 | 56 | b := &BufferDto{ 57 | FileName: "temp", 58 | MaxBytes: 100000, 59 | } 60 | 61 | var buf *Buffer 62 | var err error 63 | 64 | buf, err = openBuffer(b, folder) 65 | 66 | assert(t, err, "openBuffer") 67 | 68 | assert(t, buf.writeBytes(makeSlice(1)), "writeVarInt") 69 | 70 | assertPos(t, buf, 1) 71 | 72 | assert(t, buf.writeBytes(make([]byte, 10)), "writeBytes") 73 | assertExists(t, path.Join(folder, "temp")) 74 | 75 | buf.endRecord() 76 | 77 | key := []byte("example key 1234") 78 | var chunk *ChunkDto 79 | chunk, err = buf.compress(key) 80 | 81 | assert(t, err, "compress") 82 | assertExists(t, path.Join(folder, chunk.FileName)) 83 | 84 | if chunk.UncompressedByteSize != 11 { 85 | t.Fatalf("chunk size should match") 86 | } 87 | if chunk.Records != 1 { 88 | t.Fatalf("Chunk should have %d records", 1) 89 | } 90 | 91 | if chunk.StartPos != 0 { 92 | t.Fatalf("Chunk start pos should be %d", 0) 93 | } 94 | } 95 | 96 | func assertPos(t *testing.T, b *Buffer, expected int64) { 97 | if b.pos != expected { 98 | t.Fatalf("Expected pos to be %d but got %d", expected, b.pos) 99 | } 100 | } 101 | func assertExists(t *testing.T, path string) { 102 | if _, e := os.Stat(path); e != nil { 103 | t.Fatal("Buffer files should exist") 104 | } 105 | } 106 | -------------------------------------------------------------------------------- /chains.go: -------------------------------------------------------------------------------- 1 | package cellar 2 | 3 | import ( 4 | "crypto/aes" 5 | "crypto/cipher" 6 | "crypto/rand" 7 | "io" 8 | "log" 9 | 10 | "github.com/pierrec/lz4" 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | var compressionLevel = 10 15 | 16 | // SetCompressionLevel allows you to set LZ4 compression level used for chunks 17 | func SetCompressionLevel(level int) { 18 | compressionLevel = level 19 | } 20 | 21 | func chainCompressor(w io.Writer) (*lz4.Writer, error) { 22 | zw := lz4.NewWriter(w) 23 | zw.Header.CompressionLevel = compressionLevel 24 | return zw, nil 25 | } 26 | 27 | func chainDecompressor(r io.Reader) (io.Reader, error) { 28 | zr := lz4.NewReader(r) 29 | return zr, nil 30 | } 31 | 32 | func chainDecryptor(key []byte, src io.Reader) (io.Reader, error) { 33 | var ( 34 | block cipher.Block 35 | err error 36 | ) 37 | if block, err = aes.NewCipher(key); err != nil { 38 | log.Panic("Failed to create a new cipher from the key") 39 | } 40 | 41 | iv := make([]byte, aes.BlockSize) 42 | 43 | if _, err = src.Read(iv); err != nil { 44 | return nil, errors.Wrap(err, "Failed to read IV") 45 | } 46 | 47 | stream := cipher.NewCFBDecrypter(block, iv) 48 | reader := &cipher.StreamReader{R: src, S: stream} 49 | return reader, nil 50 | } 51 | 52 | func chainEncryptor(key []byte, w io.Writer) (*cipher.StreamWriter, error) { 53 | 54 | var ( 55 | block cipher.Block 56 | err error 57 | ) 58 | if block, err = aes.NewCipher(key); err != nil { 59 | log.Panic("Failed to create a new cipher from the key") 60 | } 61 | 62 | iv := make([]byte, aes.BlockSize) 63 | if _, err = io.ReadFull(rand.Reader, iv); err != nil { 64 | panic(err) 65 | } 66 | 67 | if _, err = w.Write(iv); err != nil { 68 | return nil, errors.Wrap(err, "Write") 69 | } 70 | stream := cipher.NewCFBEncrypter(block, iv) 71 | 72 | writer := &cipher.StreamWriter{S: stream, W: w} 73 | return writer, nil 74 | } 75 | -------------------------------------------------------------------------------- /dto.pb.go: -------------------------------------------------------------------------------- 1 | // Code generated by protoc-gen-go. 2 | // source: dto.proto 3 | // DO NOT EDIT! 4 | 5 | /* 6 | Package cellar is a generated protocol buffer package. 7 | 8 | It is generated from these files: 9 | dto.proto 10 | 11 | It has these top-level messages: 12 | ChunkDto 13 | BufferDto 14 | MetaDto 15 | */ 16 | package cellar 17 | 18 | import proto "github.com/golang/protobuf/proto" 19 | import fmt "fmt" 20 | import math "math" 21 | 22 | // Reference imports to suppress errors if they are not otherwise used. 23 | var _ = proto.Marshal 24 | var _ = fmt.Errorf 25 | var _ = math.Inf 26 | 27 | // This is a compile-time assertion to ensure that this generated file 28 | // is compatible with the proto package it is being compiled against. 29 | // A compilation error at this line likely means your copy of the 30 | // proto package needs to be updated. 31 | const _ = proto.ProtoPackageIsVersion2 // please upgrade the proto package 32 | 33 | type ChunkDto struct { 34 | UncompressedByteSize int64 `protobuf:"varint,1,opt,name=uncompressedByteSize" json:"uncompressedByteSize,omitempty"` 35 | CompressedDiskSize int64 `protobuf:"varint,2,opt,name=compressedDiskSize" json:"compressedDiskSize,omitempty"` 36 | Records int64 `protobuf:"varint,3,opt,name=records" json:"records,omitempty"` 37 | FileName string `protobuf:"bytes,4,opt,name=fileName" json:"fileName,omitempty"` 38 | StartPos int64 `protobuf:"varint,5,opt,name=startPos" json:"startPos,omitempty"` 39 | } 40 | 41 | func (m *ChunkDto) Reset() { *m = ChunkDto{} } 42 | func (m *ChunkDto) String() string { return proto.CompactTextString(m) } 43 | func (*ChunkDto) ProtoMessage() {} 44 | func (*ChunkDto) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{0} } 45 | 46 | type BufferDto struct { 47 | StartPos int64 `protobuf:"varint,1,opt,name=startPos" json:"startPos,omitempty"` 48 | MaxBytes int64 `protobuf:"varint,2,opt,name=maxBytes" json:"maxBytes,omitempty"` 49 | Records int64 `protobuf:"varint,3,opt,name=records" json:"records,omitempty"` 50 | Pos int64 `protobuf:"varint,4,opt,name=pos" json:"pos,omitempty"` 51 | FileName string `protobuf:"bytes,5,opt,name=fileName" json:"fileName,omitempty"` 52 | } 53 | 54 | func (m *BufferDto) Reset() { *m = BufferDto{} } 55 | func (m *BufferDto) String() string { return proto.CompactTextString(m) } 56 | func (*BufferDto) ProtoMessage() {} 57 | func (*BufferDto) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{1} } 58 | 59 | type MetaDto struct { 60 | MaxKeySize int64 `protobuf:"varint,1,opt,name=maxKeySize" json:"maxKeySize,omitempty"` 61 | MaxValSize int64 `protobuf:"varint,2,opt,name=maxValSize" json:"maxValSize,omitempty"` 62 | } 63 | 64 | func (m *MetaDto) Reset() { *m = MetaDto{} } 65 | func (m *MetaDto) String() string { return proto.CompactTextString(m) } 66 | func (*MetaDto) ProtoMessage() {} 67 | func (*MetaDto) Descriptor() ([]byte, []int) { return fileDescriptor0, []int{2} } 68 | 69 | func init() { 70 | proto.RegisterType((*ChunkDto)(nil), "cellar.ChunkDto") 71 | proto.RegisterType((*BufferDto)(nil), "cellar.BufferDto") 72 | proto.RegisterType((*MetaDto)(nil), "cellar.MetaDto") 73 | } 74 | 75 | func init() { proto.RegisterFile("dto.proto", fileDescriptor0) } 76 | 77 | var fileDescriptor0 = []byte{ 78 | // 246 bytes of a gzipped FileDescriptorProto 79 | 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x09, 0x6e, 0x88, 0x02, 0xff, 0x7c, 0x91, 0xcd, 0x4a, 0xc3, 0x40, 80 | 0x10, 0xc7, 0x59, 0x63, 0x3f, 0x32, 0x27, 0x59, 0x3c, 0x2c, 0x1e, 0xa4, 0xe4, 0xd4, 0x53, 0x0f, 81 | 0xfa, 0x06, 0xb5, 0x17, 0x11, 0x45, 0x22, 0x78, 0x5f, 0x93, 0x09, 0x86, 0xee, 0x76, 0xc2, 0xce, 82 | 0x06, 0x5a, 0x5f, 0xc1, 0x97, 0xf2, 0xd1, 0x64, 0x97, 0x1a, 0x37, 0x50, 0x7a, 0xcb, 0xff, 0x0b, 83 | 0x7e, 0x99, 0x85, 0xbc, 0xf6, 0xb4, 0xea, 0x1c, 0x79, 0x92, 0xd3, 0x0a, 0x8d, 0xd1, 0xae, 0xf8, 84 | 0x11, 0x30, 0x7f, 0xf8, 0xec, 0x77, 0xdb, 0x8d, 0x27, 0x79, 0x07, 0xd7, 0xfd, 0xae, 0x22, 0xdb, 85 | 0x39, 0x64, 0xc6, 0x7a, 0x7d, 0xf0, 0xf8, 0xd6, 0x7e, 0xa1, 0x12, 0x0b, 0xb1, 0xcc, 0xca, 0x93, 86 | 0x99, 0x5c, 0x81, 0xfc, 0x77, 0x37, 0x2d, 0x6f, 0xe3, 0xe2, 0x22, 0x2e, 0x4e, 0x24, 0x52, 0xc1, 87 | 0xcc, 0x61, 0x45, 0xae, 0x66, 0x95, 0xc5, 0xd2, 0x9f, 0x94, 0x37, 0x30, 0x6f, 0x5a, 0x83, 0x2f, 88 | 0xda, 0xa2, 0xba, 0x5c, 0x88, 0x65, 0x5e, 0x0e, 0x3a, 0x64, 0xec, 0xb5, 0xf3, 0xaf, 0xc4, 0x6a, 89 | 0x12, 0x67, 0x83, 0x2e, 0xbe, 0x05, 0xe4, 0xeb, 0xbe, 0x69, 0xd0, 0x85, 0x7f, 0x48, 0x9b, 0x62, 90 | 0xdc, 0x0c, 0x99, 0xd5, 0xfb, 0x80, 0xce, 0x47, 0xc2, 0x41, 0x9f, 0xe1, 0xba, 0x82, 0xac, 0x23, 91 | 0x8e, 0x48, 0x59, 0x19, 0x3e, 0x47, 0xa4, 0x93, 0x31, 0x69, 0xf1, 0x08, 0xb3, 0x67, 0xf4, 0x3a, 92 | 0xa0, 0xdc, 0x02, 0x58, 0xbd, 0x7f, 0xc2, 0x43, 0x72, 0xc4, 0xc4, 0x39, 0xe6, 0xef, 0xda, 0x24, 93 | 0x27, 0x4b, 0x9c, 0x8f, 0x69, 0x7c, 0xaa, 0xfb, 0xdf, 0x00, 0x00, 0x00, 0xff, 0xff, 0x8a, 0x58, 94 | 0x12, 0xdc, 0xb7, 0x01, 0x00, 0x00, 95 | } 96 | -------------------------------------------------------------------------------- /dto.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package cellar; 4 | 5 | message ChunkDto { 6 | int64 uncompressedByteSize = 1; 7 | int64 compressedDiskSize = 2; 8 | int64 records = 3; 9 | string fileName = 4; 10 | int64 startPos = 5 ; 11 | } 12 | 13 | 14 | message BufferDto { 15 | int64 startPos = 1; 16 | int64 maxBytes = 2; 17 | int64 records = 3; 18 | int64 pos = 4; 19 | string fileName = 5; 20 | } 21 | 22 | 23 | message MetaDto { 24 | int64 maxKeySize = 1; 25 | int64 maxValSize = 2; 26 | } -------------------------------------------------------------------------------- /folder_util.go: -------------------------------------------------------------------------------- 1 | package cellar 2 | 3 | import ( 4 | fmt "fmt" 5 | "io/ioutil" 6 | "os" 7 | "sync/atomic" 8 | ) 9 | 10 | var folders = make(chan string, 100) 11 | var folderID int32 12 | 13 | // NewTempFolder creates a new unique empty folder. 14 | // Folders have to be cleaned up via RemoveTempFolders 15 | func NewTempFolder(name string) string { 16 | var folder string 17 | var err error 18 | 19 | var curr = atomic.AddInt32(&folderID, 1) 20 | 21 | if folder, err = ioutil.TempDir("", fmt.Sprintf("test_%s_%d_", name, curr)); err != nil { 22 | panic(err) 23 | } 24 | folders <- folder 25 | return folder 26 | } 27 | 28 | // RemoveTempFolders cleans up all test folders 29 | func RemoveTempFolders() { 30 | close(folders) 31 | for f := range folders { 32 | os.RemoveAll(f) 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/abdullin/cellar 2 | 3 | require ( 4 | github.com/abdullin/lex-go v0.0.0-20170809071836-51ee1bbe34a4 5 | github.com/abdullin/mdb v0.0.0-20171224093530-b63d30c6dad8 6 | github.com/bmatsuo/lmdb-go v1.8.0 7 | github.com/golang/protobuf v1.2.0 8 | github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742 9 | github.com/pierrec/xxHash v0.1.1 // indirect 10 | github.com/pkg/errors v0.8.0 11 | ) 12 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/abdullin/lex-go v0.0.0-20170809071836-51ee1bbe34a4 h1:8V2nybF/93O4jnLnIXfklL2S9bJnnUmatOSLtMM1f6Y= 2 | github.com/abdullin/lex-go v0.0.0-20170809071836-51ee1bbe34a4/go.mod h1:J7uLDotsvcN73ok54s1TLK0K9atmNdPhMtA657aevcM= 3 | github.com/abdullin/mdb v0.0.0-20171224093530-b63d30c6dad8 h1:MjAarp3DEBtzKR6+uAUv4v7ZtcuvBpo89ZAnNNnQLbc= 4 | github.com/abdullin/mdb v0.0.0-20171224093530-b63d30c6dad8/go.mod h1:Y/WE2fBxb7+iL2I96BIlpULEj+QK461egmA/4V/dgh8= 5 | github.com/bmatsuo/lmdb-go v1.8.0 h1:ohf3Q4xjXZBKh4AayUY4bb2CXuhRAI8BYGlJq08EfNA= 6 | github.com/bmatsuo/lmdb-go v1.8.0/go.mod h1:wWPZmKdOAZsl4qOqkowQ1aCrFie1HU8gWloHMCeAUdM= 7 | github.com/golang/protobuf v1.2.0 h1:P3YflyNX/ehuJFLhxviNdFxQPkGK5cDcApsge1SqnvM= 8 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 9 | github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742 h1:wKfigKMTgvSzBLIVvB5QaBBQI0odU6n45/UKSphjLus= 10 | github.com/pierrec/lz4 v0.0.0-20181005164709-635575b42742/go.mod h1:3/3N9NVKO0jef7pBehbT1qWhCMrIgbYNnFAZCqQ5LRc= 11 | github.com/pierrec/lz4 v1.0.1 h1:w6GMGWSsCI04fTM8wQRdnW74MuJISakuUU0onU0TYB4= 12 | github.com/pierrec/lz4 v1.0.1/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= 13 | github.com/pierrec/lz4 v2.0.5+incompatible h1:2xWsjqPFWcplujydGg4WmhC/6fZqK42wMM8aXeqhl0I= 14 | github.com/pierrec/lz4 v2.0.5+incompatible/go.mod h1:pdkljMzZIN41W+lC3N2tnIh5sFi+IEE17M5jbnwPHcY= 15 | github.com/pierrec/xxHash v0.1.1 h1:KP4NrV9023xp3M4FkTYfcXqWigsOCImL1ANJ7sh5vg4= 16 | github.com/pierrec/xxHash v0.1.1/go.mod h1:w2waW5Zoa/Wc4Yqe0wgrIYAGKqRMf7czn2HNKXmuL+I= 17 | github.com/pkg/errors v0.8.0 h1:WdK/asTD0HN+q6hsWO3/vpuAkAr+tw6aNJNDFFf0+qw= 18 | github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 19 | github.com/pkg/profile v1.2.1/go.mod h1:hJw3o1OdXxsrSjjVksARp5W95eeEaEfptyVZyv6JUPA= 20 | -------------------------------------------------------------------------------- /lmdb.go: -------------------------------------------------------------------------------- 1 | package cellar 2 | 3 | import ( 4 | "bytes" 5 | "encoding/binary" 6 | "log" 7 | 8 | "github.com/abdullin/lex-go/tuple" 9 | "github.com/abdullin/mdb" 10 | "github.com/bmatsuo/lmdb-go/lmdb" 11 | "github.com/bmatsuo/lmdb-go/lmdbscan" 12 | proto "github.com/golang/protobuf/proto" 13 | "github.com/pkg/errors" 14 | ) 15 | 16 | const ( 17 | ChunkTable byte = 1 18 | MetaTable byte = 2 19 | BufferTable byte = 3 20 | CellarTable byte = 4 21 | UserIndexTable byte = 5 22 | UserCheckpointTable byte = 6 23 | ) 24 | 25 | func lmdbPutUserCheckpoint(tx *mdb.Tx, name string, pos int64) error { 26 | key := mdb.CreateKey(UserCheckpointTable, name) 27 | 28 | value, err := tx.PutReserve(key, 8) 29 | if err != nil { 30 | return errors.Wrap(err, "PutReserve") 31 | } 32 | binary.LittleEndian.PutUint64(value, uint64(pos)) 33 | return nil 34 | } 35 | 36 | func lmdbGetUserCheckpoint(tx *mdb.Tx, name string) (int64, error) { 37 | 38 | key := mdb.CreateKey(UserCheckpointTable, name) 39 | value, err := tx.Get(key) 40 | if err != nil { 41 | return 0, errors.Wrap(err, "Get") 42 | } 43 | if len(value) == 0 { 44 | return 0, nil 45 | } 46 | return int64(binary.LittleEndian.Uint64(value)), nil 47 | } 48 | 49 | func lmdbAddChunk(tx *mdb.Tx, chunkStartPos int64, dto *ChunkDto) error { 50 | key := mdb.CreateKey(ChunkTable, chunkStartPos) 51 | 52 | if err := tx.PutProto(key, dto); err != nil { 53 | return errors.Wrap(err, "PutProto") 54 | } 55 | 56 | log.Printf("Added chunk %s with %d records and %d bytes (%d compressed)", dto.FileName, dto.Records, dto.UncompressedByteSize, dto.CompressedDiskSize) 57 | return nil 58 | } 59 | 60 | func lmdbListChunks(tx *mdb.Tx) ([]*ChunkDto, error) { 61 | 62 | tpl := mdb.CreateKey(ChunkTable) 63 | 64 | scanner := lmdbscan.New(tx.Tx, tx.DB) 65 | 66 | defer scanner.Close() 67 | scanner.Set(tpl, nil, lmdb.SetRange) 68 | 69 | var chunks []*ChunkDto 70 | 71 | for scanner.Scan() { 72 | key := scanner.Key() 73 | 74 | if !bytes.HasPrefix(key, tpl) { 75 | break 76 | } 77 | 78 | var chunk = &ChunkDto{} 79 | val := scanner.Val() 80 | if err := proto.Unmarshal(val, chunk); err != nil { 81 | return nil, errors.Wrapf(err, "Unmarshal %x at %x", val, key) 82 | } 83 | 84 | chunks = append(chunks, chunk) 85 | } 86 | 87 | if err := scanner.Err(); err != nil { 88 | return nil, errors.Wrap(err, "Scanner.Scan") 89 | } 90 | return chunks, nil 91 | } 92 | 93 | func lmdbPutBuffer(tx *mdb.Tx, dto *BufferDto) error { 94 | tpl := tuple.Tuple([]tuple.Element{BufferTable}) 95 | 96 | key := tpl.Pack() 97 | var val []byte 98 | var err error 99 | 100 | if val, err = proto.Marshal(dto); err != nil { 101 | return errors.Wrap(err, "Marshal") 102 | } 103 | if err = tx.Put(key, val); err != nil { 104 | return errors.Wrap(err, "tx.Put") 105 | } 106 | return nil 107 | } 108 | 109 | func lmdbGetBuffer(tx *mdb.Tx) (*BufferDto, error) { 110 | 111 | tpl := tuple.Tuple([]tuple.Element{BufferTable}) 112 | key := tpl.Pack() 113 | var data []byte 114 | var err error 115 | 116 | if data, err = tx.Get(key); err != nil { 117 | return nil, errors.Wrap(err, "tx.Get") 118 | } 119 | if data == nil { 120 | return nil, nil 121 | } 122 | dto := &BufferDto{} 123 | if err = proto.Unmarshal(data, dto); err != nil { 124 | return nil, errors.Wrap(err, "Unmarshal") 125 | } 126 | return dto, nil 127 | } 128 | 129 | func lmdbIndexPosition(tx *mdb.Tx, stream string, k uint64, pos int64) error { 130 | tpl := tuple.Tuple([]tuple.Element{MetaTable, stream, k}) 131 | key := tpl.Pack() 132 | var err error 133 | 134 | buf := make([]byte, binary.MaxVarintLen64) 135 | 136 | n := binary.PutVarint(buf, pos) 137 | if err = tx.Put(key, buf[0:n]); err != nil { 138 | return errors.Wrap(err, "tx.Put") 139 | } 140 | return nil 141 | } 142 | 143 | func lmdbLookupPosition(tx *mdb.Tx, stream string, k uint64) (int64, error) { 144 | 145 | tpl := tuple.Tuple([]tuple.Element{MetaTable, stream, k}) 146 | key := tpl.Pack() 147 | var err error 148 | 149 | var val []byte 150 | if val, err = tx.Get(key); err != nil { 151 | return 0, errors.Wrap(err, "tx.Get") 152 | } 153 | var pos int64 154 | 155 | pos, _ = binary.Varint(val) 156 | return pos, nil 157 | } 158 | 159 | func lmdbSetCellarMeta(tx *mdb.Tx, m *MetaDto) error { 160 | key := mdb.CreateKey(CellarTable) 161 | return tx.PutProto(key, m) 162 | } 163 | 164 | func lmdbGetCellarMeta(tx *mdb.Tx) (*MetaDto, error) { 165 | 166 | key := mdb.CreateKey(CellarTable) 167 | dto := &MetaDto{} 168 | var err error 169 | 170 | if err = tx.ReadProto(key, dto); err != nil { 171 | return nil, errors.Wrap(err, "ReadProto") 172 | } 173 | return dto, nil 174 | 175 | } 176 | -------------------------------------------------------------------------------- /main_test.go: -------------------------------------------------------------------------------- 1 | package cellar 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | ) 7 | 8 | func getFolder() string { 9 | return NewTempFolder("cellar") 10 | } 11 | 12 | func TestMain(m *testing.M) { 13 | // setup 14 | retCode := m.Run() 15 | RemoveTempFolders() 16 | os.Exit(retCode) 17 | } 18 | 19 | func makeSlice(l int) []byte { 20 | return make([]byte, l) 21 | } 22 | -------------------------------------------------------------------------------- /reader.go: -------------------------------------------------------------------------------- 1 | package cellar 2 | 3 | import ( 4 | "encoding/binary" 5 | "io" 6 | "log" 7 | "os" 8 | "path" 9 | 10 | "github.com/abdullin/mdb" 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | type ReadFlag int 15 | 16 | const ( 17 | RF_None ReadFlag = 0 18 | RF_LoadBuffer ReadFlag = 1 << 1 19 | RF_PrintChunks ReadFlag = 1 << 2 20 | ) 21 | 22 | type Reader struct { 23 | Folder string 24 | Key []byte 25 | Flags ReadFlag 26 | StartPos int64 27 | EndPos int64 28 | LimitChunks int 29 | } 30 | 31 | func NewReader(folder string, key []byte) *Reader { 32 | return &Reader{folder, key, RF_LoadBuffer, 0, 0, 0} 33 | } 34 | 35 | type ReaderInfo struct { 36 | // can be used to convert to file name 37 | ChunkPos int64 38 | // global start pos 39 | StartPos int64 40 | // global read pos 41 | NextPos int64 42 | } 43 | 44 | type ReadOp func(pos *ReaderInfo, data []byte) error 45 | 46 | func (r *Reader) ReadDB(op mdb.TxOp) error { 47 | var db *mdb.DB 48 | var err error 49 | 50 | cfg := mdb.NewConfig() 51 | if db, err = mdb.New(r.Folder, cfg); err != nil { 52 | return errors.Wrap(err, "mdb.New") 53 | } 54 | 55 | defer db.Close() 56 | 57 | return db.Read(op) 58 | } 59 | 60 | func (r *Reader) Scan(op ReadOp) error { 61 | 62 | var db *mdb.DB 63 | var err error 64 | 65 | cfg := mdb.NewConfig() 66 | if db, err = mdb.New(r.Folder, cfg); err != nil { 67 | return errors.Wrap(err, "mdb.New") 68 | } 69 | 70 | defer db.Close() 71 | 72 | var b *BufferDto 73 | // var meta *MetaDto 74 | var chunks []*ChunkDto 75 | 76 | loadBuffer := (r.Flags & RF_LoadBuffer) == RF_LoadBuffer 77 | printChunks := (r.Flags & RF_PrintChunks) == RF_PrintChunks 78 | 79 | err = db.Read(func(tx *mdb.Tx) error { 80 | var err error 81 | if b, err = lmdbGetBuffer(tx); err != nil { 82 | return errors.Wrap(err, "lmdbGetBuffer") 83 | } 84 | if _, err = lmdbGetCellarMeta(tx); err != nil { 85 | return errors.Wrap(err, "lmdbGetCellarMeta") 86 | } 87 | if chunks, err = lmdbListChunks(tx); err != nil { 88 | return errors.Wrap(err, "lmdbListChunks") 89 | } 90 | return nil 91 | 92 | }) 93 | 94 | if err != nil { 95 | return errors.Wrap(err, "db.Read") 96 | } 97 | 98 | if b == nil && len(chunks) == 0 { 99 | return nil 100 | } 101 | 102 | info := &ReaderInfo{} 103 | 104 | log.Printf("Found %d chunks and limit is %d", len(chunks), r.LimitChunks) 105 | 106 | if len(chunks) > 0 { 107 | 108 | if r.LimitChunks > 0 && len(chunks) > r.LimitChunks { 109 | log.Printf("Truncating input from %d to %d chunks", len(chunks), r.LimitChunks) 110 | chunks = chunks[:r.LimitChunks] 111 | } 112 | 113 | for i, c := range chunks { 114 | 115 | endPos := c.StartPos + c.UncompressedByteSize 116 | 117 | if r.StartPos != 0 && endPos < r.StartPos { 118 | // skip chunk if it ends before range we are interested in 119 | continue 120 | } 121 | 122 | if r.EndPos != 0 && c.StartPos > r.EndPos { 123 | // skip the chunk if it starts after the range we are interested in 124 | continue 125 | } 126 | 127 | chunk := make([]byte, c.UncompressedByteSize) 128 | var file = path.Join(r.Folder, c.FileName) 129 | 130 | if printChunks { 131 | log.Printf("Loading chunk %d %s with size %d", i, c.FileName, c.UncompressedByteSize) 132 | } 133 | 134 | if chunk, err = loadChunkIntoBuffer(file, r.Key, c.UncompressedByteSize, chunk); err != nil { 135 | log.Panicf("Failed to load chunk %s", c.FileName) 136 | } 137 | 138 | info.ChunkPos = c.StartPos 139 | 140 | chunkPos := 0 141 | if r.StartPos != 0 && r.StartPos > c.StartPos { 142 | // reader starts in the middle 143 | chunkPos = int(r.StartPos - c.StartPos) 144 | } 145 | 146 | if err = replayChunk(info, chunk, op, chunkPos); err != nil { 147 | return errors.Wrap(err, "Failed to read chunk") 148 | } 149 | } 150 | } 151 | 152 | if loadBuffer && b != nil && b.Pos > 0 { 153 | 154 | if r.EndPos != 0 && b.StartPos > r.EndPos { 155 | // if buffer starts after the end of our search interval - skip it 156 | return nil 157 | } 158 | 159 | loc := path.Join(r.Folder, b.FileName) 160 | 161 | var f *os.File 162 | 163 | if f, err = os.Open(loc); err != nil { 164 | log.Panicf("Failed to open buffer file %s", loc) 165 | } 166 | 167 | curChunk := make([]byte, b.Pos) 168 | 169 | var n int 170 | if n, err = f.Read(curChunk); err != nil { 171 | log.Panicf("Failed to read %d bytes from buffer %s", b.Pos, loc) 172 | } 173 | if n != int(b.Pos) { 174 | log.Panic("Failed to read bytes") 175 | } 176 | 177 | info.ChunkPos = b.StartPos 178 | 179 | chunkPos := 0 180 | 181 | if r.StartPos > b.StartPos { 182 | chunkPos = int(r.StartPos - b.StartPos) 183 | } 184 | 185 | if err = replayChunk(info, curChunk, op, chunkPos); err != nil { 186 | return errors.Wrap(err, "Failed to read chunk") 187 | } 188 | 189 | } 190 | 191 | return nil 192 | 193 | } 194 | 195 | func readVarint(b []byte) (val int64, n int) { 196 | 197 | val, n = binary.Varint(b) 198 | if n <= 0 { 199 | log.Panicf("Failed to read varint %d", n) 200 | } 201 | 202 | return 203 | 204 | } 205 | 206 | func replayChunk(info *ReaderInfo, chunk []byte, op ReadOp, pos int) error { 207 | 208 | max := len(chunk) 209 | 210 | var err error 211 | 212 | // while we are not at the end, 213 | // read first len 214 | // then pass the bytes to the op 215 | for pos < max { 216 | 217 | info.StartPos = int64(pos) + info.ChunkPos 218 | 219 | recordSize, shift := readVarint(chunk[pos:]) 220 | 221 | // move position by the header size 222 | pos += shift 223 | 224 | // get chunk 225 | record := chunk[pos : pos+int(recordSize)] 226 | // apply chunk 227 | 228 | pos += int(recordSize) 229 | 230 | info.NextPos = int64(pos) + info.ChunkPos 231 | 232 | if err = op(info, record); err != nil { 233 | return errors.Wrap(err, "Failed to execute op") 234 | } 235 | // shift pos 236 | 237 | } 238 | return nil 239 | 240 | } 241 | 242 | func getMaxByteSize(cs []*ChunkDto, b *BufferDto) int64 { 243 | 244 | var bufferSize int64 245 | 246 | for _, c := range cs { 247 | if c.UncompressedByteSize > bufferSize { 248 | bufferSize = c.UncompressedByteSize 249 | } 250 | } 251 | 252 | if b != nil && b.MaxBytes > bufferSize { 253 | bufferSize = b.MaxBytes 254 | } 255 | return bufferSize 256 | } 257 | 258 | func loadChunkIntoBuffer(loc string, key []byte, size int64, b []byte) ([]byte, error) { 259 | 260 | var decryptor, zr io.Reader 261 | var err error 262 | 263 | var chunkFile *os.File 264 | if chunkFile, err = os.Open(loc); err != nil { 265 | log.Panicf("Failed to open chunk %s", loc) 266 | } 267 | 268 | defer chunkFile.Close() 269 | 270 | if decryptor, err = chainDecryptor(key, chunkFile); err != nil { 271 | log.Panicf("Failed to chain decryptor for %s: %s", loc, err) 272 | } 273 | 274 | if zr, err = chainDecompressor(decryptor); err != nil { 275 | log.Panicf("Failed to chain decompressor for %s: %s", loc, err) 276 | } 277 | //zr.Header.CompressionLevel = 4 278 | var readBytes int 279 | if readBytes, err = zr.Read(b); err != nil { 280 | log.Panicf("Failed to read from chunk %s (%d): %s", loc, size, err) 281 | } 282 | 283 | if int64(readBytes) != size { 284 | log.Panicf("Read %d bytes but expected %d", readBytes, size) 285 | } 286 | return b[0:readBytes], nil 287 | 288 | } 289 | -------------------------------------------------------------------------------- /reader_async.go: -------------------------------------------------------------------------------- 1 | package cellar 2 | 3 | import ( 4 | "log" 5 | ) 6 | 7 | type Rec struct { 8 | Data []byte 9 | ChunkPos int64 10 | StartPos int64 11 | NextPos int64 12 | } 13 | 14 | func (reader *Reader) ScanAsync(buffer int) chan *Rec { 15 | 16 | vals := make(chan *Rec, buffer) 17 | 18 | go func() { 19 | // make sure we terminate the channel on scan read 20 | defer close(vals) 21 | 22 | err := reader.Scan(func(ri *ReaderInfo, data []byte) error { 23 | vals <- &Rec{data, ri.ChunkPos, ri.StartPos, ri.NextPos} 24 | return nil 25 | }) 26 | 27 | if err != nil { 28 | log.Panic(err) 29 | } 30 | }() 31 | 32 | return vals 33 | } 34 | -------------------------------------------------------------------------------- /util.go: -------------------------------------------------------------------------------- 1 | package cellar 2 | 3 | import ( 4 | "os" 5 | 6 | "github.com/pkg/errors" 7 | ) 8 | 9 | func ensureFolder(folder string) (err error) { 10 | 11 | var stat os.FileInfo 12 | if stat, err = os.Stat(folder); err == nil { 13 | if stat.IsDir() { 14 | return nil 15 | } 16 | return errors.Errorf("Path is a file: %s", folder) 17 | } 18 | 19 | if os.IsNotExist(err) { 20 | // file does not exist - create 21 | if err = os.MkdirAll(folder, 0644); err != nil { 22 | return errors.Wrap(err, "MkdirAll") 23 | } 24 | return nil 25 | 26 | } 27 | return errors.Wrap(err, "os.Stat") 28 | 29 | } 30 | -------------------------------------------------------------------------------- /writer.go: -------------------------------------------------------------------------------- 1 | package cellar 2 | 3 | import ( 4 | "encoding/binary" 5 | fmt "fmt" 6 | "log" 7 | "os" 8 | "path" 9 | 10 | "github.com/abdullin/mdb" 11 | "github.com/pkg/errors" 12 | ) 13 | 14 | type Writer struct { 15 | db *mdb.DB 16 | b *Buffer 17 | maxKeySize int64 18 | maxValSize int64 19 | folder string 20 | maxBufferSize int64 21 | key []byte 22 | encodingBuf []byte 23 | } 24 | 25 | func NewWriter(folder string, maxBufferSize int64, key []byte) (*Writer, error) { 26 | ensureFolder(folder) 27 | 28 | var db *mdb.DB 29 | var err error 30 | 31 | cfg := mdb.NewConfig() 32 | // make sure we are writing sync 33 | cfg.EnvFlags = 0 34 | 35 | if db, err = mdb.New(folder, cfg); err != nil { 36 | return nil, errors.Wrap(err, "mdb.New") 37 | } 38 | 39 | var meta *MetaDto 40 | var b *Buffer 41 | 42 | err = db.Update(func(tx *mdb.Tx) error { 43 | var err error 44 | 45 | var dto *BufferDto 46 | if dto, err = lmdbGetBuffer(tx); err != nil { 47 | return errors.Wrap(err, "lmdbGetBuffer") 48 | } 49 | 50 | if dto == nil { 51 | if b, err = createBuffer(tx, 0, maxBufferSize, folder); err != nil { 52 | return errors.Wrap(err, "SetNewBuffer") 53 | } 54 | return nil 55 | 56 | } else if b, err = openBuffer(dto, folder); err != nil { 57 | return errors.Wrap(err, "openBuffer") 58 | } 59 | 60 | if meta, err = lmdbGetCellarMeta(tx); err != nil { 61 | return errors.Wrap(err, "lmdbGetCellarMeta") 62 | } 63 | return nil 64 | }) 65 | 66 | if err != nil { 67 | return nil, errors.Wrap(err, "Update") 68 | } 69 | 70 | wr := &Writer{ 71 | folder: folder, 72 | maxBufferSize: maxBufferSize, 73 | key: key, 74 | encodingBuf: make([]byte, binary.MaxVarintLen64), 75 | db: db, 76 | b: b, 77 | } 78 | 79 | if meta != nil { 80 | wr.maxKeySize = meta.MaxKeySize 81 | wr.maxValSize = meta.MaxValSize 82 | } 83 | 84 | return wr, nil 85 | 86 | } 87 | 88 | func (w *Writer) VolatilePos() int64 { 89 | if w.b != nil { 90 | return w.b.startPos + w.b.pos 91 | } 92 | return 0 93 | } 94 | 95 | func (w *Writer) Append(data []byte) (pos int64, err error) { 96 | 97 | dataLen := int64(len(data)) 98 | n := binary.PutVarint(w.encodingBuf, dataLen) 99 | 100 | totalSize := n + len(data) 101 | 102 | if !w.b.fits(int64(totalSize)) { 103 | if err = w.SealTheBuffer(); err != nil { 104 | return 0, errors.Wrap(err, "SealTheBuffer") 105 | } 106 | } 107 | 108 | if err = w.b.writeBytes(w.encodingBuf[0:n]); err != nil { 109 | return 0, errors.Wrap(err, "write len prefix") 110 | } 111 | if err = w.b.writeBytes(data); err != nil { 112 | return 0, errors.Wrap(err, "write body") 113 | } 114 | 115 | w.b.endRecord() 116 | 117 | // update statistics 118 | if dataLen > w.maxValSize { 119 | w.maxValSize = dataLen 120 | } 121 | 122 | pos = w.b.startPos + w.b.pos 123 | 124 | return pos, nil 125 | } 126 | 127 | func createBuffer(tx *mdb.Tx, startPos int64, maxSize int64, folder string) (*Buffer, error) { 128 | name := fmt.Sprintf("%012d", startPos) 129 | dto := &BufferDto{ 130 | Pos: 0, 131 | StartPos: startPos, 132 | MaxBytes: maxSize, 133 | Records: 0, 134 | FileName: name, 135 | } 136 | var err error 137 | var buf *Buffer 138 | 139 | if buf, err = openBuffer(dto, folder); err != nil { 140 | return nil, errors.Wrapf(err, "openBuffer %s", folder) 141 | } 142 | 143 | if err = lmdbPutBuffer(tx, dto); err != nil { 144 | return nil, errors.Wrap(err, "lmdbPutBuffer") 145 | } 146 | return buf, nil 147 | 148 | } 149 | 150 | func (w *Writer) SealTheBuffer() error { 151 | 152 | var err error 153 | 154 | oldBuffer := w.b 155 | var newBuffer *Buffer 156 | 157 | if err = oldBuffer.flush(); err != nil { 158 | return errors.Wrap(err, "buffer.Flush") 159 | } 160 | 161 | var dto *ChunkDto 162 | 163 | if dto, err = oldBuffer.compress(w.key); err != nil { 164 | return errors.Wrap(err, "compress") 165 | } 166 | 167 | newStartPos := dto.StartPos + dto.UncompressedByteSize 168 | 169 | err = w.db.Update(func(tx *mdb.Tx) error { 170 | 171 | if err = lmdbAddChunk(tx, dto.StartPos, dto); err != nil { 172 | return errors.Wrap(err, "lmdbAddChunk") 173 | } 174 | 175 | if newBuffer, err = createBuffer(tx, newStartPos, w.maxBufferSize, w.folder); err != nil { 176 | return errors.Wrap(err, "createBuffer") 177 | } 178 | return nil 179 | 180 | }) 181 | 182 | if err != nil { 183 | return errors.Wrap(err, "w.db.Update") 184 | } 185 | 186 | w.b = newBuffer 187 | 188 | oldBufferPath := path.Join(w.folder, oldBuffer.fileName) 189 | 190 | if err = os.Remove(oldBufferPath); err != nil { 191 | log.Printf("Can't remove old buffer %s: %s", oldBufferPath, err) 192 | } 193 | return nil 194 | 195 | } 196 | 197 | // Close disposes all resources 198 | func (w *Writer) Close() error { 199 | 200 | // TODO: flush, checkpoint and close current buffer 201 | return w.db.Close() 202 | } 203 | 204 | // ReadDB allows to execute read transaction against 205 | // the meta database 206 | func (w *Writer) ReadDB(op mdb.TxOp) error { 207 | return w.db.Read(op) 208 | } 209 | 210 | // Write DB allows to execute write transaction against 211 | // the meta database 212 | func (w *Writer) UpdateDB(op mdb.TxOp) error { 213 | return w.db.Update(op) 214 | } 215 | 216 | func (w *Writer) PutUserCheckpoint(name string, pos int64) error { 217 | return w.db.Update(func(tx *mdb.Tx) error { 218 | return lmdbPutUserCheckpoint(tx, name, pos) 219 | }) 220 | } 221 | 222 | func (w *Writer) GetUserCheckpoint(name string) (int64, error) { 223 | 224 | var pos int64 225 | err := w.db.Read(func(tx *mdb.Tx) error { 226 | p, e := lmdbGetUserCheckpoint(tx, name) 227 | if e != nil { 228 | return e 229 | } 230 | pos = p 231 | return nil 232 | }) 233 | if err != nil { 234 | return 0, err 235 | } 236 | return pos, nil 237 | } 238 | 239 | func (w *Writer) Checkpoint() (int64, error) { 240 | w.b.flush() 241 | 242 | var err error 243 | 244 | dto := w.b.getState() 245 | 246 | current := dto.StartPos + dto.Pos 247 | 248 | err = w.db.Update(func(tx *mdb.Tx) error { 249 | var err error 250 | 251 | if err = lmdbPutBuffer(tx, dto); err != nil { 252 | return errors.Wrap(err, "lmdbPutBuffer") 253 | } 254 | 255 | meta := &MetaDto{ 256 | MaxKeySize: w.maxKeySize, 257 | MaxValSize: w.maxValSize, 258 | } 259 | 260 | if err = lmdbSetCellarMeta(tx, meta); err != nil { 261 | return errors.Wrap(err, "lmdbSetCellarMeta") 262 | } 263 | return nil 264 | 265 | }) 266 | 267 | if err != nil { 268 | return 0, errors.Wrap(err, "txn.Update") 269 | } 270 | 271 | return current, nil 272 | 273 | } 274 | -------------------------------------------------------------------------------- /writer_test.go: -------------------------------------------------------------------------------- 1 | package cellar 2 | 3 | import ( 4 | "crypto/rand" 5 | fmt "fmt" 6 | "io" 7 | "log" 8 | rnd "math/rand" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | func genRandBytes(size int) []byte { 14 | 15 | key := make([]byte, size) 16 | var err error 17 | if _, err = io.ReadFull(rand.Reader, key); err != nil { 18 | panic(err) 19 | } 20 | return key 21 | } 22 | 23 | func genSeedBytes(size int, seed int) []byte { 24 | buf := make([]byte, size) 25 | for i := 0; i < size; i++ { 26 | buf[i] = byte((i + seed) % 256) 27 | } 28 | return buf 29 | } 30 | func checkSeedBytes(data []byte, seed int) error { 31 | for i := 0; i < len(data); i++ { 32 | expect := byte((i + seed) % 256) 33 | if data[i] != expect { 34 | return fmt.Errorf("Given seed %d expected %d at position %d but got %d", seed, expect, i, data[i]) 35 | } 36 | } 37 | return nil 38 | } 39 | 40 | func TestWithClosing(t *testing.T) { 41 | 42 | var w *Writer 43 | var err error 44 | 45 | folder := getFolder() 46 | key := genRandBytes(16) 47 | w, err = NewWriter(folder, 1000, key) 48 | 49 | defer closeWriter(t, w) 50 | 51 | assert(t, err, "NewWriter") 52 | 53 | var valuesWritten int 54 | 55 | var k int 56 | 57 | for j := 0; j < 5; j++ { 58 | for i := 0; i < 30; i++ { 59 | valuesWritten += 64 60 | 61 | if _, err = w.Append(genSeedBytes(64, k)); err != nil { 62 | t.Fatalf("Append failed: %s", err) 63 | } 64 | k++ 65 | 66 | if k%17 == 0 { 67 | assertCheckpoint(t, w) 68 | } 69 | } 70 | 71 | assertCheckpoint(t, w) 72 | w.Checkpoint() 73 | err = w.Close() 74 | 75 | assert(t, err, "Closing") 76 | 77 | w, err = NewWriter(folder, 1000, key) 78 | assert(t, err, "Opening writer") 79 | 80 | } 81 | 82 | reader := NewReader(folder, key) 83 | 84 | var valuesRead int 85 | var n int 86 | 87 | err = reader.Scan(func(pos *ReaderInfo, s []byte) error { 88 | 89 | if err := checkSeedBytes(s, n); err != nil { 90 | t.Fatalf("Failed seed check: %s", err) 91 | } 92 | 93 | valuesRead += len(s) 94 | n++ 95 | 96 | return nil 97 | }) 98 | 99 | assert(t, err, "ReadAll") 100 | 101 | if valuesRead != valuesWritten { 102 | t.Fatalf("Expected to read %d bytes but read %d", valuesWritten, valuesRead) 103 | } 104 | } 105 | 106 | func closeWriter(t *testing.T, w *Writer) { 107 | err := w.Close() 108 | if err != nil { 109 | t.Fatalf("Failed to close the writer %s", err) 110 | } 111 | } 112 | 113 | func assertCheckpoint(t *testing.T, w *Writer) { 114 | _, err := w.Checkpoint() 115 | if err != nil { 116 | t.Fatalf("Failed to checkpoint %s", err) 117 | } 118 | } 119 | 120 | func TestUserCheckpoints(t *testing.T) { 121 | 122 | var ( 123 | w *Writer 124 | err error 125 | pos int64 126 | ) 127 | 128 | folder := getFolder() 129 | key := genRandBytes(16) 130 | w, err = NewWriter(folder, 1000, key) 131 | 132 | defer closeWriter(t, w) 133 | 134 | assert(t, err, "NewWriter") 135 | 136 | pos, err = w.GetUserCheckpoint("custom") 137 | assert(t, err, "GetCheckpoint") 138 | if pos != 0 { 139 | t.Fatal("Checkpoint should be 0") 140 | } 141 | 142 | assert(t, w.PutUserCheckpoint("custom", 42), "PutCheckpoint") 143 | 144 | pos, err = w.GetUserCheckpoint("custom") 145 | assert(t, err, "GetCheckpoint") 146 | if pos != 42 { 147 | t.Fatal("Checkpoint should be 42") 148 | } 149 | 150 | } 151 | 152 | func TestSingleChunkDB(t *testing.T) { 153 | 154 | log.Print("Starting single chunk") 155 | defer log.Print("Single chunk over") 156 | 157 | var w *Writer 158 | var err error 159 | 160 | folder := getFolder() 161 | key := genRandBytes(16) 162 | w, err = NewWriter(folder, 1000, key) 163 | 164 | defer closeWriter(t, w) 165 | 166 | assert(t, err, "NewWriter") 167 | 168 | var valuesWritten int 169 | for i := 0; i < 2; i++ { 170 | valuesWritten += 64 171 | 172 | if _, err = w.Append(genSeedBytes(64, i)); err != nil { 173 | t.Fatalf("Append failed: %s", err) 174 | } 175 | } 176 | assertCheckpoint(t, w) 177 | w.Close() 178 | 179 | var valuesRead int 180 | var n int 181 | 182 | reader := NewReader(folder, key) 183 | 184 | err = reader.Scan(func(pos *ReaderInfo, s []byte) error { 185 | 186 | if err := checkSeedBytes(s, n); err != nil { 187 | t.Fatalf("Failed seed check: %s", err) 188 | } 189 | 190 | valuesRead += len(s) 191 | n++ 192 | 193 | return nil 194 | }) 195 | 196 | assert(t, err, "ReadAll") 197 | 198 | if valuesRead != valuesWritten { 199 | t.Fatalf("Expected to read %d bytes but read %d", valuesWritten, valuesRead) 200 | } 201 | 202 | } 203 | 204 | func TestReadingWithOffset(t *testing.T) { 205 | } 206 | 207 | func TestSimpleKey(t *testing.T) { 208 | 209 | var w *Writer 210 | var err error 211 | 212 | folder := getFolder() 213 | key := genRandBytes(16) 214 | w, err = NewWriter(folder, 1000, key) 215 | 216 | defer closeWriter(t, w) 217 | 218 | assert(t, err, "NewWriter") 219 | 220 | var valuesWritten int 221 | for i := 0; i < 30; i++ { 222 | valuesWritten += 64 223 | 224 | if _, err = w.Append(genSeedBytes(64, i)); err != nil { 225 | t.Fatalf("Append failed: %s", err) 226 | } 227 | } 228 | assertCheckpoint(t, w) 229 | 230 | reader := NewReader(folder, key) 231 | var valuesRead int 232 | var n int 233 | 234 | err = reader.Scan(func(pos *ReaderInfo, s []byte) error { 235 | 236 | if err := checkSeedBytes(s, n); err != nil { 237 | t.Fatalf("Failed seed check: %s", err) 238 | } 239 | 240 | valuesRead += len(s) 241 | n++ 242 | 243 | return nil 244 | }) 245 | 246 | assert(t, err, "ReadAll") 247 | 248 | if valuesRead != valuesWritten { 249 | t.Fatalf("Expected to read %d bytes but read %d", valuesWritten, valuesRead) 250 | } 251 | } 252 | 253 | type rec struct { 254 | pos int64 255 | seed int 256 | size int 257 | } 258 | 259 | func TestFuzz(t *testing.T) { 260 | 261 | seed := time.Now().UnixNano() 262 | r := rnd.New(rnd.NewSource(seed)) 263 | 264 | folder := getFolder() 265 | maxIterations := 1000 266 | maxValueLength := r.Intn(1024*128) + 10 267 | maxBufferSize := r.Intn(maxValueLength*maxIterations/2) + 1 268 | key := genRandBytes(16) 269 | 270 | t.Logf("maxVal %d; maxBuffer %d; seed %d", maxValueLength, maxBufferSize, seed) 271 | 272 | var writer *Writer 273 | var err error 274 | 275 | var recs []rec 276 | 277 | for i := 0; i <= maxIterations; i++ { 278 | if r.Intn(17) == 13 || i == maxIterations { 279 | if writer != nil { 280 | assertCheckpoint(t, writer) 281 | writer.Checkpoint() 282 | err = writer.Close() 283 | assert(t, err, "Closing writer") 284 | 285 | writer = nil 286 | } 287 | 288 | recordsSaved := len(recs) 289 | 290 | reader := NewReader(folder, key) 291 | recordPos := 0 292 | if r.Intn(5) > 2 && recordsSaved > 0 { 293 | // pick a random pos to scan from 294 | recordPos = r.Intn(recordsSaved) 295 | } 296 | 297 | r := recs[recordPos] 298 | 299 | reader.StartPos = r.pos 300 | scanSeed := r.seed 301 | 302 | var bytesRead int 303 | var recordsRead int 304 | var bytesWritten int 305 | var recordsWritten int 306 | for i := recordPos; i < recordsSaved; i++ { 307 | bytesWritten += recs[i].size 308 | recordsWritten++ 309 | } 310 | 311 | reader.Scan(func(p *ReaderInfo, b []byte) error { 312 | bytesRead += len(b) 313 | recordsRead++ 314 | if err := checkSeedBytes(b, scanSeed); err != nil { 315 | t.Fatalf("Failed to verify data: %s", err) 316 | } 317 | scanSeed++ 318 | return nil 319 | }) 320 | if bytesWritten != bytesRead { 321 | t.Fatalf("Written %d bytes but read %d bytes from %d (%d). Records: %d, %d", bytesWritten, bytesRead, reader.StartPos, bytesRead+int(reader.StartPos), recordsWritten, recordsRead) 322 | } 323 | } 324 | 325 | if writer == nil { 326 | writer, err = NewWriter(folder, int64(maxBufferSize), key) 327 | assert(t, err, "new writer") 328 | } 329 | 330 | valSize := r.Intn(maxValueLength) 331 | 332 | val := genSeedBytes(valSize, i) 333 | pos := writer.VolatilePos() 334 | _, err = writer.Append(val) 335 | 336 | recs = append(recs, rec{ 337 | pos: pos, 338 | seed: i, 339 | size: valSize, 340 | }) 341 | if err != nil { 342 | assert(t, err, "append") 343 | } 344 | } 345 | } 346 | --------------------------------------------------------------------------------