├── History.md ├── Makefile ├── ci.yml ├── _examples ├── logging.go ├── simple.go └── stress.go ├── LICENSE ├── Readme.md ├── buffer_test.go └── buffer.go /History.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | test: 3 | @go test -cover 4 | 5 | bench: 6 | @go test -bench=. -cpu 1,2,4,8 -run Benchmark 7 | 8 | .PHONY: bench test -------------------------------------------------------------------------------- /ci.yml: -------------------------------------------------------------------------------- 1 | version: 0.2 2 | 3 | phases: 4 | install: 5 | commands: 6 | - go get -t ./... 7 | build: 8 | commands: 9 | - go test -cover -v ./... 10 | 11 | -------------------------------------------------------------------------------- /_examples/logging.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "time" 6 | 7 | "github.com/tj/go-disk-buffer" 8 | ) 9 | 10 | func main() { 11 | b, err := buffer.New("/tmp/pets", &buffer.Config{ 12 | FlushWrites: 250, 13 | FlushBytes: 1 << 20, 14 | FlushInterval: 10 * time.Second, 15 | Verbosity: 2, 16 | }) 17 | 18 | if err != nil { 19 | log.Fatalf("error opening: %s", err) 20 | } 21 | 22 | go func() { 23 | for range b.Queue { 24 | // discard 25 | } 26 | }() 27 | 28 | for i := 0; i < 100000; i++ { 29 | _, err := b.Write([]byte("Tobi Ferret")) 30 | if err != nil { 31 | log.Fatalf("error writing: %s", err) 32 | } 33 | } 34 | 35 | err = b.Close() 36 | if err != nil { 37 | log.Fatalf("error closing: %s", err) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /_examples/simple.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | 
"io/ioutil" 5 | "log" 6 | "time" 7 | 8 | "github.com/tj/go-disk-buffer" 9 | ) 10 | 11 | func main() { 12 | b, err := buffer.New("/tmp/pets", &buffer.Config{ 13 | FlushWrites: 250, 14 | FlushBytes: 1 << 20, 15 | FlushInterval: 10 * time.Second, 16 | Verbosity: 0, 17 | }) 18 | 19 | if err != nil { 20 | log.Fatalf("error opening: %s", err) 21 | } 22 | 23 | go func() { 24 | for file := range b.Queue { 25 | log.Printf("flushed %s", file) 26 | 27 | b, err := ioutil.ReadFile(file.Path) 28 | if err != nil { 29 | log.Fatalf("error reading: %s", err) 30 | } 31 | 32 | log.Printf("%q is %d bytes", file.Path, len(b)) 33 | } 34 | }() 35 | 36 | for i := 0; i < 10000; i++ { 37 | _, err := b.Write([]byte("Tobi Ferret")) 38 | if err != nil { 39 | log.Fatalf("error writing: %s", err) 40 | } 41 | } 42 | 43 | err = b.Close() 44 | if err != nil { 45 | log.Fatalf("error closing: %s", err) 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /_examples/stress.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "time" 6 | 7 | "github.com/rakyll/coop" 8 | "github.com/tj/go-disk-buffer" 9 | ) 10 | 11 | func main() { 12 | b, err := buffer.New("/tmp/pets", &buffer.Config{ 13 | FlushBytes: 20 << 20, 14 | FlushInterval: 15 * time.Second, 15 | BufferSize: 5 << 10, 16 | Verbosity: 0, 17 | }) 18 | 19 | if err != nil { 20 | log.Fatalf("error opening: %s", err) 21 | } 22 | 23 | go func() { 24 | for file := range b.Queue { 25 | log.Printf("flushed %+v", file) 26 | } 27 | }() 28 | 29 | ops := 10000000 30 | con := 80 31 | per := ops / con 32 | start := time.Now() 33 | 34 | <-coop.Replicate(con, func() { 35 | for i := 0; i < per; i++ { 36 | _, err := b.Write([]byte("Tobi Ferret")) 37 | if err != nil { 38 | log.Fatalf("error writing: %s", err) 39 | } 40 | } 41 | }) 42 | 43 | err = b.Close() 44 | if err != nil { 45 | log.Fatalf("error closing: %s", err) 46 | } 47 | 48 | 
log.Printf("ops: %d total, %d per, %d concurrent", ops, per, con) 49 | log.Printf("duration: %s", time.Since(start)) 50 | } 51 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | (The MIT License) 2 | 3 | Copyright (c) 2017 TJ Holowaychuk tj@tjholowaychuk.com 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | 'Software'), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # buffer 2 | 3 | Package go-disk-buffer provides an io.Writer as a 1:N on-disk buffer, publishing 4 | flushed files to a channel for processing. 5 | 6 | Files may be flushed via interval, write count, or byte size. 7 | 8 | All exported methods are thread-safe. 
9 | 10 | ## Usage 11 | 12 | #### type Buffer 13 | 14 | ```go 15 | type Buffer struct { 16 | *Config 17 | 18 | sync.RWMutex 19 | } 20 | ``` 21 | 22 | Buffer represents a 1:N on-disk buffer. 23 | 24 | #### func New 25 | 26 | ```go 27 | func New(path string, config *Config) (*Buffer, error) 28 | ``` 29 | New buffer at `path`. The path given is used for the base of the filenames 30 | created, which append ".{pid}.{id}.{fid}". 31 | 32 | #### func (*Buffer) Bytes 33 | 34 | ```go 35 | func (b *Buffer) Bytes() int64 36 | ``` 37 | Bytes returns the number of bytes written to the current file. 38 | 39 | #### func (*Buffer) Close 40 | 41 | ```go 42 | func (b *Buffer) Close() error 43 | ``` 44 | Close the underlying file after flushing. 45 | 46 | #### func (*Buffer) Flush 47 | 48 | ```go 49 | func (b *Buffer) Flush() error 50 | ``` 51 | Flush forces a flush. 52 | 53 | #### func (*Buffer) Write 54 | 55 | ```go 56 | func (b *Buffer) Write(data []byte) (int, error) 57 | ``` 58 | Write implements io.Writer. 59 | 60 | #### func (*Buffer) Writes 61 | 62 | ```go 63 | func (b *Buffer) Writes() int64 64 | ``` 65 | Writes returns the number of writes made to the current file. 66 | 67 | #### type Config 68 | 69 | ```go 70 | type Config struct { 71 | FlushWrites int64 // Flush after N writes, zero to disable 72 | FlushBytes int64 // Flush after N bytes, zero to disable 73 | FlushInterval time.Duration // Flush after duration, zero to disable 74 | BufferSize int // Buffer size for writes 75 | Queue chan *Flush // Queue of flushed files 76 | Verbosity int // Verbosity level, 0-3 77 | Logger *log.Logger // Logger instance 78 | } 79 | ``` 80 | 81 | Config for disk buffer. 82 | 83 | #### func (*Config) Validate 84 | 85 | ```go 86 | func (c *Config) Validate() error 87 | ``` 88 | Validate the configuration. 
89 | 90 | #### type Flush 91 | 92 | ```go 93 | type Flush struct { 94 | Reason Reason `json:"reason"` 95 | Path string `json:"path"` 96 | Writes int64 `json:"writes"` 97 | Bytes int64 `json:"bytes"` 98 | Opened time.Time `json:"opened"` 99 | Closed time.Time `json:"closed"` 100 | Age time.Duration `json:"age"` 101 | } 102 | ``` 103 | 104 | Flush represents a flushed file. 105 | 106 | #### type Reason 107 | 108 | ```go 109 | type Reason string 110 | ``` 111 | 112 | Reason for flush. 113 | 114 | ```go 115 | const ( 116 | Forced Reason = "forced" 117 | Writes Reason = "writes" 118 | Bytes Reason = "bytes" 119 | Interval Reason = "interval" 120 | ) 121 | ``` 122 | Flush reasons. 123 | -------------------------------------------------------------------------------- /buffer_test.go: -------------------------------------------------------------------------------- 1 | package buffer 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/tj/assert" 8 | ) 9 | 10 | var config = &Config{ 11 | Queue: make(chan *Flush, 100), 12 | FlushWrites: 1000, 13 | FlushBytes: 1000, 14 | FlushInterval: time.Second, 15 | Verbosity: 0, 16 | } 17 | 18 | func discard(b *Buffer) { 19 | go func() { 20 | for range b.Queue { 21 | 22 | } 23 | }() 24 | } 25 | 26 | func write(buffer *Buffer, n int, b []byte) { 27 | go func() { 28 | for i := 0; i < n; i++ { 29 | _, err := buffer.Write(b) 30 | if err != nil { 31 | panic(err) 32 | } 33 | } 34 | }() 35 | } 36 | 37 | // Test immediate open / close. 38 | func TestBuffer_Open(t *testing.T) { 39 | b, err := New("/tmp/buffer", config) 40 | assert.Equal(t, nil, err) 41 | 42 | err = b.Close() 43 | assert.Equal(t, nil, err) 44 | } 45 | 46 | // Test buffer writes. 
47 | func TestBuffer_Write(t *testing.T) { 48 | b, err := New("/tmp/buffer", config) 49 | assert.Equal(t, nil, err) 50 | 51 | n, err := b.Write([]byte("hello")) 52 | assert.Equal(t, nil, err) 53 | assert.Equal(t, 5, n) 54 | assert.Equal(t, int64(1), b.writes) 55 | assert.Equal(t, int64(5), b.bytes) 56 | 57 | n, err = b.Write([]byte("world")) 58 | assert.Equal(t, nil, err) 59 | assert.Equal(t, 5, n) 60 | assert.Equal(t, int64(2), b.writes) 61 | assert.Equal(t, int64(10), b.bytes) 62 | 63 | err = b.Close() 64 | assert.Equal(t, nil, err) 65 | } 66 | 67 | // Test flushing on write count. 68 | func TestBuffer_Write_FlushOnWrites(t *testing.T) { 69 | b, err := New("/tmp/buffer", &Config{ 70 | Queue: make(chan *Flush, 100), 71 | FlushWrites: 10, 72 | FlushBytes: 1024, 73 | FlushInterval: time.Second, 74 | Verbosity: 0, 75 | }) 76 | 77 | assert.Equal(t, nil, err) 78 | 79 | write(b, 25, []byte("hello")) 80 | 81 | flush := <-b.Queue 82 | assert.Equal(t, int64(10), flush.Writes) 83 | assert.Equal(t, int64(50), flush.Bytes) 84 | assert.Equal(t, Writes, flush.Reason) 85 | 86 | flush = <-b.Queue 87 | assert.Equal(t, int64(10), flush.Writes) 88 | assert.Equal(t, int64(50), flush.Bytes) 89 | assert.Equal(t, Writes, flush.Reason) 90 | 91 | err = b.Close() 92 | assert.Equal(t, nil, err) 93 | } 94 | 95 | // Test flushing on byte count. 
96 | func TestBuffer_Write_FlushOnBytes(t *testing.T) { 97 | b, err := New("/tmp/buffer", &Config{ 98 | Queue: make(chan *Flush, 100), 99 | FlushWrites: 10000, 100 | FlushBytes: 1024, 101 | FlushInterval: time.Second, 102 | Verbosity: 0, 103 | }) 104 | 105 | assert.Equal(t, nil, err) 106 | 107 | write(b, 250, []byte("hello world")) 108 | flush := <-b.Queue 109 | assert.Equal(t, int64(94), flush.Writes) 110 | assert.Equal(t, int64(1034), flush.Bytes) 111 | assert.Equal(t, Bytes, flush.Reason) 112 | 113 | flush = <-b.Queue 114 | assert.Equal(t, int64(94), flush.Writes) 115 | assert.Equal(t, int64(1034), flush.Bytes) 116 | assert.Equal(t, Bytes, flush.Reason) 117 | 118 | err = b.Close() 119 | assert.Equal(t, nil, err) 120 | } 121 | 122 | // Test flushing on interval. 123 | func TestBuffer_Write_FlushOnInterval(t *testing.T) { 124 | b, err := New("/tmp/buffer", &Config{ 125 | Queue: make(chan *Flush, 100), 126 | FlushInterval: time.Second, 127 | }) 128 | 129 | assert.Equal(t, nil, err) 130 | 131 | b.Write([]byte("hello world")) 132 | b.Write([]byte("hello world")) 133 | 134 | flush := <-b.Queue 135 | assert.Equal(t, int64(2), flush.Writes) 136 | assert.Equal(t, int64(22), flush.Bytes) 137 | assert.Equal(t, Interval, flush.Reason) 138 | 139 | err = b.Close() 140 | assert.Equal(t, nil, err) 141 | } 142 | 143 | // Test config validation. 144 | func TestConfig_Validate(t *testing.T) { 145 | _, err := New("/tmp/buffer", &Config{}) 146 | assert.Equal(t, "at least one flush mechanism must be non-zero", err.Error()) 147 | } 148 | 149 | // Benchmark buffer writes. 
150 | func BenchmarkBuffer_Write(t *testing.B) { 151 | b, err := New("/tmp/buffer", &Config{ 152 | FlushWrites: 30000, 153 | FlushBytes: 1 << 30, 154 | FlushInterval: time.Minute, 155 | Verbosity: 0, 156 | }) 157 | 158 | if err != nil { 159 | t.Fatalf("error: %s", err) 160 | } 161 | 162 | discard(b) 163 | 164 | t.ResetTimer() 165 | 166 | t.RunParallel(func(pb *testing.PB) { 167 | for pb.Next() { 168 | b.Write([]byte("hello world")) 169 | } 170 | }) 171 | } 172 | 173 | // Benchmark buffer writes with bufio. 174 | func BenchmarkBuffer_Write_Bufio(t *testing.B) { 175 | b, err := New("/tmp/buffer", &Config{ 176 | FlushWrites: 30000, 177 | FlushBytes: 1 << 30, 178 | FlushInterval: time.Minute, 179 | BufferSize: 1 << 10, 180 | Verbosity: 0, 181 | }) 182 | 183 | if err != nil { 184 | t.Fatalf("error: %s", err) 185 | } 186 | 187 | discard(b) 188 | 189 | t.ResetTimer() 190 | 191 | t.RunParallel(func(pb *testing.PB) { 192 | for pb.Next() { 193 | b.Write([]byte("hello world")) 194 | } 195 | }) 196 | } 197 | -------------------------------------------------------------------------------- /buffer.go: -------------------------------------------------------------------------------- 1 | // Package buffer provides an io.Writer as a 1:N on-disk buffer, 2 | // publishing flushed files to a channel for processing. 3 | // 4 | // Files may be flushed via interval, write count, or byte size. 5 | // 6 | // All exported methods are thread-safe. 7 | package buffer 8 | 9 | import ( 10 | "bufio" 11 | "fmt" 12 | "log" 13 | "os" 14 | "sync" 15 | "sync/atomic" 16 | "time" 17 | ) 18 | 19 | // PID for unique filename. 20 | var pid = os.Getpid() 21 | 22 | // Ids for unique filename. 23 | var ids = int64(0) 24 | 25 | // Reason for flush. 26 | type Reason string 27 | 28 | // Flush reasons. 29 | const ( 30 | Forced Reason = "forced" 31 | Writes Reason = "writes" 32 | Bytes Reason = "bytes" 33 | Interval Reason = "interval" 34 | ) 35 | 36 | // Flush represents a flushed file. 
37 | type Flush struct { 38 | Reason Reason `json:"reason"` 39 | Path string `json:"path"` 40 | Writes int64 `json:"writes"` 41 | Bytes int64 `json:"bytes"` 42 | Opened time.Time `json:"opened"` 43 | Closed time.Time `json:"closed"` 44 | Age time.Duration `json:"age"` 45 | } 46 | 47 | // Config for disk buffer. 48 | type Config struct { 49 | FlushWrites int64 // Flush after N writes, zero to disable 50 | FlushBytes int64 // Flush after N bytes, zero to disable 51 | FlushInterval time.Duration // Flush after duration, zero to disable 52 | BufferSize int // Buffer size for writes 53 | Queue chan *Flush // Queue of flushed files 54 | Verbosity int // Verbosity level, 0-3 55 | Logger *log.Logger // Logger instance 56 | } 57 | 58 | // Validate the configuration. 59 | func (c *Config) Validate() error { 60 | switch { 61 | case c.FlushBytes == 0 && c.FlushWrites == 0 && c.FlushInterval == 0: 62 | return fmt.Errorf("at least one flush mechanism must be non-zero") 63 | default: 64 | return nil 65 | } 66 | } 67 | 68 | // Buffer represents a 1:N on-disk buffer. 69 | type Buffer struct { 70 | *Config 71 | 72 | verbosity int 73 | path string 74 | ids int64 75 | id int64 76 | 77 | sync.RWMutex 78 | buf *bufio.Writer 79 | opened time.Time 80 | writes int64 81 | bytes int64 82 | file *os.File 83 | tick *time.Ticker 84 | } 85 | 86 | // New buffer at `path`. The path given is used for the base 87 | // of the filenames created, which append ".{pid}.{id}.{fid}". 
88 | func New(path string, config *Config) (*Buffer, error) { 89 | id := atomic.AddInt64(&ids, 1) 90 | 91 | b := &Buffer{ 92 | Config: config, 93 | path: path, 94 | id: id, 95 | verbosity: 1, 96 | } 97 | 98 | if b.Logger == nil { 99 | prefix := fmt.Sprintf("buffer #%d %q ", b.id, path) 100 | b.Logger = log.New(os.Stderr, prefix, log.LstdFlags) 101 | } 102 | 103 | if b.Queue == nil { 104 | b.Queue = make(chan *Flush) 105 | } 106 | 107 | if b.FlushInterval != 0 { 108 | b.tick = time.NewTicker(config.FlushInterval) 109 | go b.loop() 110 | } 111 | 112 | err := config.Validate() 113 | if err != nil { 114 | return nil, err 115 | } 116 | 117 | return b, b.open() 118 | } 119 | 120 | // Write implements io.Writer. 121 | func (b *Buffer) Write(data []byte) (int, error) { 122 | b.log(3, "write %s", data) 123 | 124 | b.Lock() 125 | defer b.Unlock() 126 | 127 | n, err := b.write(data) 128 | if err != nil { 129 | return n, err 130 | } 131 | 132 | if b.FlushWrites != 0 && b.writes >= b.FlushWrites { 133 | err := b.flush(Writes) 134 | if err != nil { 135 | return n, err 136 | } 137 | } 138 | 139 | if b.FlushBytes != 0 && b.bytes >= b.FlushBytes { 140 | err := b.flush(Bytes) 141 | if err != nil { 142 | return n, err 143 | } 144 | } 145 | 146 | return n, err 147 | } 148 | 149 | // Close the underlying file after flushing. 150 | func (b *Buffer) Close() error { 151 | b.Lock() 152 | defer b.Unlock() 153 | 154 | if b.tick != nil { 155 | b.tick.Stop() 156 | } 157 | 158 | return b.flush(Forced) 159 | } 160 | 161 | // Flush forces a flush. 162 | func (b *Buffer) Flush() error { 163 | b.Lock() 164 | defer b.Unlock() 165 | return b.flush(Forced) 166 | } 167 | 168 | // Writes returns the number of writes made to the current file. 169 | func (b *Buffer) Writes() int64 { 170 | b.RLock() 171 | defer b.RUnlock() 172 | return b.writes 173 | } 174 | 175 | // Bytes returns the number of bytes made to the current file. 
176 | func (b *Buffer) Bytes() int64 { 177 | b.RLock() 178 | defer b.RUnlock() 179 | return b.bytes 180 | } 181 | 182 | // Loop for flush interval. 183 | func (b *Buffer) loop() { 184 | for range b.tick.C { 185 | b.Lock() 186 | b.flush(Interval) 187 | b.Unlock() 188 | } 189 | } 190 | 191 | // Open a new buffer. 192 | func (b *Buffer) open() error { 193 | path := b.pathname() 194 | 195 | b.log(1, "opening %s", path) 196 | f, err := os.Create(path) 197 | if err != nil { 198 | return err 199 | } 200 | 201 | b.log(2, "buffer size %d", b.BufferSize) 202 | if b.BufferSize != 0 { 203 | b.buf = bufio.NewWriterSize(f, b.BufferSize) 204 | } 205 | 206 | b.log(2, "reset state") 207 | b.opened = time.Now() 208 | b.writes = 0 209 | b.bytes = 0 210 | b.file = f 211 | 212 | return nil 213 | } 214 | 215 | // Write with metrics. 216 | func (b *Buffer) write(data []byte) (int, error) { 217 | b.writes++ 218 | b.bytes += int64(len(data)) 219 | 220 | if b.BufferSize != 0 { 221 | return b.buf.Write(data) 222 | } 223 | 224 | return b.file.Write(data) 225 | } 226 | 227 | // Flush for the given reason and re-open. 228 | func (b *Buffer) flush(reason Reason) error { 229 | b.log(1, "flushing (%s)", reason) 230 | 231 | if b.writes == 0 { 232 | b.log(2, "nothing to flush") 233 | return nil 234 | } 235 | 236 | err := b.close() 237 | if err != nil { 238 | return err 239 | } 240 | 241 | b.Queue <- &Flush{ 242 | Reason: reason, 243 | Writes: b.writes, 244 | Bytes: b.bytes, 245 | Opened: b.opened, 246 | Closed: time.Now(), 247 | Path: b.file.Name() + ".closed", 248 | Age: time.Since(b.opened), 249 | } 250 | 251 | return b.open() 252 | } 253 | 254 | // Close existing file after a rename. 
255 | func (b *Buffer) close() error { 256 | if b.file == nil { 257 | return nil 258 | } 259 | 260 | path := b.file.Name() 261 | 262 | b.log(2, "renaming %q", path) 263 | err := os.Rename(path, path+".closed") 264 | if err != nil { 265 | return err 266 | } 267 | 268 | if b.BufferSize != 0 { 269 | b.log(2, "flushing %q", path) 270 | err = b.buf.Flush() 271 | if err != nil { 272 | return err 273 | } 274 | } 275 | 276 | b.log(2, "closing %q", path) 277 | return b.file.Close() 278 | } 279 | 280 | // Pathname for a new buffer. 281 | func (b *Buffer) pathname() string { 282 | fid := atomic.AddInt64(&b.ids, 1) 283 | return fmt.Sprintf("%s.%d.%d.%d", b.path, pid, b.id, fid) 284 | } 285 | 286 | // Log helper. 287 | func (b *Buffer) log(n int, msg string, args ...interface{}) { 288 | if b.Verbosity >= n { 289 | b.Logger.Printf(msg, args...) 290 | } 291 | } 292 | --------------------------------------------------------------------------------