├── fixtures ├── basic.log └── windows.log ├── .gitignore ├── .travis.yml ├── script ├── test ├── build └── deb ├── config.json.sample ├── exponential_backoff.go ├── snapshotter.go ├── statistics_server.go ├── LICENSE.md ├── exponential_backoff_test.go ├── supervisor_test.go ├── bolt_snapshotter.go ├── client └── client.go ├── bolt_snapshotter_test.go ├── file_reader_pool_test.go ├── spooler.go ├── file_reader_pool.go ├── lumberjack ├── client_test.go ├── client.go └── server.go ├── config.go ├── cmd └── butteredscones │ └── butteredscones.go ├── file_reader.go ├── README.md ├── statistics.go ├── file_reader_test.go └── supervisor.go /fixtures/basic.log: -------------------------------------------------------------------------------- 1 | line1 2 | line2 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/ 2 | /state.db 3 | *.deb 4 | -------------------------------------------------------------------------------- /fixtures/windows.log: -------------------------------------------------------------------------------- 1 | line1 2 | line2 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - 1.3 4 | script: script/test 5 | -------------------------------------------------------------------------------- /script/test: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | base="$(cd "$(dirname "${BASH_SOURCE[0]}")/.."; pwd)" 5 | 6 | cd "$base" 7 | go test -v . 
./lumberjack 8 | -------------------------------------------------------------------------------- /script/build: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | base="$(cd "$(dirname "${BASH_SOURCE[0]}")/../cmd/butteredscones"; pwd)" 5 | 6 | cd "$base" 7 | mkdir -p bin 8 | go build -o bin/buttered-scones *.go 9 | -------------------------------------------------------------------------------- /config.json.sample: -------------------------------------------------------------------------------- 1 | { 2 | "state": "state.db", 3 | "max_length": 8192, 4 | 5 | "network": { 6 | "servers": [ 7 | { 8 | "addr": "logstash.internal.example.com:5043" 9 | } 10 | ], 11 | "certificate": "/etc/butteredscones/forwarder.crt", 12 | "key": "/etc/butteredscones/forwarder.key", 13 | "ca": "/etc/butteredscones/ca.crt", 14 | "timeout": 15 15 | }, 16 | 17 | "statistics": { 18 | "addr": "127.0.0.1:8088" 19 | }, 20 | 21 | "files": [ 22 | { 23 | "paths": ["/var/log/messages", "/var/log/*.log"], 24 | "fields": {"type": "syslog"} 25 | } 26 | ] 27 | } 28 | -------------------------------------------------------------------------------- /exponential_backoff.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | type ExponentialBackoff struct { 8 | Minimum time.Duration 9 | Maximum time.Duration 10 | 11 | current time.Duration 12 | } 13 | 14 | func (b *ExponentialBackoff) Current() time.Duration { 15 | if b.current == 0 { 16 | b.current = b.Minimum 17 | } 18 | 19 | return b.current 20 | } 21 | 22 | func (b *ExponentialBackoff) Next() time.Duration { 23 | if b.current == 0 { 24 | b.current = b.Minimum 25 | return b.current 26 | } else { 27 | b.current = b.current * 2 28 | if b.current >= b.Maximum { 29 | b.current = b.Maximum 30 | } 31 | 32 | return b.current 33 | } 34 | } 35 | 36 | func (b *ExponentialBackoff) Reset() { 37 | b.current 
= 0 38 | } 39 | -------------------------------------------------------------------------------- /script/deb: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | base="$(cd "$(dirname "${BASH_SOURCE[0]}")/.."; pwd)" 5 | 6 | if [ ! which fpm &> /dev/null ]; then 7 | echo "fpm was not found; please install it: gem install fpm" 8 | exit 1 9 | fi 10 | 11 | if [ -z "$GOOS" -o -z "$GOARCH" ]; then 12 | echo "Set GOOS and GOARCH to make sure you are building for the correct platform" 13 | echo "You probably want GOOS=linux GOARCH=amd64" 14 | exit 1 15 | fi 16 | 17 | : ${VERSION:="$(git tag --contains HEAD | head -1)"} 18 | if [ -z "$VERSION" ]; then 19 | echo "Set VERSION as the version of the deb package to build" 20 | echo "For example, VERSION=1.2.0" 21 | exit 1 22 | fi 23 | 24 | : ${PREFIX:="/opt/buttered-scones"} 25 | 26 | $base/script/build 27 | 28 | fpm -s dir -t deb -n buttered-scones -v "$VERSION" \ 29 | --description "transports log files over the network to logstash" \ 30 | --url "https://github.com/digitalocean/buttered-scones" \ 31 | "${base}/bin/buttered-scones=${PREFIX}/bin/buttered-scones" 32 | -------------------------------------------------------------------------------- /snapshotter.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | type HighWaterMark struct { 4 | FilePath string 5 | 6 | // Position is the index in the file after a given line. Seeking to it would 7 | // read the next line. 
8 | Position int64 9 | } 10 | 11 | type Snapshotter interface { 12 | HighWaterMark(filePath string) (*HighWaterMark, error) 13 | SetHighWaterMarks(marks []*HighWaterMark) error 14 | } 15 | 16 | type MemorySnapshotter struct { 17 | files map[string]int64 18 | } 19 | 20 | func (s *MemorySnapshotter) HighWaterMark(filePath string) (*HighWaterMark, error) { 21 | highWaterMark := &HighWaterMark{FilePath: filePath} 22 | if s.files != nil { 23 | highWaterMark.Position = s.files[filePath] 24 | } 25 | 26 | return highWaterMark, nil 27 | } 28 | 29 | func (s *MemorySnapshotter) SetHighWaterMarks(marks []*HighWaterMark) error { 30 | if s.files == nil { 31 | s.files = make(map[string]int64) 32 | } 33 | 34 | for _, mark := range marks { 35 | s.files[mark.FilePath] = mark.Position 36 | } 37 | return nil 38 | } 39 | -------------------------------------------------------------------------------- /statistics_server.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "encoding/json" 5 | "net/http" 6 | ) 7 | 8 | // StatisticsServer constructs an HTTP server that returns JSON formatted 9 | // statistics. These statistics can be used for debugging or automated 10 | // monitoring. 
11 | type StatisticsServer struct { 12 | Statistics *Statistics 13 | Addr string 14 | } 15 | 16 | func (s *StatisticsServer) ListenAndServe() error { 17 | mux := http.NewServeMux() 18 | mux.HandleFunc("/", s.handleRoot) 19 | 20 | server := &http.Server{ 21 | Addr: s.Addr, 22 | Handler: mux, 23 | } 24 | 25 | return server.ListenAndServe() 26 | } 27 | 28 | func (s *StatisticsServer) handleRoot(writer http.ResponseWriter, request *http.Request) { 29 | s.Statistics.UpdateFileSizeStatistics() 30 | 31 | jsonStats, err := json.Marshal(s.Statistics) 32 | if err != nil { 33 | writer.WriteHeader(500) 34 | writer.Write([]byte(err.Error())) 35 | } else { 36 | writer.Header().Add("Content-Type", "application/json") 37 | writer.Write(jsonStats) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 butteredscones authors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /exponential_backoff_test.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func TestExponentialBackoff(t *testing.T) { 9 | backoff := &ExponentialBackoff{ 10 | Minimum: 1 * time.Second, 11 | Maximum: 10 * time.Second, 12 | } 13 | 14 | dur := backoff.Next() 15 | if dur != 1*time.Second { 16 | t.Fatalf("Expected %q, but got %q", 1*time.Second, dur) 17 | } 18 | 19 | dur = backoff.Next() 20 | if dur != 2*time.Second { 21 | t.Fatalf("Expected %q, but got %q", 2*time.Second, dur) 22 | } 23 | 24 | dur = backoff.Next() 25 | if dur != 4*time.Second { 26 | t.Fatalf("Expected %q, but got %q", 4*time.Second, dur) 27 | } 28 | 29 | dur = backoff.Next() 30 | if dur != 8*time.Second { 31 | t.Fatalf("Expected %q, but got %q", 8*time.Second, dur) 32 | } 33 | 34 | // Maximum 35 | dur = backoff.Next() 36 | if dur != 10*time.Second { 37 | t.Fatalf("Expected %q, but got %q", 10*time.Second, dur) 38 | } 39 | 40 | // Maximum again 41 | dur = backoff.Next() 42 | if dur != 10*time.Second { 43 | t.Fatalf("Expected %q, but got %q", 10*time.Second, dur) 44 | } 45 | 46 | // Reset 47 | backoff.Reset() 48 | 49 | dur = backoff.Next() 50 | if dur != 1*time.Second { 51 | t.Fatalf("Expected %q, but got %q", 1*time.Second, dur) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /supervisor_test.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "testing" 7 | "time" 8 | 9 | 
"github.com/digitalocean/butteredscones/client" 10 | ) 11 | 12 | func TestSupervisorSmokeTest(t *testing.T) { 13 | tmpFile, err := ioutil.TempFile("", "butteredscones") 14 | if err != nil { 15 | t.Fatal(err) 16 | } 17 | defer tmpFile.Close() 18 | defer os.Remove(tmpFile.Name()) 19 | 20 | _, err = tmpFile.Write([]byte("line1\n")) 21 | if err != nil { 22 | t.Fatal(err) 23 | } 24 | 25 | files := []FileConfiguration{ 26 | FileConfiguration{Paths: []string{tmpFile.Name()}, Fields: map[string]string{"field1": "value1"}}, 27 | } 28 | testClient := &client.TestClient{} 29 | snapshotter := &MemorySnapshotter{} 30 | 31 | supervisor := NewSupervisor(files, []client.Client{testClient}, snapshotter, 0) 32 | supervisor.Start() 33 | defer supervisor.Stop() 34 | 35 | <-time.After(250 * time.Millisecond) 36 | if testClient.DataSent == nil { 37 | t.Fatalf("no data sent on test client before timeout") 38 | } 39 | 40 | data := testClient.DataSent[0] 41 | if data["line"] != "line1" { 42 | t.Fatalf("expected [\"line\"] to be %q, but got %q", "line1", data["line"]) 43 | } 44 | 45 | hwm, err := snapshotter.HighWaterMark(tmpFile.Name()) 46 | if err != nil { 47 | t.Fatal(err) 48 | } 49 | if hwm.Position != 6 { 50 | t.Fatalf("expected high water mark position to be %d, but got %d", 6, hwm.Position) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /bolt_snapshotter.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "github.com/boltdb/bolt" 5 | "strconv" 6 | ) 7 | 8 | const ( 9 | boltSnapshotterBucket = "high_water_marks" 10 | ) 11 | 12 | type BoltSnapshotter struct { 13 | DB *bolt.DB 14 | } 15 | 16 | func (s *BoltSnapshotter) HighWaterMark(filePath string) (*HighWaterMark, error) { 17 | highWaterMark := &HighWaterMark{FilePath: filePath} 18 | err := s.DB.View(func(tx *bolt.Tx) error { 19 | bucket := tx.Bucket([]byte(boltSnapshotterBucket)) 20 | if bucket == nil 
{ 21 | return nil 22 | } 23 | 24 | positionBytes := bucket.Get([]byte(filePath)) 25 | if positionBytes == nil { 26 | return nil 27 | } 28 | 29 | position, err := strconv.ParseInt(string(positionBytes), 10, 64) 30 | if err != nil { 31 | return err 32 | } 33 | 34 | highWaterMark.Position = position 35 | return nil 36 | }) 37 | 38 | if err != nil { 39 | return nil, err 40 | } 41 | return highWaterMark, nil 42 | } 43 | 44 | func (s *BoltSnapshotter) SetHighWaterMarks(marks []*HighWaterMark) error { 45 | err := s.DB.Update(func(tx *bolt.Tx) error { 46 | bucket, err := tx.CreateBucketIfNotExists([]byte(boltSnapshotterBucket)) 47 | if err != nil { 48 | return err 49 | } 50 | 51 | for _, mark := range marks { 52 | err = bucket.Put([]byte(mark.FilePath), []byte(strconv.FormatInt(mark.Position, 10))) 53 | if err != nil { 54 | return err 55 | } 56 | } 57 | 58 | return nil 59 | }) 60 | 61 | return err 62 | } 63 | -------------------------------------------------------------------------------- /client/client.go: -------------------------------------------------------------------------------- 1 | package client 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | type Data map[string]string 8 | 9 | type Client interface { 10 | // A human-readable unique name for the client, for use in statistics. A 11 | // reasonable name for a remote client would be the hostname:port, for 12 | // instance. 13 | Name() string 14 | 15 | // Send forwards a payload of `Data` instances to a remote system 16 | Send(lines []Data) error 17 | } 18 | 19 | // TestClient is an in-memory client that allows inspecting the data that was 20 | // 'sent' thorugh it. It is useful in test cases. 21 | type TestClient struct { 22 | DataSent []Data 23 | 24 | // Set Error to return an error to clients when they call Send. It is useful 25 | // for testing how they react to errors. 
26 | Error error 27 | } 28 | 29 | func (c *TestClient) Name() string { 30 | return fmt.Sprintf("TestClient[%p]", c) 31 | } 32 | 33 | func (c *TestClient) Send(lines []Data) error { 34 | if c.DataSent == nil { 35 | c.DataSent = make([]Data, 0) 36 | } 37 | 38 | if c.Error != nil { 39 | return c.Error 40 | } else { 41 | c.DataSent = append(c.DataSent, lines...) 42 | return nil 43 | } 44 | } 45 | 46 | // StdoutClient writes messages to stardard out. It was useful for development. 47 | type StdoutClient struct { 48 | } 49 | 50 | func (c *StdoutClient) Name() string { 51 | return fmt.Sprintf("StdoutClient[%p]", c) 52 | } 53 | 54 | func (c *StdoutClient) Send(lines []Data) error { 55 | for _, data := range lines { 56 | fmt.Printf("%#v\n", data) 57 | } 58 | 59 | return nil 60 | } 61 | -------------------------------------------------------------------------------- /bolt_snapshotter_test.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "github.com/boltdb/bolt" 5 | "io/ioutil" 6 | "os" 7 | "testing" 8 | "time" 9 | ) 10 | 11 | func TestBoltSnapshotter(t *testing.T) { 12 | tmpFile, err := ioutil.TempFile("", "butteredscones") 13 | if err != nil { 14 | t.Fatal(err) 15 | } 16 | defer os.Remove(tmpFile.Name()) 17 | 18 | db, err := bolt.Open(tmpFile.Name(), 0600, &bolt.Options{Timeout: 1 * time.Second}) 19 | if err != nil { 20 | t.Fatal(err) 21 | } 22 | 23 | var snapshotter Snapshotter 24 | snapshotter = &BoltSnapshotter{DB: db} 25 | 26 | // Default is 0 27 | highWaterMark, err := snapshotter.HighWaterMark("/tmp/foo") 28 | if err != nil { 29 | t.Fatal(err) 30 | } 31 | if highWaterMark.FilePath != "/tmp/foo" { 32 | t.Fatalf("Expected FilePath=%q, but got %q", "/tmp/foo", highWaterMark.FilePath) 33 | } 34 | if highWaterMark.Position != 0 { 35 | t.Fatalf("Expected Position=%d, but got %d", 0, highWaterMark.Position) 36 | } 37 | 38 | // Set 39 | err = snapshotter.SetHighWaterMarks([]*HighWaterMark{ 40 
| &HighWaterMark{FilePath: "/tmp/foo", Position: 10245}, 41 | }) 42 | if err != nil { 43 | t.Fatal(err) 44 | } 45 | 46 | // Retrieve the value we just stored 47 | highWaterMark, err = snapshotter.HighWaterMark("/tmp/foo") 48 | if err != nil { 49 | t.Fatal(err) 50 | } 51 | if highWaterMark.FilePath != "/tmp/foo" { 52 | t.Fatalf("Expected FilePath=%q, but got %q", "/tmp/foo", highWaterMark.FilePath) 53 | } 54 | if highWaterMark.Position != 10245 { 55 | t.Fatalf("Expected Position=%d, but got %d", 10245, highWaterMark.Position) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /file_reader_pool_test.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | func TestFileReaderPoolLockUnlock(t *testing.T) { 11 | tmpFile, err := ioutil.TempFile("", "butteredscones") 12 | if err != nil { 13 | t.Fatal(err) 14 | } 15 | defer tmpFile.Close() 16 | defer os.Remove(tmpFile.Name()) 17 | 18 | pool := NewFileReaderPool() 19 | reader, _ := NewFileReader(tmpFile, map[string]string{}, 128, 0) 20 | pool.Add(reader) 21 | 22 | lockedReaders := make(chan *FileReader) 23 | go func() { 24 | for i := 0; i < 20; i++ { 25 | if lockedReader := pool.LockNext(); lockedReader != nil { 26 | lockedReaders <- lockedReader 27 | } 28 | <-time.After(10 * time.Millisecond) 29 | } 30 | }() 31 | 32 | select { 33 | case lockedReader := <-lockedReaders: 34 | if lockedReader != reader { 35 | t.Fatalf("Expected reader %p but got %p", reader, lockedReader) 36 | } 37 | // Attempting to grab another reader should be nil 38 | if anotherLockedReader := pool.LockNext(); anotherLockedReader != nil { 39 | t.Fatalf("Expected to get nil when locking another reader, but got %#v", anotherLockedReader) 40 | } 41 | // Unlock the reader to make it available again 42 | pool.Unlock(lockedReader) 43 | case <-time.After(50 * time.Millisecond): 
44 | t.Fatalf("Timed out") 45 | } 46 | 47 | select { 48 | case lockedReader := <-lockedReaders: 49 | if lockedReader != reader { 50 | t.Fatalf("Expected reader %p but got %p", reader, lockedReader) 51 | } 52 | case <-time.After(50 * time.Millisecond): 53 | t.Fatalf("Timed out") 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /spooler.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | // Spooler accepts items on the In channel and chunks them into items on the 8 | // Out channel. 9 | type Spooler struct { 10 | In chan *FileData 11 | Out chan []*FileData 12 | 13 | size int 14 | timeout time.Duration 15 | } 16 | 17 | const ( 18 | // The number of items that can be buffered in the Out channel. 19 | spoolOutBuffer = 4 20 | ) 21 | 22 | func NewSpooler(size int, timeout time.Duration) *Spooler { 23 | return &Spooler{ 24 | In: make(chan *FileData, size*spoolOutBuffer), 25 | Out: make(chan []*FileData, spoolOutBuffer), 26 | size: size, 27 | timeout: timeout, 28 | } 29 | } 30 | 31 | // Spool accepts items from the In channel and spools them into the Out 32 | // channel. To stop the spooling, close the In channel. 33 | func (s *Spooler) Spool() { 34 | timer := time.NewTimer(s.timeout) 35 | currentChunk := make([]*FileData, 0, s.size) 36 | for { 37 | select { 38 | case fileData, ok := <-s.In: 39 | if ok { 40 | currentChunk = append(currentChunk, fileData) 41 | if len(currentChunk) >= s.size { 42 | s.Out <- currentChunk 43 | currentChunk = make([]*FileData, 0, s.size) 44 | } 45 | } else { 46 | return 47 | } 48 | case <-timer.C: 49 | if len(currentChunk) > 0 { 50 | select { 51 | case s.Out <- currentChunk: 52 | currentChunk = make([]*FileData, 0, s.size) 53 | default: 54 | // Never block trying to send to the channel because of a timer 55 | // firing. Otherwise, small chunks may be added to the channel. 
If 56 | // we can't send immediately, we might as well keep spooling to build 57 | // up a bigger chunk. 58 | } 59 | } 60 | } 61 | 62 | timer.Reset(s.timeout) 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /file_reader_pool.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "sync" 5 | ) 6 | 7 | type FileReaderPool struct { 8 | available map[string]*FileReader 9 | locked map[string]*FileReader 10 | lock sync.RWMutex 11 | } 12 | 13 | func NewFileReaderPool() *FileReaderPool { 14 | return &FileReaderPool{ 15 | available: make(map[string]*FileReader), 16 | locked: make(map[string]*FileReader), 17 | } 18 | } 19 | 20 | func (p *FileReaderPool) Counts() (available int, locked int) { 21 | p.lock.RLock() 22 | defer p.lock.RUnlock() 23 | 24 | return len(p.available), len(p.locked) 25 | } 26 | 27 | // TODO: Figure out how to make this block, rather than return nil 28 | func (p *FileReaderPool) LockNext() *FileReader { 29 | p.lock.Lock() 30 | defer p.lock.Unlock() 31 | 32 | for filePath, reader := range p.available { 33 | delete(p.available, filePath) 34 | p.locked[filePath] = reader 35 | return reader 36 | } 37 | 38 | // Nothing available to lock 39 | return nil 40 | } 41 | 42 | func (p *FileReaderPool) Unlock(reader *FileReader) { 43 | p.lock.Lock() 44 | defer p.lock.Unlock() 45 | 46 | filePath := reader.FilePath() 47 | delete(p.locked, filePath) 48 | p.available[filePath] = reader 49 | } 50 | 51 | func (p *FileReaderPool) UnlockAll(readers []*FileReader) { 52 | p.lock.Lock() 53 | defer p.lock.Unlock() 54 | 55 | for _, reader := range readers { 56 | filePath := reader.FilePath() 57 | delete(p.locked, filePath) 58 | p.available[filePath] = reader 59 | } 60 | } 61 | 62 | func (p *FileReaderPool) IsPathInPool(filePath string) bool { 63 | p.lock.RLock() 64 | defer p.lock.RUnlock() 65 | 66 | return (p.available[filePath] != nil || 
p.locked[filePath] != nil) 67 | } 68 | 69 | func (p *FileReaderPool) Add(reader *FileReader) { 70 | p.lock.Lock() 71 | defer p.lock.Unlock() 72 | 73 | filePath := reader.FilePath() 74 | p.available[filePath] = reader 75 | } 76 | 77 | func (p *FileReaderPool) Remove(reader *FileReader) { 78 | p.lock.Lock() 79 | defer p.lock.Unlock() 80 | 81 | filePath := reader.FilePath() 82 | delete(p.available, filePath) 83 | delete(p.locked, filePath) 84 | } 85 | -------------------------------------------------------------------------------- /lumberjack/client_test.go: -------------------------------------------------------------------------------- 1 | package lumberjack 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | "github.com/digitalocean/butteredscones/client" 8 | ) 9 | 10 | func TestClientSmokeTest(t *testing.T) { 11 | server, err := newLumberjackServer(&serverOptions{ 12 | Network: "tcp", 13 | Address: "127.0.0.1:0", // random port 14 | 15 | WriteTimeout: 2 * time.Second, 16 | ReadTimeout: 2 * time.Second, 17 | }) 18 | 19 | if err != nil { 20 | t.Fatal(err) 21 | } 22 | defer server.Close() 23 | 24 | dataCh := make(chan client.Data, 1) 25 | go server.ServeInto(dataCh) 26 | 27 | c := NewClient(&ClientOptions{ 28 | Network: "tcp", 29 | Address: server.Addr().String(), 30 | ConnectionTimeout: 2 * time.Second, 31 | SendTimeout: 2 * time.Second, 32 | }) 33 | 34 | lines := []client.Data{ 35 | client.Data{"line": "foo bar baz", "offset": "25"}, 36 | } 37 | err = c.Send(lines) 38 | if err != nil { 39 | t.Error(err) 40 | } 41 | 42 | select { 43 | case receivedLine := <-dataCh: 44 | if receivedLine["line"] != lines[0]["line"] { 45 | t.Fatalf("Got line of %s, expected %s", receivedLine["line"], lines[0]["line"]) 46 | } 47 | case <-time.After(250 * time.Millisecond): 48 | t.Fatal("Timeout waiting for lines to arrive") 49 | } 50 | } 51 | 52 | func TestClientReconnectSmokeTest(t *testing.T) { 53 | server, err := newLumberjackServer(&serverOptions{ 54 | Network: "tcp", 55 | Address: 
"127.0.0.1:0", // random port 56 | 57 | WriteTimeout: 2 * time.Second, 58 | ReadTimeout: 2 * time.Second, 59 | }) 60 | 61 | if err != nil { 62 | t.Fatal(err) 63 | } 64 | defer server.Close() 65 | 66 | // Without the server accepting connections, we should run into a connection 67 | // timeout 68 | c := NewClient(&ClientOptions{ 69 | Network: "tcp", 70 | Address: server.Addr().String(), 71 | ConnectionTimeout: 1 * time.Second, 72 | SendTimeout: 1 * time.Second, 73 | }) 74 | 75 | lines := []client.Data{ 76 | client.Data{"line": "foo bar baz", "offset": "25"}, 77 | } 78 | err = c.Send(lines) 79 | if err == nil { 80 | t.Fatalf("Expected Send to timeout, but did not") 81 | } 82 | 83 | // Now, setup the server properly, things should go through 84 | dataCh := make(chan client.Data, 1) 85 | go server.ServeInto(dataCh) 86 | 87 | err = c.Send(lines) 88 | if err != nil { 89 | t.Error(err) 90 | } 91 | 92 | select { 93 | case <-dataCh: 94 | // success 95 | case <-time.After(250 * time.Millisecond): 96 | t.Fatal("Timeout waiting for lines to arrive") 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /config.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "crypto/tls" 5 | "crypto/x509" 6 | "encoding/json" 7 | "encoding/pem" 8 | "fmt" 9 | "io/ioutil" 10 | "os" 11 | ) 12 | 13 | type Configuration struct { 14 | State string `json:"state"` 15 | Network NetworkConfiguration `json:"network"` 16 | Statistics StatisticsConfiguration `json:"statistics"` 17 | Files []FileConfiguration `json:"files"` 18 | MaxLength int `json:"max_length"` 19 | } 20 | 21 | type NetworkConfiguration struct { 22 | Servers []ServerConfiguration `json:"servers"` 23 | Certificate string `json:"certificate"` 24 | Key string `json:"key"` 25 | CA string `json:"ca"` 26 | Timeout int `json:"timeout"` 27 | SpoolSize int `json:"spool_size"` 28 | } 29 | 30 | type ServerConfiguration 
struct { 31 | Addr string `json:"addr"` 32 | Name string `json:"name"` 33 | } 34 | 35 | type StatisticsConfiguration struct { 36 | Addr string `json:"addr"` 37 | } 38 | 39 | type FileConfiguration struct { 40 | Paths []string `json:"paths"` 41 | Fields map[string]string `json:"fields"` 42 | } 43 | 44 | func LoadConfiguration(configFile string) (*Configuration, error) { 45 | file, err := os.Open(configFile) 46 | if err != nil { 47 | return nil, err 48 | } 49 | defer file.Close() 50 | 51 | decoder := json.NewDecoder(file) 52 | configuration := new(Configuration) 53 | 54 | err = decoder.Decode(configuration) 55 | if err != nil { 56 | return nil, err 57 | } 58 | return configuration, nil 59 | } 60 | 61 | func (c *Configuration) BuildTLSConfig() (*tls.Config, error) { 62 | if c.Network.Certificate == "" || c.Network.Key == "" { 63 | return nil, fmt.Errorf("certificate and key not specified") 64 | } 65 | 66 | cert, err := tls.LoadX509KeyPair(c.Network.Certificate, c.Network.Key) 67 | if err != nil { 68 | return nil, err 69 | } 70 | 71 | tlsConfig := new(tls.Config) 72 | tlsConfig.Certificates = []tls.Certificate{cert} 73 | 74 | if c.Network.CA != "" { 75 | tlsConfig.RootCAs = x509.NewCertPool() 76 | 77 | data, err := ioutil.ReadFile(c.Network.CA) 78 | if err != nil { 79 | return nil, err 80 | } 81 | 82 | block, _ := pem.Decode(data) 83 | if block == nil { 84 | return nil, fmt.Errorf("CA file %q did not contain PEM encoded data", c.Network.CA) 85 | } 86 | if block.Type != "CERTIFICATE" { 87 | return nil, fmt.Errorf("CA file %q did not contain certificate data", c.Network.CA) 88 | } 89 | 90 | cert, err := x509.ParseCertificate(block.Bytes) 91 | if err != nil { 92 | return nil, err 93 | } 94 | 95 | tlsConfig.RootCAs.AddCert(cert) 96 | } 97 | 98 | return tlsConfig, nil 99 | } 100 | -------------------------------------------------------------------------------- /cmd/butteredscones/butteredscones.go: 
-------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "os" 7 | "os/signal" 8 | "syscall" 9 | "time" 10 | 11 | "github.com/boltdb/bolt" 12 | "github.com/digitalocean/butteredscones" 13 | "github.com/digitalocean/butteredscones/client" 14 | "github.com/digitalocean/butteredscones/lumberjack" 15 | "github.com/technoweenie/grohl" 16 | ) 17 | 18 | func main() { 19 | grohl.AddContext("app", "buttered-scones") 20 | 21 | var configFile string 22 | flag.StringVar(&configFile, "config", "", "configuration file path") 23 | flag.Parse() 24 | 25 | if configFile == "" { 26 | flag.Usage() 27 | os.Exit(1) 28 | } 29 | 30 | config, err := butteredscones.LoadConfiguration(configFile) 31 | if err != nil { 32 | fmt.Printf("error opening configuration file: %s\n", err.Error()) 33 | os.Exit(1) 34 | } 35 | 36 | clients := make([]client.Client, 0, len(config.Network.Servers)) 37 | for _, server := range config.Network.Servers { 38 | tlsConfig, err := config.BuildTLSConfig() 39 | if err != nil { 40 | fmt.Printf("%s\n", err.Error()) 41 | os.Exit(1) 42 | } 43 | tlsConfig.ServerName = server.Name 44 | 45 | options := &lumberjack.ClientOptions{ 46 | Network: "tcp", 47 | Address: server.Addr, 48 | TLSConfig: tlsConfig, 49 | ConnectionTimeout: time.Duration(config.Network.Timeout) * time.Second, 50 | SendTimeout: time.Duration(config.Network.Timeout) * time.Second, 51 | } 52 | client := lumberjack.NewClient(options) 53 | clients = append(clients, client) 54 | } 55 | 56 | // clients := []Client{&StdoutClient{}} 57 | 58 | db, err := bolt.Open(config.State, 0600, &bolt.Options{Timeout: 2 * time.Second}) 59 | if err != nil { 60 | fmt.Printf("error opening state database: %s\n", err.Error()) 61 | os.Exit(1) 62 | } 63 | snapshotter := &butteredscones.BoltSnapshotter{DB: db} 64 | 65 | if config.Statistics.Addr != "" { 66 | stats_server := &butteredscones.StatisticsServer{ 67 | Statistics: 
butteredscones.GlobalStatistics, 68 | Addr: config.Statistics.Addr, 69 | } 70 | 71 | go func() { 72 | err := stats_server.ListenAndServe() 73 | grohl.Report(err, grohl.Data{"msg": "stats server failed to start"}) 74 | }() 75 | } 76 | 77 | // Default spool size 78 | spoolSize := config.Network.SpoolSize 79 | if spoolSize == 0 { 80 | spoolSize = 1024 81 | } 82 | 83 | supervisor := butteredscones.NewSupervisor(config.Files, clients, snapshotter, config.MaxLength) 84 | supervisor.SpoolSize = spoolSize 85 | supervisor.GlobRefresh = 15 * time.Second 86 | 87 | supervisor.Start() 88 | 89 | signalCh := make(chan os.Signal, 1) 90 | go signal.Notify(signalCh, syscall.SIGTERM, syscall.SIGINT) 91 | 92 | signal := <-signalCh 93 | fmt.Printf("Received %s, shutting down cleanly ...\n", signal) 94 | supervisor.Stop() 95 | fmt.Printf("Done shutting down\n") 96 | } 97 | -------------------------------------------------------------------------------- /file_reader.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "io" 7 | "os" 8 | 9 | "github.com/digitalocean/butteredscones/client" 10 | "github.com/technoweenie/grohl" 11 | ) 12 | 13 | type FileData struct { 14 | client.Data 15 | *HighWaterMark 16 | } 17 | 18 | type FileReader struct { 19 | C chan []*FileData 20 | ChunkSize int 21 | MaxLength int 22 | 23 | file *os.File 24 | filePath string 25 | fields map[string]string 26 | 27 | position int64 28 | buf *bufio.Reader 29 | 30 | hostname string 31 | } 32 | 33 | func NewFileReader(file *os.File, fields map[string]string, chunkSize, maxLength int) (*FileReader, error) { 34 | position, err := file.Seek(0, os.SEEK_CUR) 35 | if err != nil { 36 | return nil, err 37 | } 38 | 39 | hostname, _ := os.Hostname() 40 | 41 | reader := &FileReader{ 42 | C: make(chan []*FileData, 1), 43 | ChunkSize: chunkSize, 44 | MaxLength: maxLength, 45 | file: file, 46 | filePath: file.Name(), 47 | fields: 
fields, 48 | position: position, 49 | buf: bufio.NewReader(file), 50 | hostname: hostname, 51 | } 52 | go reader.read() 53 | 54 | return reader, nil 55 | } 56 | 57 | func (h *FileReader) read() { 58 | logger := grohl.NewContext(grohl.Data{"ns": "FileReader", "file_path": h.filePath}) 59 | 60 | currentChunk := make([]*FileData, 0, h.ChunkSize) 61 | for { 62 | line, err := h.buf.ReadBytes('\n') 63 | if err != nil { 64 | if err != io.EOF { 65 | logger.Report(err, grohl.Data{"msg": "error reading file", "resolution": "closing file"}) 66 | } 67 | 68 | h.sendChunk(currentChunk) 69 | close(h.C) 70 | 71 | return 72 | } 73 | h.position += int64(len(line)) 74 | // if maxLength is configured, skip lines that are too long 75 | if h.MaxLength > 0 && len(line) > h.MaxLength { 76 | continue 77 | } 78 | 79 | fileData := &FileData{ 80 | Data: h.buildDataWithLine(bytes.TrimRight(line, "\r\n")), 81 | HighWaterMark: &HighWaterMark{ 82 | FilePath: h.filePath, 83 | Position: h.position, 84 | }, 85 | } 86 | currentChunk = append(currentChunk, fileData) 87 | 88 | if len(currentChunk) >= h.ChunkSize { 89 | h.sendChunk(currentChunk) 90 | currentChunk = make([]*FileData, 0, h.ChunkSize) 91 | } 92 | } 93 | } 94 | 95 | func (h *FileReader) FilePath() string { 96 | return h.filePath 97 | } 98 | 99 | func (h *FileReader) sendChunk(chunk []*FileData) { 100 | if len(chunk) > 0 { 101 | h.C <- chunk 102 | } 103 | } 104 | 105 | func (h *FileReader) buildDataWithLine(line []byte) client.Data { 106 | var data client.Data 107 | if h.fields != nil { 108 | data = make(client.Data, len(h.fields)+1) 109 | } else { 110 | data = make(client.Data, 2) 111 | } 112 | data["line"] = string(line) 113 | data["host"] = h.hostname 114 | 115 | for k, v := range h.fields { 116 | data[k] = v 117 | } 118 | 119 | return data 120 | } 121 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # 
butteredscones [![Build Status](https://travis-ci.org/digitalocean/butteredscones.svg?branch=master)](https://travis-ci.org/digitalocean/butteredscones)

**butteredscones** watches log files on disk and forwards them to **logstash**
via the **lumberjack** protocol. It is similar to
[**logstash-forwarder**](https://github.com/elasticsearch/logstash-forwarder).

> I'm a lumberjack and I'm OK

> I sleep all night and I work all day.

> [...]

> On Wednesdays I go shoppin'

> And have **buttered scones** for tea.

## Configuration

Like **logstash-forwarder**, **butteredscones** is configured via a JSON file.

```json
{
  "state": "/var/lib/butteredscones/state.db",

  "network": {
    "servers": [
      {
        "addr": "192.168.0.1:5043",
        "name": "logstash.internal.example.com"
      }
    ],
    "certificate": "/etc/butteredscones/forwarder.crt",
    "key": "/etc/butteredscones/forwarder.key",
    "ca": "/etc/butteredscones/ca.crt",
    "timeout": 15
  },

  "statistics": {
    "addr": "127.0.0.1:8088"
  },

  "files": [
    {
      "paths": ["/var/log/messages", "/var/log/*.log"],
      "fields": {"type": "syslog"}
    }
  ]
}
```

**state** is where **butteredscones** keeps information about how far it has
read into each file. The directory where it lives must be writable by the
user that runs the **butteredscones** process.

**network/servers** can include one or more servers. If multiple servers are
present, **butteredscones** will send to all servers concurrently. Specifying
a **name** for a server is _optional_. If specified, the **addr** will be used
to connect, but the **name** will be used to verify the certificate. This
allows butteredscones to connect properly even if DNS is broken.
60 | 61 | The SSL certificate presented by the remote logstash server must be signed by 62 | the specified CA, if the `"ca"` option is specified. Otherwise, 63 | **butteredscones** will not communicate with the remote server. 64 | 65 | If given, **statistics/addr** specifies a socket address where an HTTP server 66 | will listen. Statistics about what **butteredscones** is doing will be written 67 | in JSON format. Use these statistics to debug problems or write automated 68 | monitoring tools. For example: `curl -si http://localhost:8088` 69 | 70 | **files** supports glob patterns. **butteredscones** will periodically check 71 | for new files that match the glob pattern and tail them. 72 | 73 | Currently, **butteredscones** does _not_ support log files that are truncated 74 | or renamed. This is not a use case the original developers had. However, if it 75 | interests you, pull requests are welcomed. 76 | 77 | ## Development & Packaging 78 | 79 | To build the static binary, `butteredscones`: 80 | 81 | ``` 82 | script/build 83 | ``` 84 | 85 | To run the tests: 86 | 87 | ``` 88 | script/test 89 | ``` 90 | 91 | To package `butteredscones` into a debian package: 92 | 93 | ``` 94 | GOOS=linux GOARCH=amd64 VERSION=0.0.1 script/deb 95 | ``` 96 | 97 | ## Future Work 98 | 99 | * Support input from standard in 100 | * Support files which are truncated in place 101 | -------------------------------------------------------------------------------- /lumberjack/client.go: -------------------------------------------------------------------------------- 1 | package lumberjack 2 | 3 | import ( 4 | "bytes" 5 | "compress/zlib" 6 | "crypto/tls" 7 | "encoding/binary" 8 | "net" 9 | "strings" 10 | "time" 11 | 12 | "github.com/digitalocean/butteredscones/client" 13 | "github.com/technoweenie/grohl" 14 | ) 15 | 16 | type Client struct { 17 | options *ClientOptions 18 | 19 | conn net.Conn 20 | sequence uint32 21 | } 22 | 23 | type ClientOptions struct { 24 | Network string 25 | Address 
string 26 | ConnectionTimeout time.Duration 27 | SendTimeout time.Duration 28 | TLSConfig *tls.Config 29 | } 30 | 31 | func NewClient(options *ClientOptions) *Client { 32 | return &Client{ 33 | options: options, 34 | } 35 | } 36 | 37 | func (c *Client) ensureConnected() error { 38 | if c.conn == nil { 39 | logger := grohl.NewContext(grohl.Data{"ns": "lumberjack.Client", "fn": "ensureConnected", "addr": c.options.Address}) 40 | timer := logger.Timer(grohl.Data{}) 41 | 42 | var conn net.Conn 43 | 44 | conn, err := net.DialTimeout(c.options.Network, c.options.Address, c.options.ConnectionTimeout) 45 | if err != nil { 46 | logger.Report(err, grohl.Data{}) 47 | return err 48 | } 49 | 50 | if c.options.TLSConfig != nil { 51 | if c.options.TLSConfig.ServerName == "" { 52 | parts := strings.Split(c.options.Address, ":") 53 | c.options.TLSConfig.ServerName = parts[0] 54 | } 55 | 56 | tlsConn := tls.Client(conn, c.options.TLSConfig) 57 | tlsConn.SetDeadline(time.Now().Add(c.options.SendTimeout)) 58 | if err := tlsConn.Handshake(); err != nil { 59 | conn.Close() 60 | 61 | logger.Report(err, grohl.Data{}) 62 | return err 63 | } 64 | conn = tlsConn 65 | } 66 | 67 | timer.Finish() 68 | c.conn = conn 69 | } 70 | 71 | return nil 72 | } 73 | 74 | func (c *Client) Disconnect() error { 75 | var err error 76 | if c.conn != nil { 77 | err = c.conn.Close() 78 | c.conn = nil 79 | } 80 | 81 | c.sequence = 0 82 | return err 83 | } 84 | 85 | func (c *Client) Name() string { 86 | return c.options.Address 87 | } 88 | 89 | func (c *Client) Send(lines []client.Data) error { 90 | err := c.ensureConnected() 91 | if err != nil { 92 | return err 93 | } 94 | 95 | // Serialize (w/ compression) 96 | linesBuf := c.serialize(lines) 97 | linesBytes := linesBuf.Bytes() 98 | 99 | headerBuf := new(bytes.Buffer) 100 | 101 | // Window size 102 | headerBuf.WriteString("1W") 103 | binary.Write(headerBuf, binary.BigEndian, uint32(len(lines))) 104 | 105 | // Compressed size 106 | headerBuf.WriteString("1C") 107 | 
binary.Write(headerBuf, binary.BigEndian, uint32(len(linesBytes))) 108 | 109 | // Write header to socket 110 | c.conn.SetDeadline(time.Now().Add(c.options.SendTimeout)) 111 | _, err = c.conn.Write(headerBuf.Bytes()) 112 | if err != nil { 113 | c.Disconnect() 114 | return err 115 | } 116 | 117 | // Write compressed lines to socket 118 | _, err = c.conn.Write(linesBytes) 119 | if err != nil { 120 | c.Disconnect() 121 | return err 122 | } 123 | 124 | // Wait for ACK (6 bytes) 125 | // This is pretty weird, but is mirroring what logstash-forwarder does 126 | ack := make([]byte, 6) 127 | ackBytes := 0 128 | for ackBytes < 6 { 129 | n, err := c.conn.Read(ack[ackBytes:len(ack)]) 130 | if n > 0 { 131 | ackBytes += n 132 | } else if err != nil { 133 | c.Disconnect() 134 | return err 135 | } 136 | } 137 | 138 | return nil 139 | } 140 | 141 | func (c *Client) serialize(lines []client.Data) *bytes.Buffer { 142 | buf := new(bytes.Buffer) 143 | compressor := zlib.NewWriter(buf) 144 | 145 | for _, data := range lines { 146 | c.sequence += 1 147 | 148 | compressor.Write([]byte("1D")) 149 | binary.Write(compressor, binary.BigEndian, uint32(c.sequence)) 150 | binary.Write(compressor, binary.BigEndian, uint32(len(data))) 151 | for k, v := range data { 152 | binary.Write(compressor, binary.BigEndian, uint32(len(k))) 153 | compressor.Write([]byte(k)) 154 | binary.Write(compressor, binary.BigEndian, uint32(len(v))) 155 | compressor.Write([]byte(v)) 156 | } 157 | } 158 | 159 | compressor.Close() 160 | return buf 161 | } 162 | -------------------------------------------------------------------------------- /lumberjack/server.go: -------------------------------------------------------------------------------- 1 | package lumberjack 2 | 3 | import ( 4 | "bytes" 5 | "compress/zlib" 6 | "crypto/tls" 7 | "encoding/binary" 8 | "fmt" 9 | "log" 10 | "net" 11 | "time" 12 | 13 | "github.com/digitalocean/butteredscones/client" 14 | ) 15 | 16 | type Server struct { 17 | options *serverOptions 18 | 
listener net.Listener 19 | } 20 | 21 | type serverOptions struct { 22 | Network string 23 | Address string 24 | 25 | TLSConfig *tls.Config 26 | 27 | WriteTimeout time.Duration 28 | ReadTimeout time.Duration 29 | } 30 | 31 | func newLumberjackServer(options *serverOptions) (*Server, error) { 32 | var listener net.Listener 33 | 34 | listener, err := net.Listen(options.Network, options.Address) 35 | if err != nil { 36 | return nil, err 37 | } 38 | 39 | return &Server{ 40 | options: options, 41 | listener: listener, 42 | }, nil 43 | } 44 | 45 | func (s *Server) Addr() net.Addr { 46 | return s.listener.Addr() 47 | } 48 | 49 | func (s *Server) ServeInto(dataCh chan<- client.Data) error { 50 | for { 51 | var client net.Conn 52 | 53 | client, err := s.listener.Accept() 54 | if err != nil { 55 | return err 56 | } 57 | 58 | if s.options.TLSConfig != nil { 59 | client = tls.Server(client, s.options.TLSConfig) 60 | } 61 | 62 | go func() { 63 | err := s.serveClient(client, dataCh) 64 | if err != nil { 65 | // TODO: grohl logging 66 | log.Print(err) 67 | } 68 | }() 69 | } 70 | } 71 | 72 | func (s *Server) serveClient(conn net.Conn, dataCh chan<- client.Data) error { 73 | defer conn.Close() 74 | controlBuf := make([]byte, 8) // up to 8 bytes (uint32 size) for storing control bytes 75 | 76 | conn.SetReadDeadline(time.Now().Add(s.options.ReadTimeout)) 77 | 78 | // Window size 79 | var windowSize uint32 80 | if _, err := conn.Read(controlBuf[0:2]); err != nil { 81 | return err 82 | } 83 | if bytes.Compare(controlBuf[0:2], []byte("1W")) != 0 { 84 | return fmt.Errorf("Expected 1W, got %v", controlBuf[0:2]) 85 | } 86 | if err := binary.Read(conn, binary.BigEndian, &windowSize); err != nil { 87 | return err 88 | } 89 | 90 | // Compressed size 91 | var compressedSize uint32 92 | if _, err := conn.Read(controlBuf[0:2]); err != nil { 93 | return err 94 | } 95 | if bytes.Compare(controlBuf[0:2], []byte("1C")) != 0 { 96 | return fmt.Errorf("Expected 1C, got %v", controlBuf[0:2]) 97 | } 98 | 
if err := binary.Read(conn, binary.BigEndian, &compressedSize); err != nil { 99 | return err 100 | } 101 | 102 | // Compressed payload 103 | // TODO: It is possible to rework this without allocating a huge buffer upfront 104 | compressedBuf := make([]byte, int(compressedSize)) 105 | if _, err := conn.Read(compressedBuf); err != nil { 106 | return err 107 | } 108 | uncompressor, err := zlib.NewReader(bytes.NewBuffer(compressedBuf)) 109 | if err != nil { 110 | return err 111 | } 112 | defer uncompressor.Close() 113 | 114 | lines := make([]client.Data, 0, int(windowSize)) 115 | for i := 0; i < int(windowSize); i++ { 116 | if _, err := uncompressor.Read(controlBuf[0:2]); err != nil { 117 | return err 118 | } 119 | if bytes.Compare(controlBuf[0:2], []byte("1D")) != 0 { 120 | return fmt.Errorf("Expected 1D, got %v", controlBuf[0:2]) 121 | } 122 | 123 | // Sequence 124 | var sequence uint32 125 | if err := binary.Read(uncompressor, binary.BigEndian, &sequence); err != nil { 126 | return err 127 | } 128 | 129 | // Payload key length 130 | var dataLength uint32 131 | if err = binary.Read(uncompressor, binary.BigEndian, &dataLength); err != nil { 132 | return err 133 | } 134 | 135 | data := make(client.Data, int(dataLength)) 136 | for j := 0; j < int(dataLength); j++ { 137 | var length uint32 138 | 139 | if err = binary.Read(uncompressor, binary.BigEndian, &length); err != nil { 140 | return err 141 | } 142 | k := make([]byte, int(length)) 143 | if _, err = uncompressor.Read(k); err != nil { 144 | return err 145 | } 146 | 147 | if err = binary.Read(uncompressor, binary.BigEndian, &length); err != nil { 148 | return err 149 | } 150 | v := make([]byte, int(length)) 151 | if _, err = uncompressor.Read(v); err != nil { 152 | return err 153 | } 154 | 155 | data[string(k)] = string(v) 156 | } 157 | 158 | lines = append(lines, data) 159 | } 160 | 161 | conn.SetWriteDeadline(time.Now().Add(s.options.WriteTimeout)) 162 | conn.Write([]byte("ackack")) // TODO: What exactly is ack 
supposed to be here? 163 | 164 | for _, data := range lines { 165 | dataCh <- data 166 | } 167 | return nil 168 | } 169 | 170 | func (s *Server) Close() error { 171 | return s.listener.Close() 172 | } 173 | -------------------------------------------------------------------------------- /statistics.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "encoding/json" 5 | "os" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | // Statistics keeps stats about the current operation of the program. It is 11 | // meant to keep snapshot-in-time stats, as opposed to counters or timers that 12 | // statsd offers. 13 | // 14 | // Statistics may be exposed by APIs that allow human- or machine-readable 15 | // monitoring. 16 | type Statistics struct { 17 | clients map[string]*ClientStatistics 18 | clientsLock sync.RWMutex 19 | 20 | fileReaderPool *FileReaderPoolStatistics 21 | 22 | files map[string]*FileStatistics 23 | filesLock sync.RWMutex 24 | } 25 | 26 | const ( 27 | // The client is sending data 28 | clientStatusSending = "sending" 29 | 30 | // The client failed to send data and is waiting to retry 31 | clientStatusRetrying = "retrying" 32 | ) 33 | 34 | type ClientStatistics struct { 35 | Status string `json:"status"` 36 | 37 | // The number of lines sent successfully to the client 38 | LinesSent int `json:"lines_sent"` 39 | 40 | // The last time lines were successfully sent to this client 41 | LastSendTime time.Time `json:"last_send_time"` 42 | 43 | // The number of lines in the last chunk successfully sent to this client 44 | LastChunkSize int `json:"last_chunk_size"` 45 | } 46 | 47 | type FileReaderPoolStatistics struct { 48 | // The number of files in the pool that are available to be read 49 | Available int `json:"available"` 50 | 51 | // The number of files in the pool that are locked, ready to be sent, but 52 | // haven't been yet. 
53 | Locked int `json:"locked"` 54 | } 55 | 56 | type FileStatistics struct { 57 | // The current size of the file. 58 | Size int64 `json:"size"` 59 | 60 | // The current position (in bytes) that has been read into the file. This 61 | // might be greater than SnapshotPosition if there are lines buffered into 62 | // memory that haven't been acknowledged by the server 63 | Position int64 `json:"position"` 64 | 65 | // The last time the file was read from into the in-memory buffer. 66 | LastRead time.Time `json:"last_read"` 67 | 68 | // The current position (in bytes) that has been successfully sent and 69 | // acknowledged by the remote server. 70 | SnapshotPosition int64 `json:"snapshot_position"` 71 | 72 | // The last time a line from this file was successfully sent and acknowledged 73 | // by the remote server. 74 | LastSnapshot time.Time `json:"last_snapshot"` 75 | } 76 | 77 | var GlobalStatistics *Statistics = NewStatistics() 78 | 79 | func NewStatistics() *Statistics { 80 | return &Statistics{ 81 | clients: make(map[string]*ClientStatistics), 82 | fileReaderPool: &FileReaderPoolStatistics{}, 83 | files: make(map[string]*FileStatistics), 84 | } 85 | } 86 | 87 | func (s *Statistics) SetClientStatus(clientName string, status string) { 88 | s.filesLock.Lock() 89 | defer s.filesLock.Unlock() 90 | 91 | stats := s.ensureClientStatisticsCreated(clientName) 92 | stats.Status = status 93 | } 94 | 95 | func (s *Statistics) IncrementClientLinesSent(clientName string, linesSent int) { 96 | s.filesLock.Lock() 97 | defer s.filesLock.Unlock() 98 | 99 | stats := s.ensureClientStatisticsCreated(clientName) 100 | stats.LastChunkSize = linesSent 101 | stats.LinesSent += linesSent 102 | stats.LastSendTime = time.Now() 103 | } 104 | 105 | func (s *Statistics) UpdateFileReaderPoolStatistics(available int, locked int) { 106 | s.fileReaderPool.Available = available 107 | s.fileReaderPool.Locked = locked 108 | } 109 | 110 | func (s *Statistics) SetFilePosition(filePath string, position 
int64) { 111 | s.filesLock.Lock() 112 | defer s.filesLock.Unlock() 113 | 114 | stats := s.ensureFileStatisticsCreated(filePath) 115 | stats.Position = position 116 | stats.LastRead = time.Now() 117 | } 118 | 119 | func (s *Statistics) SetFileSnapshotPosition(filePath string, snapshotPosition int64) { 120 | s.filesLock.Lock() 121 | defer s.filesLock.Unlock() 122 | 123 | stats := s.ensureFileStatisticsCreated(filePath) 124 | stats.SnapshotPosition = snapshotPosition 125 | stats.LastSnapshot = time.Now() 126 | } 127 | 128 | func (s *Statistics) DeleteFileStatistics(filePath string) { 129 | s.filesLock.Lock() 130 | defer s.filesLock.Unlock() 131 | 132 | delete(s.files, filePath) 133 | } 134 | 135 | // UpdateFileSizeStatistics updates the Size attribute of each file, so it's 136 | // easier to compare how much progress butteredscones has made through a file. 137 | // 138 | // UpdateFileSizeStatistics should be called before displaying statistics to 139 | // an end user. 140 | func (s *Statistics) UpdateFileSizeStatistics() { 141 | s.filesLock.RLock() 142 | filePaths := make([]string, 0, len(s.files)) 143 | for filePath, _ := range s.files { 144 | filePaths = append(filePaths, filePath) 145 | } 146 | s.filesLock.RUnlock() 147 | 148 | for _, filePath := range filePaths { 149 | if stats := s.files[filePath]; stats != nil { 150 | fileInfo, err := os.Stat(filePath) 151 | if err != nil { 152 | // unknown size; maybe it was deleted? 
153 | stats.Size = int64(-1) 154 | } else { 155 | stats.Size = fileInfo.Size() 156 | } 157 | } 158 | } 159 | } 160 | 161 | func (s *Statistics) ensureClientStatisticsCreated(clientName string) *ClientStatistics { 162 | // assumes lock is held by the caller 163 | if _, ok := s.clients[clientName]; !ok { 164 | s.clients[clientName] = &ClientStatistics{} 165 | } 166 | 167 | return s.clients[clientName] 168 | } 169 | 170 | func (s *Statistics) ensureFileStatisticsCreated(filePath string) *FileStatistics { 171 | // assumes lock is held by the caller 172 | if _, ok := s.files[filePath]; !ok { 173 | s.files[filePath] = &FileStatistics{} 174 | } 175 | 176 | return s.files[filePath] 177 | } 178 | 179 | func (s *Statistics) MarshalJSON() ([]byte, error) { 180 | structure := map[string]interface{}{ 181 | "clients": s.clients, 182 | "file_reader_pool": s.fileReaderPool, 183 | "files": s.files, 184 | } 185 | 186 | return json.Marshal(structure) 187 | } 188 | -------------------------------------------------------------------------------- /file_reader_test.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "testing" 7 | "time" 8 | ) 9 | 10 | func TestLineReaderReadingFileWithFields(t *testing.T) { 11 | file, err := os.Open("fixtures/basic.log") 12 | if err != nil { 13 | t.Fatal(err) 14 | } 15 | 16 | reader, err := NewFileReader(file, map[string]string{"type": "syslog"}, 1, 0) 17 | if err != nil { 18 | t.Fatal(err) 19 | } 20 | 21 | select { 22 | case chunk := <-reader.C: 23 | if chunk[0].Data["line"] != "line1" { 24 | t.Fatalf("Expected \"line1\", got %q", chunk[0].Data["line"]) 25 | } 26 | if chunk[0].Data["type"] != "syslog" { 27 | t.Fatalf("Expected \"type\":\"syslog\", got %q", chunk[0].Data["type"]) 28 | } 29 | if chunk[0].HighWaterMark.Position != 6 { 30 | t.Fatalf("Expected HighWaterMark.Position=6, got %d", chunk[0].HighWaterMark.Position) 31 | } 32 | case 
<-time.After(250 * time.Millisecond): 33 | t.Fatalf("Timeout") 34 | } 35 | 36 | select { 37 | case chunk := <-reader.C: 38 | if chunk[0].Data["line"] != "line2" { 39 | t.Fatalf("Expected \"line2\", got %q", chunk[0].Data["line"]) 40 | } 41 | if chunk[0].Data["type"] != "syslog" { 42 | t.Fatalf("Expected \"type\":\"syslog\", got %q", chunk[0].Data["type"]) 43 | } 44 | if chunk[0].HighWaterMark.Position != 12 { 45 | t.Fatalf("Expected HighWaterMark.Position=12, got %d", chunk[0].HighWaterMark.Position) 46 | } 47 | case <-time.After(250 * time.Millisecond): 48 | t.Fatalf("Timeout") 49 | } 50 | 51 | select { 52 | case _, ok := <-reader.C: 53 | if ok { 54 | t.Fatalf("Expected channel to be closed after EOF, but was not") 55 | } 56 | case <-time.After(250 * time.Millisecond): 57 | t.Fatalf("Timeout") 58 | } 59 | } 60 | 61 | func TestLineReaderReadingWindowsEndings(t *testing.T) { 62 | file, err := os.Open("fixtures/windows.log") 63 | if err != nil { 64 | t.Fatal(err) 65 | } 66 | 67 | reader, err := NewFileReader(file, map[string]string{"type": "syslog"}, 1, 0) 68 | if err != nil { 69 | t.Fatal(err) 70 | } 71 | 72 | select { 73 | case chunk := <-reader.C: 74 | if chunk[0].Data["line"] != "line1" { 75 | t.Fatalf("Expected \"line1\", got %q", chunk[0].Data["line"]) 76 | } 77 | if chunk[0].Data["type"] != "syslog" { 78 | t.Fatalf("Expected \"type\":\"syslog\", got %q", chunk[0].Data["type"]) 79 | } 80 | if chunk[0].HighWaterMark.Position != 7 { 81 | t.Fatalf("Expected HighWaterMark.Position=7, got %d", chunk[0].HighWaterMark.Position) 82 | } 83 | case <-time.After(250 * time.Millisecond): 84 | t.Fatalf("Timeout") 85 | } 86 | 87 | select { 88 | case chunk := <-reader.C: 89 | if chunk[0].Data["line"] != "line2" { 90 | t.Fatalf("Expected \"line2\", got %q", chunk[0].Data["line"]) 91 | } 92 | if chunk[0].Data["type"] != "syslog" { 93 | t.Fatalf("Expected \"type\":\"syslog\", got %q", chunk[0].Data["type"]) 94 | } 95 | if chunk[0].HighWaterMark.Position != 14 { 96 | 
t.Fatalf("Expected HighWaterMark.Position=14, got %d", chunk[0].HighWaterMark.Position) 97 | } 98 | case <-time.After(250 * time.Millisecond): 99 | t.Fatalf("Timeout") 100 | } 101 | 102 | select { 103 | case _, ok := <-reader.C: 104 | if ok { 105 | t.Fatalf("Expected channel to be closed after EOF, but was not") 106 | } 107 | case <-time.After(250 * time.Millisecond): 108 | t.Fatalf("Timeout") 109 | } 110 | } 111 | 112 | func TestLineReaderPartialLine(t *testing.T) { 113 | tmpFile, err := ioutil.TempFile("", "butteredscones") 114 | if err != nil { 115 | t.Fatal(err) 116 | } 117 | defer tmpFile.Close() 118 | defer os.Remove(tmpFile.Name()) 119 | 120 | // We write a complete line, then a partial line. FileReader is supposed to 121 | // read one line successfully, EOF without the partial line sent. 122 | _, err = tmpFile.Write([]byte("line1\npartial line")) 123 | if err != nil { 124 | t.Fatal(err) 125 | } 126 | 127 | file, err := os.Open(tmpFile.Name()) 128 | if err != nil { 129 | t.Fatal(err) 130 | } 131 | 132 | reader, err := NewFileReader(file, map[string]string{"type": "syslog"}, 1, 0) 133 | if err != nil { 134 | t.Fatal(err) 135 | } 136 | 137 | select { 138 | case chunk := <-reader.C: 139 | if chunk[0].Data["line"] != "line1" { 140 | t.Fatalf("Expected \"line1\", got %q", chunk[0].Data["line"]) 141 | } 142 | if chunk[0].Data["type"] != "syslog" { 143 | t.Fatalf("Expected \"type\":\"syslog\", got %q", chunk[0].Data["type"]) 144 | } 145 | if chunk[0].HighWaterMark.Position != 6 { 146 | t.Fatalf("Expected HighWaterMark.Position=6, got %d", chunk[0].HighWaterMark.Position) 147 | } 148 | case <-time.After(250 * time.Millisecond): 149 | t.Fatalf("Timeout") 150 | } 151 | 152 | select { 153 | case _, ok := <-reader.C: 154 | if ok { 155 | t.Fatalf("Expected channel to be closed after EOF, but was not") 156 | } 157 | case <-time.After(250 * time.Millisecond): 158 | t.Fatalf("Timeout") 159 | } 160 | } 161 | 162 | func TestLineReaderLongLine(t *testing.T) { 163 | tmpFile, 
err := ioutil.TempFile("", "butteredscones") 164 | if err != nil { 165 | t.Fatal(err) 166 | } 167 | defer tmpFile.Close() 168 | defer os.Remove(tmpFile.Name()) 169 | 170 | // We write a complete line, then a partial line. FileReader is supposed to 171 | // read one line successfully, EOF without the partial line sent. 172 | _, err = tmpFile.Write([]byte("long line\nline2\n")) 173 | if err != nil { 174 | t.Fatal(err) 175 | } 176 | 177 | file, err := os.Open(tmpFile.Name()) 178 | if err != nil { 179 | t.Fatal(err) 180 | } 181 | 182 | reader, err := NewFileReader(file, map[string]string{"type": "syslog"}, 1, len("long lin")) 183 | if err != nil { 184 | t.Fatal(err) 185 | } 186 | 187 | select { 188 | case chunk := <-reader.C: 189 | if chunk[0].Data["line"] != "line2" { 190 | t.Fatalf("Expected \"line2\", got %q", chunk[0].Data["line"]) 191 | } 192 | if chunk[0].Data["type"] != "syslog" { 193 | t.Fatalf("Expected \"type\":\"syslog\", got %q", chunk[0].Data["type"]) 194 | } 195 | if chunk[0].HighWaterMark.Position != 16 { 196 | t.Fatalf("Expected HighWaterMark.Position=6, got %d", chunk[0].HighWaterMark.Position) 197 | } 198 | case <-time.After(250 * time.Millisecond): 199 | t.Fatalf("Timeout") 200 | } 201 | 202 | select { 203 | case _, ok := <-reader.C: 204 | if ok { 205 | t.Fatalf("Expected channel to be closed after EOF, but was not") 206 | } 207 | case <-time.After(250 * time.Millisecond): 208 | t.Fatalf("Timeout") 209 | } 210 | } 211 | -------------------------------------------------------------------------------- /supervisor.go: -------------------------------------------------------------------------------- 1 | package butteredscones 2 | 3 | import ( 4 | "os" 5 | "path/filepath" 6 | "sync" 7 | "time" 8 | 9 | "github.com/digitalocean/butteredscones/client" 10 | "github.com/technoweenie/grohl" 11 | ) 12 | 13 | const ( 14 | supervisorReaderChunkSize = 64 15 | ) 16 | 17 | type Supervisor struct { 18 | files []FileConfiguration 19 | clients []client.Client 20 | 
snapshotter Snapshotter 21 | 22 | // Optional settings 23 | SpoolSize int 24 | MaxLength int 25 | 26 | // How frequently to glob for new files that may have appeared 27 | GlobRefresh time.Duration 28 | globTimer *time.Timer 29 | 30 | readerPool *FileReaderPool 31 | readyChunks chan *readyChunk 32 | // A separate channel for retries to avoid deadlocking when multiple clients 33 | // need to retry. 34 | retryChunks chan *readyChunk 35 | 36 | stopRequest chan interface{} 37 | routineWg sync.WaitGroup 38 | } 39 | 40 | type readyChunk struct { 41 | Chunk []*FileData 42 | LockedReaders []*FileReader 43 | } 44 | 45 | func NewSupervisor(files []FileConfiguration, clients []client.Client, snapshotter Snapshotter, maxLength int) *Supervisor { 46 | spoolSize := 1024 47 | 48 | return &Supervisor{ 49 | files: files, 50 | clients: clients, 51 | snapshotter: snapshotter, 52 | 53 | // Can be adjusted by clients later before calling Start 54 | SpoolSize: spoolSize, 55 | MaxLength: maxLength, 56 | GlobRefresh: 10 * time.Second, 57 | } 58 | } 59 | 60 | // Start pulls things together and plays match-maker. 61 | func (s *Supervisor) Start() { 62 | s.stopRequest = make(chan interface{}) 63 | 64 | s.readerPool = NewFileReaderPool() 65 | s.readyChunks = make(chan *readyChunk, len(s.clients)) 66 | s.retryChunks = make(chan *readyChunk, len(s.clients)) 67 | 68 | s.routineWg.Add(1) 69 | go func() { 70 | s.populateReaderPool() 71 | s.routineWg.Done() 72 | }() 73 | 74 | s.routineWg.Add(1) 75 | go func() { 76 | s.populateReadyChunks() 77 | s.routineWg.Done() 78 | }() 79 | 80 | for _, cli := range s.clients { 81 | s.routineWg.Add(1) 82 | go func(c client.Client) { 83 | s.sendReadyChunksToClient(c) 84 | s.routineWg.Done() 85 | }(cli) 86 | } 87 | } 88 | 89 | // Stop stops the supervisor cleanly, making sure all progress has been snapshotted 90 | // before exiting. 
// Reads chunks from available file readers, putting together ready 'chunks'
// that can be sent to clients.
//
// Runs until stopRequest is closed. Each iteration accumulates up to
// SpoolSize lines from the pool's readers into a single readyChunk; readers
// that contributed data stay locked (recorded in LockedReaders) until a
// client acknowledges the chunk and unlocks them. When no lines are
// available, the loop backs off exponentially instead of spinning.
func (s *Supervisor) populateReadyChunks() {
	logger := grohl.NewContext(grohl.Data{"ns": "Supervisor", "fn": "populateReadyChunks"})

	backoff := &ExponentialBackoff{Minimum: 50 * time.Millisecond, Maximum: 5000 * time.Millisecond}
	for {
		// Publish pool occupancy for the statistics endpoint.
		available, locked := s.readerPool.Counts()
		GlobalStatistics.UpdateFileReaderPoolStatistics(available, locked)

		currentChunk := &readyChunk{
			Chunk:         make([]*FileData, 0),
			LockedReaders: make([]*FileReader, 0),
		}

		for len(currentChunk.Chunk) < s.SpoolSize {
			if reader := s.readerPool.LockNext(); reader != nil {
				select {
				case <-s.stopRequest:
					return
				case chunk := <-reader.C:
					if chunk != nil {
						currentChunk.Chunk = append(currentChunk.Chunk, chunk...)
						// Keep the reader locked until the chunk is acknowledged
						// so its lines aren't re-read by another consumer.
						currentChunk.LockedReaders = append(currentChunk.LockedReaders, reader)

						if len(chunk) > 0 {
							// Track progress using the last line's high water mark.
							if hwm := chunk[len(chunk)-1].HighWaterMark; hwm != nil {
								GlobalStatistics.SetFilePosition(hwm.FilePath, hwm.Position)
							}
						}
					} else {
						// The reader hit EOF or another error. Remove it and it'll get
						// picked up by populateReaderPool again if it still needs to be
						// read.
						logger.Log(grohl.Data{"status": "EOF", "file": reader.FilePath()})

						s.readerPool.Remove(reader)
						GlobalStatistics.DeleteFileStatistics(reader.FilePath())
					}
				default:
					// The reader didn't have anything queued up for us. Unlock the
					// reader and move on.
					s.readerPool.Unlock(reader)
				}
			} else {
				// If there are no more readers, send the chunk ASAP so we can get
				// the next chunk in line
				logger.Log(grohl.Data{"msg": "no readers available", "resolution": "sending current chunk"})
				break
			}
		}

		if len(currentChunk.Chunk) > 0 {
			select {
			case <-s.stopRequest:
				return
			case s.readyChunks <- currentChunk:
				// Data flowed this round: reset the idle backoff.
				backoff.Reset()
			}
		} else {
			// Nothing to send; sleep with exponential backoff (still
			// interruptible by a stop request).
			select {
			case <-s.stopRequest:
				return
			case <-time.After(backoff.Next()):
				grohl.Log(grohl.Data{"msg": "no lines available to send", "resolution": "backing off"})
			}
		}
	}
}
198 | return 199 | case s.retryChunks <- readyChunk: 200 | // continue 201 | } 202 | 203 | // Backoff 204 | select { 205 | case <-s.stopRequest: 206 | return 207 | case <-time.After(backoff.Next()): 208 | // continue 209 | } 210 | } else { 211 | backoff.Reset() 212 | GlobalStatistics.IncrementClientLinesSent(client.Name(), len(readyChunk.Chunk)) 213 | 214 | // Snapshot progress 215 | if err := s.acknowledgeChunk(readyChunk.Chunk); err != nil { 216 | grohl.Report(err, grohl.Data{"msg": "failed to acknowledge progress", "resolution": "skipping"}) 217 | } 218 | 219 | s.readerPool.UnlockAll(readyChunk.LockedReaders) 220 | } 221 | } 222 | } 223 | } 224 | 225 | func (s *Supervisor) sendChunk(c client.Client, chunk []*FileData) error { 226 | lines := make([]client.Data, 0, len(chunk)) 227 | for _, fileData := range chunk { 228 | lines = append(lines, fileData.Data) 229 | } 230 | 231 | return c.Send(lines) 232 | } 233 | 234 | func (s *Supervisor) acknowledgeChunk(chunk []*FileData) error { 235 | marks := make([]*HighWaterMark, 0, len(chunk)) 236 | for _, fileData := range chunk { 237 | marks = append(marks, fileData.HighWaterMark) 238 | } 239 | 240 | err := s.snapshotter.SetHighWaterMarks(marks) 241 | if err == nil { 242 | // Update statistics 243 | for _, mark := range marks { 244 | GlobalStatistics.SetFileSnapshotPosition(mark.FilePath, mark.Position) 245 | } 246 | } 247 | 248 | return err 249 | } 250 | 251 | // populateReaderPool periodically globs for new files or files that previously 252 | // hit EOF and creates file readers for them. 
253 | func (s *Supervisor) populateReaderPool() { 254 | logger := grohl.NewContext(grohl.Data{"ns": "Supervisor", "fn": "populateReaderPool"}) 255 | 256 | timer := time.NewTimer(0) 257 | for { 258 | select { 259 | case <-s.stopRequest: 260 | return 261 | case <-timer.C: 262 | logTimer := logger.Timer(grohl.Data{}) 263 | for _, config := range s.files { 264 | for _, path := range config.Paths { 265 | matches, err := filepath.Glob(path) 266 | if err != nil { 267 | logger.Report(err, grohl.Data{"path": path, "msg": "failed to glob", "resolution": "skipping path"}) 268 | continue 269 | } 270 | 271 | for _, filePath := range matches { 272 | if err = s.startFileReader(filePath, config.Fields); err != nil { 273 | logger.Report(err, grohl.Data{"path": path, "filePath": filePath, "msg": "failed to start reader", "resolution": "skipping file"}) 274 | } 275 | } 276 | } 277 | } 278 | logTimer.Finish() 279 | timer.Reset(s.GlobRefresh) 280 | } 281 | } 282 | } 283 | 284 | // startFileReader starts an individual file reader at a given path, if one 285 | // isn't already running. 286 | func (s *Supervisor) startFileReader(filePath string, fields map[string]string) error { 287 | // There's already a reader in the pool for this path 288 | if s.readerPool.IsPathInPool(filePath) { 289 | return nil 290 | } 291 | 292 | highWaterMark, err := s.snapshotter.HighWaterMark(filePath) 293 | if err != nil { 294 | return err 295 | } 296 | 297 | file, err := os.Open(filePath) 298 | if err != nil { 299 | return err 300 | } 301 | 302 | stat, err := file.Stat() 303 | if err != nil { 304 | file.Close() 305 | return err 306 | } 307 | 308 | // If the file's current size isn't beyond the high water mark, it'll 309 | // immediately EOF so there's no use in creating a reader for it. 
// startFileReader starts an individual file reader at a given path, if one
// isn't already running.
//
// The file is opened, seeked to the snapshotted high water mark, and wrapped
// in a FileReader that is added to the pool. Files whose size has not grown
// past the high water mark are skipped entirely (they would EOF immediately).
// On any error after the open succeeds, the file handle is closed before
// returning so handles are not leaked.
func (s *Supervisor) startFileReader(filePath string, fields map[string]string) error {
	// There's already a reader in the pool for this path
	if s.readerPool.IsPathInPool(filePath) {
		return nil
	}

	// Last acknowledged position for this path, persisted by the snapshotter.
	highWaterMark, err := s.snapshotter.HighWaterMark(filePath)
	if err != nil {
		return err
	}

	file, err := os.Open(filePath)
	if err != nil {
		return err
	}

	stat, err := file.Stat()
	if err != nil {
		file.Close()
		return err
	}

	// If the file's current size isn't beyond the high water mark, it'll
	// immediately EOF so there's no use in creating a reader for it.
	if stat.Size() <= highWaterMark.Position {
		file.Close()
		return nil
	}

	// Resume reading where the last acknowledged snapshot left off.
	_, err = file.Seek(highWaterMark.Position, os.SEEK_SET)
	if err != nil {
		file.Close()
		return err
	}
	// Seed the statistics so the dashboard shows the resume point before any
	// new lines are read or acknowledged.
	GlobalStatistics.SetFilePosition(filePath, highWaterMark.Position)
	GlobalStatistics.SetFileSnapshotPosition(filePath, highWaterMark.Position)

	reader, err := NewFileReader(file, fields, supervisorReaderChunkSize, s.MaxLength)
	if err != nil {
		file.Close()
		return err
	}

	// The reader now owns the file handle; the pool will hand it to consumers.
	s.readerPool.Add(reader)
	return nil
}