├── .gitignore ├── main ├── build.cmd ├── build-linux.cmd └── main.go ├── .gitattributes ├── README.md ├── rate_limiter.go ├── trier.go ├── LICENSE ├── scanner_writer.go └── logger.go /.gitignore: -------------------------------------------------------------------------------- 1 | cloudwatchlogs-agent 2 | cloudwatchlogs-agent.exe 3 | .idea -------------------------------------------------------------------------------- /main/build.cmd: -------------------------------------------------------------------------------- 1 | @Echo off 2 | 3 | go build -ldflags "-s" 4 | mv main cloudwatchlogs-agent.exe -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text eol=lf 2 | 3 | *.ico binary 4 | *.jpg binary 5 | *.gif binary 6 | *.png binary -------------------------------------------------------------------------------- /main/build-linux.cmd: -------------------------------------------------------------------------------- 1 | @Echo off 2 | 3 | setlocal 4 | 5 | SET GOOS=linux 6 | SET GOARCH=amd64 7 | go build -a -ldflags "-s" 8 | mv main cloudwatchlogs-agent -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cloudwatchlogs-agent 2 | AWS CloudWatch Logs Agent written in Go with zero runtime dependencies, optimized to run within [Boxfuse instances](https://boxfuse.com). 3 | 4 | This agent is released under the MIT License (MIT). 5 | 6 | It is based on the original Go agent by Jason Mooberry (which unfortunately is not available online anymore). 
-------------------------------------------------------------------------------- /rate_limiter.go: -------------------------------------------------------------------------------- 1 | package logger 2 | 3 | import "time" 4 | 5 | type RateLimiter struct { 6 | s chan struct{} 7 | } 8 | 9 | func NewRateLimiter(n int, d time.Duration) *RateLimiter { 10 | 11 | r := &RateLimiter{make(chan struct{}, n)} 12 | 13 | go func() { 14 | for { 15 | SLEEP: 16 | time.Sleep(d) 17 | for i := 0; i < n; i++ { 18 | select { 19 | case _, ok := <-r.s: 20 | if !ok { 21 | return 22 | } 23 | default: 24 | goto SLEEP 25 | } 26 | } 27 | } 28 | }() 29 | 30 | return r 31 | 32 | } 33 | 34 | func (r *RateLimiter) Ready() bool { 35 | r.s <- struct{}{} 36 | return true 37 | } 38 | 39 | func (r *RateLimiter) Close() { 40 | close(r.s) 41 | } 42 | -------------------------------------------------------------------------------- /trier.go: -------------------------------------------------------------------------------- 1 | package logger 2 | 3 | import ( 4 | "errors" 5 | "math/rand" 6 | "time" 7 | ) 8 | 9 | type Trier struct { 10 | interval time.Duration 11 | expiration time.Time 12 | } 13 | 14 | var ( 15 | MaxSleepInterval = 10 * time.Second 16 | ErrMaxTries = errors.New("max tries") 17 | ) 18 | 19 | func init() { 20 | rand.Seed(time.Now().UnixNano()) 21 | } 22 | 23 | func NewTrier(maxTotalTime time.Duration) *Trier { 24 | return &Trier{ 25 | interval: time.Second, 26 | expiration: time.Now().Add(maxTotalTime), 27 | } 28 | } 29 | 30 | func (t *Trier) Try() bool { 31 | return time.Now().Before(t.expiration) 32 | } 33 | 34 | func (t *Trier) Wait() { 35 | 36 | // interval +- jitter 37 | w := t.interval + (t.interval / 2) - time.Duration(rand.Int63n(int64(t.interval))) 38 | 39 | if w > MaxSleepInterval { 40 | w = MaxSleepInterval 41 | } 42 | 43 | time.Sleep(w) 44 | 45 | t.interval *= 2 // exp growth 46 | 47 | } 48 | 49 | func (t *Trier) TryFunc(f func() (error, bool)) error { 50 | for ; t.Try(); t.Wait() { 
51 | if err, retry := f(); err == nil || !retry { 52 | return err 53 | } 54 | } 55 | return ErrMaxTries 56 | } 57 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Boxfuse GmbH 4 | Copyright (c) 2015 Jason Mooberry 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. -------------------------------------------------------------------------------- /main/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "io" 6 | "log" 7 | "os" 8 | "time" 9 | "github.com/aws/aws-sdk-go/aws" 10 | "github.com/aws/aws-sdk-go/aws/credentials" 11 | "github.com/aws/aws-sdk-go/aws/session" 12 | "strings" 13 | ".." 
14 | ) 15 | 16 | var ( 17 | stderr = flag.Bool("stderr", false, "true if this logs messages from stderr instead of stdout") 18 | x = flag.Bool("X", false, "show internal logs") 19 | ) 20 | 21 | func init() { 22 | flag.Parse() 23 | } 24 | 25 | func main() { 26 | version := "1.0.2" 27 | 28 | instance, _ := os.Hostname() 29 | 30 | envVar := os.Getenv("BOXFUSE_ENV") 31 | if envVar == "" { 32 | log.Fatal("Missing BOXFUSE_ENV environment variable") 33 | } 34 | env := "boxfuse/" + envVar 35 | 36 | image := os.Getenv("BOXFUSE_IMAGE_COORDINATES") 37 | if image == "" { 38 | log.Fatal("Missing BOXFUSE_IMAGE_COORDINATES environment variable") 39 | } 40 | 41 | app := strings.Split(image, ":")[0] 42 | 43 | endpoint := os.Getenv("BOXFUSE_CLOUDWATCHLOGS_ENDPOINT") 44 | endpointMsg := ""; 45 | var awsSession *session.Session 46 | if endpoint != "" { 47 | endpointMsg = " at " + endpoint; 48 | awsSession = session.New(&aws.Config{Region: aws.String("us-east-1"), Credentials: credentials.NewStaticCredentials("dummy", "dummy", "")}) 49 | } else { 50 | awsSession = session.New() 51 | } 52 | 53 | level := "INFO" 54 | if *stderr { 55 | level = "ERROR" 56 | } 57 | 58 | log.Println("Boxfuse CloudWatch Logs Agent " + version + " redirecting " + level + " logs for " + image + " to CloudWatch Logs" + endpointMsg + " (group: " + env + ", stream: " + app + ") ...") 59 | 60 | logger1, err := logger.NewLogger(awsSession, endpoint, env, app, level, time.Second, image, instance, x) 61 | if err != nil { 62 | log.Fatal(err) 63 | } 64 | 65 | if _, err := io.Copy(logger1, os.Stdin); err != nil { 66 | log.Println("copy err", err) 67 | } 68 | if err := logger1.Close(); err != nil { 69 | log.Println(err) 70 | } 71 | log.Println("Exiting...") 72 | os.Exit(0) 73 | } 74 | -------------------------------------------------------------------------------- /scanner_writer.go: -------------------------------------------------------------------------------- 1 | package logger 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 
| "io" 7 | "sync" 8 | ) 9 | 10 | type ( 11 | ScannerWriter struct { 12 | buf []byte 13 | max_buf int 14 | 15 | me sync.Mutex 16 | closed bool 17 | 18 | splitFunc bufio.SplitFunc 19 | tokenFunc func(token []byte) error 20 | } 21 | ) 22 | 23 | var ( 24 | ExceededBufferSizeLimitError = fmt.Errorf("exceeded buffer size limit") 25 | WriterClosedError = fmt.Errorf("cannot write to closed writer") 26 | ) 27 | 28 | func NewScannerWriter(splitFunc bufio.SplitFunc, max_buf int, tokenFunc func([]byte) error) *ScannerWriter { 29 | return &ScannerWriter{ 30 | splitFunc: splitFunc, 31 | tokenFunc: tokenFunc, 32 | max_buf: max_buf, 33 | } 34 | } 35 | 36 | func (sc *ScannerWriter) Write(data []byte) (int, error) { 37 | 38 | sc.me.Lock() 39 | defer sc.me.Unlock() 40 | 41 | if sc.closed { 42 | return 0, WriterClosedError 43 | } 44 | 45 | data_len := len(data) 46 | 47 | if sc.buf != nil { 48 | data = append(sc.buf, data...) 49 | sc.buf = nil 50 | } 51 | 52 | for len(data) > 0 { 53 | 54 | adv, token, err := sc.splitFunc(data, false) 55 | if err != nil { 56 | return 0, err 57 | } 58 | 59 | if token == nil { 60 | if adv == 0 { 61 | // read more requests are buffered until next write 62 | if len(sc.buf)+len(data) > sc.max_buf { 63 | return 0, ExceededBufferSizeLimitError 64 | } 65 | sc.buf = append(sc.buf, data...) 
66 | return data_len, nil 67 | } 68 | } else if err := sc.tokenFunc(token); err != nil { 69 | return 0, err 70 | } 71 | 72 | if adv > 0 { 73 | data = data[adv:] 74 | } 75 | 76 | } 77 | 78 | return data_len, nil 79 | 80 | } 81 | 82 | func (sc *ScannerWriter) Flush() error { 83 | 84 | sc.me.Lock() 85 | defer sc.me.Unlock() 86 | 87 | return sc.flush() 88 | 89 | } 90 | 91 | func (sc *ScannerWriter) flush() error { 92 | 93 | if sc.closed { 94 | return WriterClosedError 95 | } 96 | 97 | if len(sc.buf) == 0 { 98 | sc.buf = nil 99 | return nil 100 | } 101 | 102 | _, token, err := sc.splitFunc(sc.buf, true) 103 | if err != nil { 104 | if err == io.EOF { 105 | return nil 106 | } 107 | return err 108 | } 109 | if len(token) > 0 { 110 | if err := sc.tokenFunc(token); err != nil { 111 | return err 112 | } 113 | } 114 | 115 | sc.buf = nil 116 | 117 | return nil 118 | 119 | } 120 | 121 | func (sc *ScannerWriter) Close() error { 122 | 123 | sc.me.Lock() 124 | defer sc.me.Unlock() 125 | 126 | if sc.closed { 127 | return WriterClosedError 128 | } 129 | 130 | if err := sc.flush(); err != nil { 131 | return err 132 | } 133 | 134 | sc.closed = true 135 | sc.buf = nil 136 | sc.splitFunc = nil 137 | sc.tokenFunc = nil 138 | 139 | return nil 140 | 141 | } 142 | -------------------------------------------------------------------------------- /logger.go: -------------------------------------------------------------------------------- 1 | package logger 2 | 3 | import ( 4 | "bufio" 5 | "encoding/json" 6 | "errors" 7 | "fmt" 8 | "io" 9 | "net/http" 10 | "net/url" 11 | "os" 12 | "runtime" 13 | "strings" 14 | "time" 15 | 16 | "github.com/aws/aws-sdk-go/aws" 17 | "github.com/aws/aws-sdk-go/aws/awserr" 18 | "github.com/aws/aws-sdk-go/aws/session" 19 | "github.com/aws/aws-sdk-go/service/cloudwatchlogs" 20 | "strconv" 21 | ) 22 | 23 | type ( 24 | Logger struct { 25 | w io.Writer 26 | 27 | // Service exposed for direct actions 28 | Service *cloudwatchlogs.CloudWatchLogs 29 | 30 | // logging tokens 
31 | group *string 32 | stream *string 33 | sequenceToken *string 34 | 35 | // internal 36 | sw *ScannerWriter 37 | done chan struct{} 38 | } 39 | 40 | LogMessage struct { 41 | Instance *string `json:"instance"` 42 | Image *string `json:"image"` 43 | Level *string `json:"level"` 44 | Message *string `json:"message"` 45 | } 46 | ) 47 | 48 | const ( 49 | MaxMessageLength = 32 << 10 50 | ) 51 | 52 | var ( 53 | // this is how long a batch will continue to be retried, in the event CloudWatch is 54 | // not available. At which point the batch is dumped to stderr 55 | MaxRetryTime = time.Hour 56 | 57 | // the buffer length of the log event channel 58 | EventLogBufferLength = 64 << 10 59 | 60 | // this occurs when the buffered channel receiving log writes blocks 61 | ErrStreamBackedUp = errors.New("stream backed up") 62 | ) 63 | 64 | func NewLogger(sess *session.Session, endpoint, group, stream, level string, flushInterval time.Duration, image, instance string, x *bool) (*Logger, error) { 65 | config := aws.NewConfig() 66 | config.Endpoint = &endpoint 67 | l := &Logger{ 68 | Service: cloudwatchlogs.New(sess, config), 69 | group: &group, 70 | stream: &stream, 71 | done: make(chan struct{}), 72 | } 73 | 74 | events := make(chan *cloudwatchlogs.InputLogEvent, EventLogBufferLength) 75 | 76 | go func() { 77 | 78 | flushTime := time.NewTicker(flushInterval) 79 | defer flushTime.Stop() 80 | 81 | var logEvents []*cloudwatchlogs.InputLogEvent 82 | 83 | for { 84 | func() { 85 | defer func() { 86 | if e := recover(); e != nil { 87 | fmt.Fprintln(os.Stderr, "panic:", e) 88 | } 89 | }() 90 | select { 91 | case e := <-events: 92 | logEvents = append(logEvents, e) 93 | case <-flushTime.C: 94 | if len(logEvents) > 0 { 95 | l.flush(logEvents, x) 96 | logEvents = nil 97 | } 98 | case <-l.done: 99 | for { 100 | select { 101 | case e := <-events: 102 | logEvents = append(logEvents, e) 103 | default: 104 | l.flush(logEvents, x) 105 | l.done <- struct{}{} 106 | close(l.done) 107 | 
runtime.Goexit() 108 | } 109 | } 110 | } 111 | }() 112 | } 113 | 114 | }() 115 | 116 | l.sw = NewScannerWriter(bufio.ScanLines, MaxMessageLength, func(token []byte) error { 117 | message := string(token) 118 | 119 | m := &LogMessage{ 120 | Instance : &instance, 121 | Image : &image, 122 | Level : &level, 123 | Message : &message} 124 | 125 | json, _ := json.Marshal(m) 126 | s := string(json) 127 | 128 | if (*x) { 129 | println("J: " + s) 130 | } 131 | 132 | e := &cloudwatchlogs.InputLogEvent{ 133 | Timestamp: aws.Int64(time.Now().UnixNano() / int64(time.Millisecond)), 134 | Message: aws.String(s), 135 | } 136 | 137 | select { 138 | case events <- e: 139 | default: 140 | // we're backed up, drop to stderr 141 | fmt.Fprintf(os.Stderr, "%#v\n", e) 142 | // this error will never be caught because 143 | // no one ever checks the return values of log.* calls 144 | // but return it anyway to be a good citizen 145 | return ErrStreamBackedUp 146 | } 147 | 148 | return nil 149 | 150 | }) 151 | 152 | return l, nil 153 | 154 | } 155 | 156 | func eventLength(e *cloudwatchlogs.InputLogEvent) int { 157 | return len(*e.Message) + 26 // padding per spec 158 | } 159 | 160 | func (l *Logger) flush(logEvents []*cloudwatchlogs.InputLogEvent, x *bool) { 161 | 162 | // The maximum rate of a PutLogEvents request is 5 requests per second per log stream. 163 | rate := NewRateLimiter(5, time.Second) 164 | defer rate.Close() 165 | 166 | for len(logEvents) > 0 && rate.Ready() { 167 | 168 | var ( 169 | batchSize int 170 | batch []*cloudwatchlogs.InputLogEvent 171 | ) 172 | 173 | // None of the log events in the batch can be more than 2 hours in the future. 174 | // None of the log events in the batch can be older than 14 days or the retention period of the log group. 175 | // The log events in the batch must be in chronological ordered by their timestamp. 
176 | const ( 177 | // The maximum batch size is 1,048,576 bytes, and this size is calculated as the sum of all messages in UTF-8, plus 26 bytes for each log entry. 178 | MaxBatchSize = 1 << 20 179 | // The maximum number of log events in a batch is 10,000. 180 | MaxBatchCount = 10000 181 | ) 182 | 183 | for batchSize < MaxBatchSize && 184 | len(batch) < MaxBatchCount && 185 | len(logEvents) > 0 { 186 | batch = append(batch, logEvents[0]) 187 | batchSize += eventLength(logEvents[0]) 188 | logEvents = logEvents[1:] 189 | } 190 | 191 | input := &cloudwatchlogs.PutLogEventsInput{ 192 | LogEvents: batch, 193 | LogGroupName: l.group, 194 | LogStreamName: l.stream, 195 | SequenceToken: l.sequenceToken, 196 | } 197 | 198 | if err := NewTrier(MaxRetryTime).TryFunc(func() (error, bool) { 199 | if (*x) { 200 | println("P: " + strconv.Itoa(len(input.LogEvents))) 201 | } 202 | resp, err := l.Service.PutLogEvents(input) 203 | if (*x) { 204 | println(fmt.Sprintf("R: %v", resp)) 205 | println(fmt.Sprintf("E: %v", err)) 206 | } 207 | 208 | if err != nil { 209 | if awsErr, ok := err.(awserr.Error); ok { 210 | switch awsErr.Code() { 211 | case "DataAlreadyAcceptedException": 212 | fmt.Fprintln(os.Stderr, "batch already added..") 213 | return nil, false 214 | case "ResourceNotFoundException": 215 | fmt.Fprintln(os.Stderr, "group or stream not found, creating...") 216 | if _, err := l.Service.CreateLogGroup(&cloudwatchlogs.CreateLogGroupInput{ 217 | LogGroupName: l.group, 218 | }); err != nil { 219 | fmt.Fprintf(os.Stderr, "create group err: %v", err) 220 | } 221 | if _, err = l.Service.CreateLogStream(&cloudwatchlogs.CreateLogStreamInput{ 222 | LogGroupName: l.group, 223 | LogStreamName: l.stream, 224 | }); err != nil { 225 | fmt.Fprintf(os.Stderr, "create stream err: %v", err) 226 | } 227 | return errors.New("retry"), true 228 | case "InvalidSequenceTokenException": 229 | // parse token from error (jank aws) 230 | // The given sequenceToken is invalid. 
The next expected sequenceToken is: 49540114571107725906840645449746451546762543407852177650 231 | msg := awsErr.Message() 232 | if i := strings.LastIndex(msg, " "); i > -1 { 233 | token := strings.TrimSpace(msg[i:]) 234 | input.SequenceToken = &token 235 | } 236 | return err, true 237 | // Returned if a parameter of the request is incorrectly specified. 238 | case "InvalidParameterException": 239 | fmt.Fprintln(os.Stderr, "aws error", awsErr.Code(), awsErr.Message(), awsErr.OrigErr()) 240 | return err, false 241 | } 242 | 243 | fmt.Fprintln(os.Stderr, "aws error", awsErr.Code(), awsErr.Message(), awsErr.OrigErr()) 244 | fmt.Fprintln(os.Stderr, "retrying...") 245 | 246 | return err, true 247 | } 248 | 249 | // Generic AWS error with Code, Message, and original error (if any) 250 | if reqErr, ok := err.(awserr.RequestFailure); ok { 251 | // A Service error occurred 252 | fmt.Fprintln(os.Stderr, "aws fail", reqErr.Code(), reqErr.Message(), reqErr.OrigErr()) 253 | return reqErr, false 254 | } 255 | 256 | // This case should never be hit, the SDK should always return an 257 | // error which satisfies the awserr.Error interface. 
258 | fmt.Fprintf(os.Stderr, "unexpected err: %v\n", err) 259 | return err, false 260 | } 261 | 262 | l.sequenceToken = resp.NextSequenceToken 263 | 264 | return nil, false 265 | 266 | }); err != nil { 267 | failBatch(batch) 268 | } 269 | 270 | } 271 | 272 | } 273 | 274 | func failBatch(batch []*cloudwatchlogs.InputLogEvent) { 275 | // batch failed, drop it and move on 276 | fmt.Fprint(os.Stderr, "batch failed: ") 277 | if err := json.NewEncoder(os.Stderr).Encode(batch); err != nil { 278 | fmt.Fprintf(os.Stderr, "%#v\n", batch) 279 | } 280 | } 281 | 282 | func (l *Logger) Write(b []byte) (int, error) { 283 | return l.sw.Write(b) 284 | } 285 | 286 | func (l *Logger) WriteJSON(v interface{}) error { 287 | return json.NewEncoder(l).Encode(v) 288 | } 289 | 290 | func (l *Logger) WriteRoundTrip(resp *http.Response, duration time.Duration) error { 291 | type ( 292 | Request struct { 293 | Method string 294 | URL *url.URL 295 | Header http.Header 296 | ContentLength int64 297 | } 298 | Response struct { 299 | StatusCode int 300 | Header http.Header 301 | ContentLength int64 302 | } 303 | Payload struct { 304 | Type string 305 | Request Request 306 | Response Response 307 | Duration time.Duration 308 | } 309 | ) 310 | return l.WriteJSON(&Payload{ 311 | Type: "roundtrip", 312 | Request: Request{ 313 | Method: resp.Request.Method, 314 | URL: resp.Request.URL, 315 | Header: resp.Request.Header, 316 | ContentLength: resp.Request.ContentLength, 317 | }, 318 | Response: Response{ 319 | StatusCode: resp.StatusCode, 320 | Header: resp.Header, 321 | ContentLength: resp.ContentLength, 322 | }, 323 | Duration: duration, 324 | }) 325 | } 326 | 327 | func (l *Logger) WriteError(err error) error { 328 | type Payload struct { 329 | Type string 330 | FunctionName string 331 | FileName string 332 | Line int 333 | Error string 334 | } 335 | pc, fn, line, _ := runtime.Caller(1) 336 | return l.WriteJSON(&Payload{ 337 | Type: "error", 338 | FunctionName: runtime.FuncForPC(pc).Name(), 339 | 
FileName: fn, 340 | Line: line, 341 | Error: err.Error(), 342 | }) 343 | } 344 | 345 | func (l *Logger) Close() error { 346 | l.done <- struct{}{} 347 | <-l.done 348 | return nil 349 | } 350 | --------------------------------------------------------------------------------