├── .gitignore ├── Readme.md ├── internal └── queue │ ├── queue_test.go │ └── queue.go ├── datadog_test.go └── datadog.go /.gitignore: -------------------------------------------------------------------------------- 1 | .envrc -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | Easily send structured logs to [Datadog](https://www.datadoghq.com/) over TCP. 2 | 3 | [![GoDoc](https://godoc.org/github.com/matthewmueller/go-datadog?status.svg)](https://godoc.org/github.com/matthewmueller/go-datadog) 4 | 5 | ## Features 6 | 7 | - Implements `io.Writer` 8 | 9 | ```go 10 | key := os.Getenv("DATADOG_API_KEY") 11 | dd, err := datadog.Dial(&datadog.Config{APIKey: key}) 12 | defer dd.Close() 13 | client.Write([]byte("some log")) 14 | ``` 15 | 16 | - Implements `github.com/apex/log.Handler` 17 | 18 | ```go 19 | key := os.Getenv("DATADOG_API_KEY") 20 | dd, err := datadog.Dial(&datadog.Config{APIKey: key}) 21 | defer dd.Close() 22 | log := log.Logger{ 23 | Level: log.InfoLevel, 24 | Handler: dd, 25 | } 26 | log.Info("some log") 27 | ``` 28 | 29 | ## License 30 | 31 | MIT -------------------------------------------------------------------------------- /internal/queue/queue_test.go: -------------------------------------------------------------------------------- 1 | package queue_test 2 | 3 | import ( 4 | "sync" 5 | "testing" 6 | "time" 7 | 8 | "github.com/matthewmueller/go-datadog/internal/queue" 9 | ) 10 | 11 | // concurrency friendly buffer 12 | type buf struct { 13 | mu sync.Mutex 14 | b string 15 | } 16 | 17 | func (b *buf) Write(s string) { 18 | b.mu.Lock() 19 | defer b.mu.Unlock() 20 | b.b += s 21 | } 22 | 23 | func (b *buf) String() string { 24 | b.mu.Lock() 25 | defer b.mu.Unlock() 26 | return b.b 27 | } 28 | 29 | // func Test 30 | func TestQueue(t *testing.T) { 31 | var b buf 32 | q := queue.New(1, 1) 33 | block := make(chan struct{}) 34 | 35 | go func() { 36 | time.Sleep(200 * time.Millisecond) 37 | b.Write("2") 38 | block <- struct{}{} 39 | }() 40 | 41 | if err := q.Push(func() { 42 | b.Write("1") 43 | <-block 44 | }); err != nil { 45 | t.Fatal(err) 46 | } 47 | 48 | if err := q.Push(func() { 49 | b.Write("3") 50 | }); err != nil { 51 | t.Fatal(err) 52 | } 53 | 54 | q.Wait() 55 | if b.String() != "123" { 56 | t.Fatal(b.String() + " != 123") 57 | } 58 | } 59 | 60 | func TestClose(t *testing.T) { 61 | q := queue.New(1, 1) 62 | block := make(chan struct{}) 63 | 64 | if err := q.Push(func() { 65 | <-block 66 | }); err != nil { 67 | t.Fatal(err) 68 | } 69 | 70 | go func() { 71 | time.Sleep(200 * time.Millisecond) 72 | block <- struct{}{} 73 | }() 74 | 75 | q.Close() 76 | 77 | if err := q.Push(func() { 78 | }); err == nil { 79 | t.Fatal("expecting an error") 80 | } else if err != queue.ErrQueueClosed { 81 | t.Fatal(err) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /internal/queue/queue.go: -------------------------------------------------------------------------------- 1 | package queue 2 | 3 | import ( 4 | "errors" 5 | "sync" 6 | ) 7 | 8 | // ErrQueueClosed returned when we close the queue 9 | var ErrQueueClosed = errors.New("queue is closed") 10 | 11 | // Queue that will never block, if the queue 12 | // is at capacity it will silently drop new 13 | // requests until we have additional capacity. 14 | type Queue struct { 15 | wg *sync.WaitGroup 16 | closed chan struct{} 17 | jobs chan func() 18 | } 19 | 20 | // New queue 21 | func New(capacity int, concurrency int) *Queue { 22 | if capacity == 0 { 23 | panic("capacity must be greater than or equal to 1") 24 | } 25 | if concurrency == 0 { 26 | panic("concurrency must be greater than or equal to 1") 27 | } 28 | 29 | jobs := make(chan func(), capacity-1) 30 | closed := make(chan struct{}) 31 | wg := &sync.WaitGroup{} 32 | 33 | // concurrent workers 34 | for i := 0; i < concurrency; i++ { 35 | go worker(wg, jobs) 36 | } 37 | 38 | return &Queue{wg, closed, jobs} 39 | } 40 | 41 | func worker(wg *sync.WaitGroup, jobs chan func()) { 42 | for job := range jobs { 43 | job() 44 | wg.Done() 45 | } 46 | } 47 | 48 | // Push a function into the queue be called 49 | // when ready. This will never block but it 50 | // may drop functions if we're at capacity. 51 | func (g *Queue) Push(fn func()) error { 52 | g.wg.Add(1) 53 | 54 | // handle closed, there may be pushes 55 | // that are passed this check. Let them 56 | // be processed 57 | select { 58 | case <-g.closed: 59 | g.wg.Done() 60 | return ErrQueueClosed 61 | default: 62 | } 63 | 64 | g.jobs <- fn 65 | return nil 66 | } 67 | 68 | // Wait until the queue has been drained 69 | func (g *Queue) Wait() { 70 | g.wg.Wait() 71 | } 72 | 73 | // Close the queue and wait until it's been drained 74 | func (g *Queue) Close() { 75 | close(g.closed) 76 | g.wg.Wait() 77 | } 78 | -------------------------------------------------------------------------------- /datadog_test.go: -------------------------------------------------------------------------------- 1 | // TODO: somehow verify the logs are in datadog 2 | package datadog_test 3 | 4 | import ( 5 | "bytes" 6 | "fmt" 7 | "os" 8 | "testing" 9 | 10 | "github.com/apex/log" 11 | "github.com/matthewmueller/go-datadog" 12 | ) 13 | 14 | func TestEnv(t *testing.T) { 15 | key := os.Getenv("DATADOG_API_KEY") 16 | if key == "" { 17 | t.Fatal("no DATADOG_API_KEY set") 18 | } 19 | } 20 | 21 | func TestConnect(t *testing.T) { 22 | key := os.Getenv("DATADOG_API_KEY") 23 | dd, err := datadog.Dial(&datadog.Config{ 24 | APIKey: key, 25 | }) 26 | if err != nil { 27 | t.Fatal(err) 28 | } 29 | if err := dd.Close(); err != nil { 30 | t.Fatal(err) 31 | } 32 | } 33 | 34 | func TestWrite(t *testing.T) { 35 | key := os.Getenv("DATADOG_API_KEY") 36 | dd, err := datadog.Dial(&datadog.Config{ 37 | APIKey: key, 38 | }) 39 | if err != nil { 40 | t.Fatal(err) 41 | } 42 | defer dd.Close() 43 | 44 | msg := `me too` 45 | n, err := dd.Write([]byte(msg)) 46 | if err != nil { 47 | t.Fatal(err) 48 | } else if len(msg) != n { 49 | t.Fatal("length mismatch") 50 | } 51 | } 52 | 53 | func TestMultilineWrite(t *testing.T) { 54 | key := os.Getenv("DATADOG_API_KEY") 55 | dd, err := datadog.Dial(&datadog.Config{ 56 | APIKey: key, 57 | }) 58 | if err != nil { 59 | t.Fatal(err) 60 | } 61 | defer dd.Close() 62 | 63 | msg := `me 64 | too 65 | too` 66 | n, err := dd.Write([]byte(msg)) 67 | if err != nil { 68 | t.Fatal(err) 69 | } else if len(msg) != n { 70 | t.Fatal("length mismatch") 71 | } 72 | } 73 | 74 | func TestApex(t *testing.T) { 75 | key := os.Getenv("DATADOG_API_KEY") 76 | dd, err := datadog.Dial(&datadog.Config{ 77 | APIKey: key, 78 | }) 79 | if err != nil { 80 | t.Fatal(err) 81 | } 82 | defer dd.Close() 83 | 84 | log := log.Logger{ 85 | Level: log.InfoLevel, 86 | Handler: dd, 87 | } 88 | 89 | log.WithField("some", "error").Error("error") 90 | log.WithField("some", "warning").Warn("warning") 91 | } 92 | 93 | func TestApexCloseEarly(t *testing.T) { 94 | var stderr bytes.Buffer 95 | 96 | key := os.Getenv("DATADOG_API_KEY") 97 | dd, err := datadog.Dial(&datadog.Config{APIKey: key}) 98 | if err != nil { 99 | t.Fatal(err) 100 | } 101 | 102 | log := log.Logger{ 103 | Level: log.InfoLevel, 104 | Handler: dd, 105 | } 106 | 107 | log.WithField("some", "error").Error("error") 108 | log.WithField("some", "warning").Warn("warning") 109 | dd.Close() 110 | log.WithField("some", "info").Info("info") 111 | 112 | fmt.Println(stderr.String()) 113 | // if stderr.String() != 114 | } 115 | -------------------------------------------------------------------------------- /datadog.go: -------------------------------------------------------------------------------- 1 | package datadog 2 | 3 | import ( 4 | "bytes" 5 | "crypto/tls" 6 | "encoding/json" 7 | "errors" 8 | "fmt" 9 | "io" 10 | "net" 11 | "os" 12 | "sync" 13 | "time" 14 | 15 | "github.com/cenkalti/backoff" 16 | "github.com/matthewmueller/go-datadog/internal/queue" 17 | 18 | "github.com/apex/log" 19 | ) 20 | 21 | var endpoint = "intake.logs.datadoghq.com:10516" 22 | var timeout = 20 * time.Second 23 | var errNoKey = errors.New("No API key provided. Generate one here: https://app.datadoghq.com/account/settings#api") 24 | 25 | // Config struct 26 | type Config struct { 27 | // API Key (required) 28 | APIKey string 29 | 30 | // Optional 31 | // See: https://docs.datadoghq.com/logs/#reserved-attributes 32 | Host string 33 | Service string 34 | Source string 35 | } 36 | 37 | // Dial datadog 38 | func Dial(cfg *Config) (*Datadog, error) { 39 | if cfg.APIKey == "" { 40 | return nil, errNoKey 41 | } 42 | 43 | d := &Datadog{ 44 | Config: cfg, 45 | Queue: queue.New(100, 1), 46 | } 47 | 48 | // establish the initial connection 49 | if err := d.dial(); err != nil { 50 | return nil, err 51 | } 52 | 53 | return d, nil 54 | } 55 | 56 | // Datadog struct 57 | type Datadog struct { 58 | Config *Config 59 | Queue *queue.Queue 60 | 61 | // mu protects the connection 62 | mu sync.Mutex 63 | conn net.Conn 64 | } 65 | 66 | var _ io.WriteCloser = (*Datadog)(nil) 67 | var _ log.Handler = (*Datadog)(nil) 68 | 69 | // Dial with reconnect 70 | func (d *Datadog) redial() error { 71 | backo := backoff.NewExponentialBackOff() 72 | retry: 73 | err := d.dial() 74 | if err == nil { 75 | return nil 76 | } 77 | sleep := backo.NextBackOff() 78 | if sleep == backoff.Stop { 79 | return errors.New("failed to reconnect") 80 | } 81 | time.Sleep(sleep) 82 | goto retry 83 | } 84 | 85 | func (d *Datadog) dial() error { 86 | dialer := &net.Dialer{ 87 | KeepAlive: 5 * time.Minute, 88 | Timeout: timeout, 89 | } 90 | conn, err := dialer.Dial("tcp", endpoint) 91 | if err != nil { 92 | return err 93 | } 94 | sslConn := tls.Client(conn, &tls.Config{ 95 | ServerName: "*.logs.datadoghq.com", 96 | }) 97 | // test the handshake beforehand 98 | if err := sslConn.Handshake(); err != nil { 99 | return err 100 | } 101 | 102 | // update the connection 103 | d.mu.Lock() 104 | d.conn = sslConn 105 | d.mu.Unlock() 106 | 107 | return nil 108 | } 109 | 110 | // HandleLog implements log.Handler 111 | // 112 | // Doesn't start blocking until the channel is full 113 | // If the queue is closed, this function returns 114 | // and error immediately 115 | func (d *Datadog) HandleLog(l *log.Entry) error { 116 | return d.Queue.Push(func() { d.send(l) }) 117 | } 118 | 119 | // Send the entry to datadog 120 | // TODO: any better way to handle errors here? 121 | func (d *Datadog) send(e *log.Entry) error { 122 | entry := map[string]interface{}{} 123 | entry["host"] = d.Config.Host 124 | entry["service"] = d.Config.Service 125 | entry["source"] = d.Config.Source 126 | 127 | for k, v := range e.Fields { 128 | entry[k] = v 129 | } 130 | 131 | entry["level"] = e.Level 132 | entry["message"] = e.Message 133 | entry["timestamp"] = e.Timestamp.Format(time.RFC3339) 134 | 135 | buf, err := json.Marshal(entry) 136 | if err != nil { 137 | fmt.Fprintln(os.Stderr, err) 138 | return err 139 | } 140 | 141 | if _, err := d.Write(buf); err != nil { 142 | fmt.Fprintln(os.Stderr, err) 143 | return err 144 | } 145 | 146 | return nil 147 | } 148 | 149 | // Write to the tcp connection 150 | // TODO: retry until we reach n == len(buf) 151 | func (d *Datadog) Write(b []byte) (int, error) { 152 | var buf bytes.Buffer 153 | buf.WriteString(d.Config.APIKey) 154 | buf.WriteString(" ") 155 | buf.Write(b) 156 | buf.WriteString("\n") 157 | 158 | d.conn.SetWriteDeadline(time.Now().Add(5 * time.Second)) 159 | n, err := io.Copy(d.conn, &buf) 160 | if err != nil { 161 | if err := d.redial(); err != nil { 162 | return int(n), err 163 | } 164 | return d.Write(b) 165 | } 166 | return len(b), nil 167 | } 168 | 169 | // Close the datadog connection 170 | func (d *Datadog) Close() error { 171 | // ignore any new logs that come in 172 | // process the logs that we have 173 | d.Queue.Close() 174 | 175 | // close the TCP connection 176 | return d.conn.Close() 177 | } 178 | 179 | // Flush the queue 180 | func (d *Datadog) Flush() { 181 | d.Queue.Wait() 182 | } 183 | --------------------------------------------------------------------------------