├── runtime_metrics ├── fds_darwin.go ├── fds_linux.go ├── runtime_metrics_test.go └── runtime_metrics.go ├── identifier.go ├── TODO ├── error.go ├── healthd ├── debouncer.go ├── mock_time.go ├── debouncer_test.go ├── poll.go ├── poll_test.go ├── healthd_test.go ├── api.go └── healthd.go ├── stack ├── stack.go ├── frame.go └── stack_test.go ├── LICENSE ├── json_polling_sink_test.go ├── sinks ├── bugsnag │ ├── sink_test.go │ ├── api_test.go │ ├── sink.go │ └── api.go └── librato │ ├── sink_test.go │ └── sink.go ├── json_polling_sink_http.go ├── json_polling_sink_http_test.go ├── interval_aggregation_clone.go ├── cmd ├── healthtop │ ├── main.go │ ├── hosts.go │ └── jobs.go └── healthd │ └── main.go ├── json_writer_sink.go ├── interval_aggregation_merge.go ├── json_polling_sink.go ├── error_test.go ├── interval_aggregation_merge_test.go ├── health_test.go ├── writer_sink.go ├── json_writer_sink_test.go ├── interval_aggregation_clone_test.go ├── interval_aggregation.go ├── aggregator.go ├── health.go ├── writer_sink_test.go ├── aggregator_test.go ├── statsd_sink.go ├── statsd_sink_test.go └── README.md /runtime_metrics/fds_darwin.go: -------------------------------------------------------------------------------- 1 | package runtime_metrics 2 | 3 | func getFDUsage() (uint64, error) { 4 | return 0, nil 5 | } 6 | -------------------------------------------------------------------------------- /runtime_metrics/fds_linux.go: -------------------------------------------------------------------------------- 1 | package runtime_metrics 2 | 3 | import ( 4 | "io/ioutil" 5 | ) 6 | 7 | func getFDUsage() (uint64, error) { 8 | fds, err := ioutil.ReadDir("/proc/self/fd") 9 | if err != nil { 10 | return 0, err 11 | } 12 | return uint64(len(fds)), nil 13 | } 14 | -------------------------------------------------------------------------------- /identifier.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | ) 7 | 8 | var Identifier = getIdentifier() 9 | 10 | func getIdentifier() string { 11 | pid := os.Getpid() 12 | host, err := os.Hostname() 13 | if err != nil { 14 | host = "hostname_errored" 15 | } 16 | 17 | return fmt.Sprintf("%s.%d", host, pid) 18 | } 19 | -------------------------------------------------------------------------------- /TODO: -------------------------------------------------------------------------------- 1 | - make sure bugsnag works w/o Hostname and ReleaseStage (eg defaults work) 2 | - make sure healthtop default sort works or at least figure out what it is 3 | - in readme make sure I have the right syntax for http.Handle. 4 | - in readme make sure I get samples of how everything is logged. 
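  (for the http.Handle / logging-sample items above, a minimal sketch assuming the polling sink in this repo; double-check against the real API before it goes in the readme:
      stream := health.NewStream()
      sink := health.NewJsonPollingSink(time.Minute, time.Minute*5)
      stream.AddSink(sink)
      sink.StartServer(":5020")   // serves GET /health as JSON
      // or mount it on an existing mux: http.Handle("/health", sink)
  )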
5 | - screenshots for healthtop 6 | - remove self-logging in healthd -------------------------------------------------------------------------------- /error.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "github.com/gocraft/health/stack" 5 | ) 6 | 7 | type MutedError struct { 8 | Err error 9 | } 10 | 11 | type UnmutedError struct { 12 | Err error 13 | Stack *stack.Trace 14 | Emitted bool 15 | } 16 | 17 | func (e *MutedError) Error() string { 18 | return e.Err.Error() 19 | } 20 | 21 | func (e *UnmutedError) Error() string { 22 | return e.Err.Error() 23 | } 24 | 25 | func Mute(err error) *MutedError { 26 | return &MutedError{Err: err} 27 | } 28 | 29 | func wrapErr(err error) error { 30 | switch err := err.(type) { 31 | case *MutedError, *UnmutedError: 32 | return err 33 | default: 34 | return &UnmutedError{Err: err, Stack: stack.NewTrace(2)} 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /healthd/debouncer.go: -------------------------------------------------------------------------------- 1 | package healthd 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | // A) don't fire more than every 2 seconds B) the time between an input and output should be at most 2 seconds 8 | func debouncer(doit chan<- struct{}, needitdone <-chan struct{}, threshold time.Duration, sleepTime time.Duration) { 9 | var oldestNeedItDone time.Time 10 | 11 | for { 12 | select { 13 | case <-needitdone: 14 | if oldestNeedItDone.IsZero() { 15 | oldestNeedItDone = now() 16 | } 17 | default: 18 | // This sleep time is the max error that we'll be off by. 19 | time.Sleep(sleepTime) 20 | } 21 | 22 | if !oldestNeedItDone.IsZero() && (now().Sub(oldestNeedItDone) > threshold) { 23 | doit <- struct{}{} 24 | oldestNeedItDone = time.Time{} // Zero the object 25 | } 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /healthd/mock_time.go: -------------------------------------------------------------------------------- 1 | package healthd 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | ) 7 | 8 | var nowMock time.Time 9 | var nowMut sync.RWMutex 10 | 11 | func now() time.Time { 12 | nowMut.RLock() 13 | defer nowMut.RUnlock() 14 | if nowMock.IsZero() { 15 | return time.Now() 16 | } 17 | return nowMock 18 | } 19 | 20 | func setNowMock(t string) { 21 | var err error 22 | nowMut.Lock() 23 | defer nowMut.Unlock() 24 | nowMock, err = time.Parse(time.RFC3339, t) 25 | if err != nil { 26 | panic(err) 27 | } 28 | } 29 | 30 | func advanceNowMock(dur time.Duration) { 31 | nowMut.Lock() 32 | defer nowMut.Unlock() 33 | if nowMock.IsZero() { 34 | panic("nowMock is not set") 35 | } 36 | nowMock = nowMock.Add(dur) 37 | } 38 | 39 | func resetNowMock() { 40 | nowMut.Lock() 41 | defer nowMut.Unlock() 42 | nowMock = time.Time{} 43 | } 44 | -------------------------------------------------------------------------------- /stack/stack.go: -------------------------------------------------------------------------------- 1 | package stack 2 | 3 | import ( 4 | "bytes" 5 | "runtime" 6 | ) 7 | 8 | // MaxStackDepth is the maximum number of stackframes on any error. 
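// Traces deeper than MaxStackDepth are silently truncated, since NewTrace only hands runtime.Callers a slice of this length.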
9 | var MaxStackDepth = 50 10 | 11 | type Trace struct { 12 | stack []uintptr 13 | frames []Frame 14 | } 15 | 16 | func NewTrace(skip int) *Trace { 17 | stack := make([]uintptr, MaxStackDepth) 18 | length := runtime.Callers(2+skip, stack) 19 | return &Trace{ 20 | stack: stack[:length], 21 | } 22 | } 23 | 24 | // StackFrames returns an array of frames containing information about the stack. 25 | func (t *Trace) Frames() []Frame { 26 | if t.frames == nil { 27 | t.frames = make([]Frame, len(t.stack)) 28 | 29 | for i, pc := range t.stack { 30 | t.frames[i] = NewFrame(pc) 31 | } 32 | } 33 | 34 | return t.frames 35 | } 36 | 37 | // Stack returns a formatted callstack. 38 | func (t *Trace) Stack() []byte { 39 | buf := bytes.Buffer{} 40 | 41 | for _, frame := range t.Frames() { 42 | buf.WriteString(frame.String()) 43 | buf.WriteRune('\n') 44 | } 45 | 46 | return buf.Bytes() 47 | } 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Jonathan Novak 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /json_polling_sink_test.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestJsonPollingSink(t *testing.T) { 12 | setNowMock("2011-09-09T23:36:13Z") 13 | defer resetNowMock() 14 | 15 | sink := NewJsonPollingSink(time.Minute, time.Minute*5) 16 | 17 | sink.EmitEvent("myjob", "myevent", nil) 18 | sink.EmitEventErr("myjob", "myevent", errors.New("myerr"), nil) 19 | sink.EmitTiming("myjob", "myevent", 100, nil) 20 | sink.EmitGauge("myjob", "myevent", 3.14, nil) 21 | sink.EmitComplete("myjob", Success, 9, nil) 22 | 23 | time.Sleep(10 * time.Millisecond) // we need to make sure we process the above metrics before we get the metrics. 
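// (The Emit* calls above only enqueue commands on the sink's buffered cmdChan; the aggregator goroutine applies them asynchronously, so without this pause GetMetrics could run before the emits are folded in.)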
24 | intervals := sink.GetMetrics() 25 | 26 | sink.ShutdownServer() 27 | 28 | assert.Equal(t, 1, len(intervals)) 29 | 30 | intAgg := intervals[0] 31 | assert.EqualValues(t, 1, intAgg.Events["myevent"]) 32 | assert.EqualValues(t, 3.14, intAgg.Gauges["myevent"]) 33 | assert.EqualValues(t, 1, intAgg.EventErrs["myevent"].Count) 34 | assert.EqualValues(t, 1, intAgg.Timers["myevent"].Count) 35 | assert.EqualValues(t, 1, intAgg.Jobs["myjob"].Count) 36 | } 37 | -------------------------------------------------------------------------------- /healthd/debouncer_test.go: -------------------------------------------------------------------------------- 1 | package healthd 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | ) 7 | 8 | func TestDebouncer(t *testing.T) { 9 | doit := make(chan struct{}) 10 | needitdone := make(chan struct{}) 11 | 12 | setNowMock("2011-09-09T23:36:13Z") 13 | defer resetNowMock() 14 | 15 | go debouncer(doit, needitdone, time.Second*2, time.Millisecond) 16 | 17 | needitdone <- struct{}{} 18 | needitdone <- struct{}{} 19 | 20 | time.Sleep(time.Millisecond * 2) 21 | 22 | select { 23 | case <-doit: 24 | t.Error("Did it too soon") 25 | default: 26 | // cool 27 | } 28 | 29 | advanceNowMock(time.Second * 1) 30 | time.Sleep(time.Millisecond * 2) // Need the goroutine to wake up 31 | 32 | select { 33 | case <-doit: 34 | t.Error("Did it too soon") 35 | default: 36 | // cool 37 | } 38 | 39 | advanceNowMock(time.Second * 2) 40 | time.Sleep(time.Millisecond * 2) // Need the goroutine to wake up 41 | 42 | select { 43 | case <-doit: 44 | // cool 45 | default: 46 | t.Error("never did it") 47 | } 48 | 49 | time.Sleep(time.Millisecond * 2) // Need the goroutine to wake up 50 | 51 | select { 52 | case <-doit: 53 | t.Error("should only do it once") 54 | default: 55 | // cool 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /sinks/bugsnag/sink_test.go: -------------------------------------------------------------------------------- 1 | package bugsnag 2 | 3 | import ( 4 | "fmt" 5 | "github.com/gocraft/health" 6 | "github.com/gocraft/health/stack" 7 | "github.com/stretchr/testify/assert" 8 | "net/http" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | func TestSink(t *testing.T) { 14 | config := &Config{ 15 | APIKey: "abcd", 16 | Endpoint: "http://localhost:5052/", 17 | ReleaseStage: "staging", 18 | AppVersion: "1.0", 19 | Hostname: "", 20 | } 21 | 22 | s := NewSink(config) 23 | defer s.ShutdownServer() 24 | 25 | n := notifyHandler{ 26 | PayloadChan: make(chan *payload, 2), 27 | } 28 | 29 | go http.ListenAndServe(":5052", n) 30 | 31 | err := &health.UnmutedError{Err: fmt.Errorf("err str"), Stack: stack.NewTrace(2)} 32 | s.EmitEventErr("thejob", "theevent", err, nil) 33 | 34 | p := <-n.PayloadChan 35 | evt := p.Events[0] 36 | assert.Equal(t, evt.Context, "thejob") 37 | 38 | ex := evt.Exceptions[0] 39 | assert.Equal(t, ex.ErrorClass, "theevent") 40 | assert.Equal(t, ex.Message, "err str") 41 | 42 | err.Emitted = true 43 | s.EmitEventErr("thejob", "theevent2", err, nil) 44 | 45 | time.Sleep(1 * time.Millisecond) 46 | 47 | select { 48 | case <-n.PayloadChan: 49 | t.Errorf("did not expect payload") 50 | default: 51 | // yay 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /json_polling_sink_http.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | "time" 8 | ) 9 | 10 | type HealthAggregationsResponse 
struct { 11 | InstanceId string `json:"instance_id"` 12 | IntervalDuration time.Duration `json:"interval_duration"` 13 | IntervalAggregations []*IntervalAggregation `json:"aggregations"` 14 | } 15 | 16 | func (s *JsonPollingSink) StartServer(addr string) { 17 | go http.ListenAndServe(addr, s) 18 | } 19 | 20 | func (s *JsonPollingSink) ServeHTTP(rw http.ResponseWriter, r *http.Request) { 21 | rw.Header().Set("Content-Type", "application/json; charset=utf-8") 22 | if r.URL.Path == "/health" { 23 | metrics := s.GetMetrics() 24 | response := &HealthAggregationsResponse{ 25 | InstanceId: Identifier, 26 | IntervalDuration: s.intervalDuration, 27 | IntervalAggregations: metrics, 28 | } 29 | jsonData, err := json.MarshalIndent(response, "", "\t") 30 | if err != nil { 31 | renderError(rw, err) 32 | return 33 | } 34 | fmt.Fprintf(rw, string(jsonData)) 35 | } else { 36 | renderNotFound(rw) 37 | } 38 | } 39 | 40 | func renderNotFound(rw http.ResponseWriter) { 41 | rw.WriteHeader(404) 42 | fmt.Fprintf(rw, `{"error": "not_found"}`) 43 | } 44 | 45 | func renderError(rw http.ResponseWriter, err error) { 46 | rw.WriteHeader(500) 47 | fmt.Fprintf(rw, `{"error": "%s"}`, err.Error()) 48 | } 49 | -------------------------------------------------------------------------------- /json_polling_sink_http_test.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/stretchr/testify/assert" 7 | "net/http" 8 | "net/http/httptest" 9 | "testing" 10 | "time" 11 | ) 12 | 13 | func TestJsonPollingSinkServerSuccess(t *testing.T) { 14 | sink := NewJsonPollingSink(time.Minute, time.Minute*5) 15 | defer sink.ShutdownServer() 16 | 17 | sink.EmitEvent("myjob", "myevent", nil) 18 | sink.EmitEventErr("myjob", "myevent", fmt.Errorf("myerr"), nil) 19 | sink.EmitTiming("myjob", "myevent", 100, nil) 20 | sink.EmitGauge("myjob", "myevent", 3.14, nil) 21 | sink.EmitComplete("myjob", Success, 9, nil) 22 | 23 | time.Sleep(10 * time.Millisecond) 24 | 25 | recorder := httptest.NewRecorder() 26 | request, _ := http.NewRequest("GET", "/health", nil) 27 | 28 | sink.ServeHTTP(recorder, request) 29 | 30 | assert.Equal(t, 200, recorder.Code) 31 | 32 | var resp HealthAggregationsResponse 33 | err := json.Unmarshal(recorder.Body.Bytes(), &resp) 34 | assert.NoError(t, err) 35 | assert.Equal(t, 1, len(resp.IntervalAggregations)) 36 | assert.Equal(t, map[string]int64{"myevent": 1}, resp.IntervalAggregations[0].Events) 37 | } 38 | 39 | func TestJsonPollingSinkServerNotFound(t *testing.T) { 40 | sink := NewJsonPollingSink(time.Minute, time.Minute*5) 41 | defer sink.ShutdownServer() 42 | 43 | recorder := httptest.NewRecorder() 44 | request, _ := http.NewRequest("GET", "/wat", nil) 45 | sink.ServeHTTP(recorder, request) 46 | assert.Equal(t, 404, recorder.Code) 47 | } 48 | -------------------------------------------------------------------------------- /runtime_metrics/runtime_metrics_test.go: -------------------------------------------------------------------------------- 1 | package runtime_metrics 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | type testReceiver struct { 8 | gauges map[string]float64 9 | } 10 | 11 | func TestRuntimeMetrics(t *testing.T) { 12 | tr := &testReceiver{ 13 | gauges: make(map[string]float64), 14 | } 15 | m := NewRuntimeMetrics(tr, nil) 16 | m.Start() 17 | defer m.Stop() 18 | m.Report() 19 | 20 | expectedKeys := []string{"heap_objects", "alloc", "num_gc", "next_gc", "gc_cpu_fraction", "pause_total_ns", 
"gc_pause_quantile_50", "gc_pause_quantile_max", "num_cgo_call", "num_goroutines", "num_fds_used"} 21 | 22 | for _, k := range expectedKeys { 23 | if _, ok := tr.gauges[k]; !ok { 24 | t.Errorf("expected to have key %s but didn't. map=%v", k, tr.gauges) 25 | } 26 | } 27 | } 28 | 29 | func (t *testReceiver) Event(eventName string) { 30 | 31 | } 32 | 33 | func (t *testReceiver) EventKv(eventName string, kvs map[string]string) { 34 | 35 | } 36 | 37 | func (t *testReceiver) EventErr(eventName string, err error) error { 38 | return nil 39 | } 40 | 41 | func (t *testReceiver) EventErrKv(eventName string, err error, kvs map[string]string) error { 42 | return nil 43 | } 44 | 45 | func (t *testReceiver) Timing(eventName string, nanoseconds int64) { 46 | 47 | } 48 | 49 | func (t *testReceiver) TimingKv(eventName string, nanoseconds int64, kvs map[string]string) { 50 | 51 | } 52 | 53 | func (t *testReceiver) Gauge(eventName string, value float64) { 54 | t.gauges[eventName] = value 55 | } 56 | 57 | func (t *testReceiver) GaugeKv(eventName string, value float64, kvs map[string]string) { 58 | 59 | } 60 | -------------------------------------------------------------------------------- /interval_aggregation_clone.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | // Clone does a deep clone of ia, duplicating all maps and whatnot. 4 | func (ia *IntervalAggregation) Clone() *IntervalAggregation { 5 | dup := &IntervalAggregation{} 6 | dup.IntervalStart = ia.IntervalStart 7 | dup.SerialNumber = ia.SerialNumber 8 | dup.aggregationMaps = *ia.aggregationMaps.Clone() 9 | 10 | dup.Jobs = make(map[string]*JobAggregation) 11 | for k, v := range ia.Jobs { 12 | dup.Jobs[k] = v.Clone() 13 | } 14 | 15 | return dup 16 | } 17 | 18 | func (am *aggregationMaps) Clone() *aggregationMaps { 19 | dup := &aggregationMaps{} 20 | 21 | dup.initAggregationMaps() 22 | 23 | for k, v := range am.Events { 24 | dup.Events[k] = v 25 | } 26 | 27 | for k, v := range am.Gauges { 28 | dup.Gauges[k] = v 29 | } 30 | 31 | for k, v := range am.Timers { 32 | dup.Timers[k] = v.Clone() 33 | } 34 | 35 | for k, v := range am.EventErrs { 36 | dup.EventErrs[k] = v.Clone() 37 | } 38 | 39 | return dup 40 | } 41 | 42 | func (ta *TimerAggregation) Clone() *TimerAggregation { 43 | var dup = *ta 44 | return &dup 45 | } 46 | 47 | func (ec *ErrorCounter) Clone() *ErrorCounter { 48 | var dup = *ec 49 | return &dup 50 | } 51 | 52 | func (ja *JobAggregation) Clone() *JobAggregation { 53 | dup := &JobAggregation{ 54 | CountSuccess: ja.CountSuccess, 55 | CountValidationError: ja.CountValidationError, 56 | CountPanic: ja.CountPanic, 57 | CountError: ja.CountError, 58 | CountJunk: ja.CountJunk, 59 | } 60 | 61 | dup.aggregationMaps = *ja.aggregationMaps.Clone() 62 | dup.TimerAggregation = ja.TimerAggregation 63 | 64 | return dup 65 | } 66 | -------------------------------------------------------------------------------- /healthd/poll.go: -------------------------------------------------------------------------------- 1 | package healthd 2 | 3 | import ( 4 | "encoding/json" 5 | "github.com/gocraft/health" 6 | "io/ioutil" 7 | "net/http" 8 | "time" 9 | ) 10 | 11 | type pollResponse struct { 12 | HostPort string 13 | Timestamp time.Time 14 | 15 | Err error 16 | Code int 17 | Nanos int64 18 | 19 | health.HealthAggregationsResponse 20 | } 21 | 22 | // poll checks a server 23 | func poll(stream *health.Stream, hostPort string, responses chan<- *pollResponse) { 24 | job := stream.NewJob("poll") 25 | 26 | var body 
[]byte 27 | var err error 28 | 29 | response := &pollResponse{ 30 | HostPort: hostPort, 31 | Timestamp: now(), 32 | } 33 | 34 | start := time.Now() 35 | 36 | client := &http.Client{ 37 | Timeout: 5 * time.Second, 38 | } 39 | 40 | resp, err := client.Get(metricsUrl(hostPort)) 41 | if err != nil { 42 | response.Err = job.EventErr("poll.client.get", err) 43 | goto POLL_FINISH 44 | } 45 | defer resp.Body.Close() 46 | body, err = ioutil.ReadAll(resp.Body) 47 | 48 | response.Nanos = time.Since(start).Nanoseconds() // don't mock b/c we need duration 49 | response.Code = resp.StatusCode 50 | 51 | if err != nil { // ioutil.ReadAll. We're checking here b/c we still want to capture nanos/code 52 | response.Err = job.EventErr("poll.ioutil.read_all", err) 53 | goto POLL_FINISH 54 | } 55 | 56 | if err := json.Unmarshal(body, &response.HealthAggregationsResponse); err != nil { 57 | response.Err = job.EventErr("poll.json.unmarshall", err) 58 | goto POLL_FINISH 59 | } 60 | 61 | POLL_FINISH: 62 | 63 | if response.Err != nil { 64 | job.CompleteKv(health.Error, health.Kvs{"host_port": hostPort}) 65 | } else { 66 | job.CompleteKv(health.Success, health.Kvs{"host_port": hostPort}) 67 | } 68 | 69 | responses <- response 70 | } 71 | 72 | func metricsUrl(hostPort string) string { 73 | return "http://" + hostPort + "/health" 74 | } 75 | -------------------------------------------------------------------------------- /cmd/healthtop/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/spf13/cobra" 6 | "time" 7 | ) 8 | 9 | // v2: 10 | // jobs/min vs jobs total vs jobs/sec (eg normalization) 11 | // - errors ?????? 12 | // - tests 13 | // - switch views/sorts while inside healthd 14 | 15 | type healthdStatus struct { 16 | lastSuccessAt time.Time 17 | lastErrorAt time.Time 18 | lastError error 19 | } 20 | 21 | func (s *healthdStatus) FmtNow() string { 22 | return time.Now().Format(time.RFC1123) 23 | } 24 | 25 | func (s *healthdStatus) FmtStatus() string { 26 | if s.lastErrorAt.IsZero() && s.lastSuccessAt.IsZero() { 27 | return "[starting...]" 28 | } else if s.lastErrorAt.After(s.lastSuccessAt) { 29 | return fmt.Sprint("[error: '", s.lastError.Error(), "' LastErrorAt: ", s.lastErrorAt.Format(time.RFC1123), "]") 30 | } else { 31 | return "[success]" 32 | } 33 | } 34 | 35 | var sourceHostPort string 36 | 37 | func main() { 38 | var cmdRoot = &cobra.Command{ 39 | Use: "healthtop [command]", 40 | } 41 | cmdRoot.PersistentFlags().StringVar(&sourceHostPort, "source", "localhost:5032", "source is the host:port of the healthd to query. 
ex: localhost:5031") 42 | 43 | var sort string 44 | var name string 45 | 46 | var cmdJobs = &cobra.Command{ 47 | Use: "jobs", 48 | Short: "list jobs", 49 | Run: func(cmd *cobra.Command, args []string) { 50 | jobsLoop(&jobOptions{Name: name, Sort: sort}) 51 | }, 52 | } 53 | 54 | cmdJobs.Flags().StringVar(&sort, "sort", "name", "sort ∈ {name, count, count_success, count_XXX, min, max, avg}") 55 | cmdJobs.Flags().StringVar(&name, "name", "", "name is a partial match on the name") 56 | 57 | var cmdHosts = &cobra.Command{ 58 | Use: "hosts", 59 | Short: "list hosts", 60 | Run: func(cmd *cobra.Command, args []string) { 61 | hostsLoop() 62 | }, 63 | } 64 | 65 | cmdRoot.AddCommand(cmdJobs) 66 | cmdRoot.AddCommand(cmdHosts) 67 | cmdRoot.Execute() 68 | } 69 | -------------------------------------------------------------------------------- /healthd/poll_test.go: -------------------------------------------------------------------------------- 1 | package healthd 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | "testing" 8 | "time" 9 | 10 | "github.com/braintree/manners" 11 | "github.com/gocraft/health" 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | func TestPoll(t *testing.T) { 16 | setNowMock("2011-09-09T23:36:13Z") 17 | defer resetNowMock() 18 | 19 | intAgg := health.NewIntervalAggregation(now()) 20 | data := &health.HealthAggregationsResponse{ 21 | InstanceId: "web22.12345", 22 | IntervalDuration: time.Minute, 23 | IntervalAggregations: []*health.IntervalAggregation{intAgg}, 24 | } 25 | stop := serveJson(":5050", data) 26 | defer func() { 27 | stop() 28 | }() 29 | 30 | responses := make(chan *pollResponse, 2) 31 | poll(health.NewStream(), ":5050", responses) 32 | response := <-responses 33 | 34 | assert.NotNil(t, response) 35 | assert.Equal(t, response.HostPort, ":5050") 36 | assert.Equal(t, response.Timestamp, now()) 37 | assert.Nil(t, response.Err) 38 | assert.Equal(t, response.Code, 200) 39 | assert.True(t, response.Nanos > 0 && response.Nanos < int64(time.Second)) 40 | assert.Equal(t, response.InstanceId, "web22.12345") 41 | // we'll just "trust" that the other stuff gets unmarshalled correctly. We didn't really put anything in there anyway in this test. 42 | } 43 | 44 | // serveJson will start a server on the hostPort and serve any path the Jsonified data. 45 | // Each successive HTTP request will return the next data. 46 | // If there is only one data, it will be returned on each request. 
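// The returned func() bool is manners.Close, so callers can stop the test server when they are done (the test above defers it).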
47 | func serveJson(hostPort string, data ...interface{}) func() bool { 48 | var curData = 0 49 | 50 | var f http.HandlerFunc 51 | f = func(rw http.ResponseWriter, r *http.Request) { 52 | d := data[curData] 53 | curData = (curData + 1) % len(data) 54 | jsonData, err := json.MarshalIndent(d, "", "\t") 55 | if err != nil { 56 | panic(err) 57 | } 58 | fmt.Fprintf(rw, string(jsonData)) 59 | } 60 | 61 | go manners.ListenAndServe(hostPort, f) 62 | time.Sleep(10 * time.Millisecond) 63 | 64 | return manners.Close 65 | } 66 | -------------------------------------------------------------------------------- /sinks/bugsnag/api_test.go: -------------------------------------------------------------------------------- 1 | package bugsnag 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/gocraft/health/stack" 7 | "github.com/stretchr/testify/assert" 8 | "io/ioutil" 9 | "net/http" 10 | "strings" 11 | "testing" 12 | "time" 13 | ) 14 | 15 | func TestNotify(t *testing.T) { 16 | config := &Config{ 17 | APIKey: "abcd", 18 | Endpoint: "http://localhost:5051/", 19 | ReleaseStage: "staging", 20 | AppVersion: "1.0", 21 | Hostname: "", 22 | } 23 | 24 | n := notifyHandler{ 25 | PayloadChan: make(chan *payload, 1), 26 | } 27 | 28 | go http.ListenAndServe(":5051", n) 29 | time.Sleep(10 * time.Millisecond) 30 | 31 | err := Notify(config, "users/get", "foo.bar", fmt.Errorf("imanerror"), stack.NewTrace(0), make(map[string]string)) 32 | if err != nil { 33 | t.Fatalf("expected no error, got %v", err) 34 | } 35 | 36 | p := <-n.PayloadChan 37 | 38 | assert.NotNil(t, p) 39 | assert.Equal(t, p.APIKey, "abcd") 40 | assert.Equal(t, p.Notifier.Name, "health") 41 | assert.Equal(t, len(p.Events), 1) 42 | 43 | evt := p.Events[0] 44 | assert.Equal(t, evt.Context, "users/get") 45 | assert.Equal(t, evt.App.ReleaseStage, "staging") 46 | assert.Equal(t, len(evt.Exceptions), 1) 47 | 48 | ex := evt.Exceptions[0] 49 | assert.Equal(t, ex.ErrorClass, "foo.bar") 50 | assert.Equal(t, ex.Message, "imanerror") 51 | 52 | frame := ex.Stacktrace[0] 53 | assert.True(t, strings.HasSuffix(frame.File, "api_test.go")) 54 | assert.Equal(t, frame.Method, "github.com/gocraft/health/sinks/bugsnag:TestNotify") 55 | 56 | } 57 | 58 | type notifyHandler struct { 59 | PayloadChan chan *payload 60 | } 61 | 62 | func (h notifyHandler) ServeHTTP(rw http.ResponseWriter, r *http.Request) { 63 | body, err := ioutil.ReadAll(r.Body) 64 | if err != nil { 65 | fmt.Fprintf(rw, "got error in ready body: %v", err) 66 | return 67 | } 68 | 69 | var resp payload 70 | err = json.Unmarshal(body, &resp) 71 | if err != nil { 72 | fmt.Fprintf(rw, "got error in unmarshal: %v", err) 73 | return 74 | } 75 | 76 | h.PayloadChan <- &resp 77 | 78 | fmt.Fprintf(rw, "OK") 79 | } 80 | -------------------------------------------------------------------------------- /stack/frame.go: -------------------------------------------------------------------------------- 1 | package stack 2 | 3 | import ( 4 | // "bytes" 5 | "fmt" 6 | // "io/ioutil" 7 | "runtime" 8 | "strings" 9 | ) 10 | 11 | // Frame contains all necessary information about to generate a line in a callstack. 12 | type Frame struct { 13 | File string 14 | LineNumber int 15 | Name string 16 | Package string 17 | IsSystemPackage bool 18 | ProgramCounter uintptr 19 | } 20 | 21 | // NewFrame popoulates a stack frame object from the program counter. 
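// If the pc cannot be resolved to a function, a Frame with only ProgramCounter set is returned.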
22 | func NewFrame(pc uintptr) Frame { 23 | frame := Frame{ProgramCounter: pc} 24 | if frame.Func() == nil { 25 | return frame 26 | } 27 | frame.Package, frame.Name = packageAndName(frame.Func()) 28 | 29 | // pc -1 because the program counters we use are usually return addresses, 30 | // and we want to show the line that corresponds to the function call 31 | frame.File, frame.LineNumber = frame.Func().FileLine(pc - 1) 32 | frame.IsSystemPackage = isSystemPackage(frame.File, frame.Package) 33 | 34 | return frame 35 | } 36 | 37 | // Func returns the function that this stackframe corresponds to 38 | func (frame *Frame) Func() *runtime.Func { 39 | if frame.ProgramCounter == 0 { 40 | return nil 41 | } 42 | return runtime.FuncForPC(frame.ProgramCounter) 43 | } 44 | 45 | func (frame *Frame) String() string { 46 | return fmt.Sprintf("%s:%d %s", frame.File, frame.LineNumber, frame.Name) 47 | } 48 | 49 | func packageAndName(fn *runtime.Func) (string, string) { 50 | name := fn.Name() 51 | pkg := "" 52 | 53 | // we first remove the path prefix if there is one. 54 | if lastslash := strings.LastIndex(name, "/"); lastslash >= 0 { 55 | pkg += name[:lastslash] + "/" 56 | name = name[lastslash+1:] 57 | } 58 | if period := strings.Index(name, "."); period >= 0 { 59 | pkg += name[:period] 60 | name = name[period+1:] 61 | } 62 | 63 | return pkg, name 64 | } 65 | 66 | var goroot = runtime.GOROOT() 67 | 68 | // isSystemPackage returns true iff the package is a system package like 'runtime' or 'net/http' 69 | func isSystemPackage(file, pkg string) bool { 70 | return strings.HasPrefix(file, goroot) 71 | } 72 | -------------------------------------------------------------------------------- /json_writer_sink.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "io" 7 | ) 8 | 9 | type JsonWriterSink struct { 10 | io.Writer 11 | } 12 | 13 | func (j *JsonWriterSink) EmitEvent(job string, event string, kvs map[string]string) { 14 | 15 | b, err := json.Marshal(struct { 16 | Job string 17 | Event string 18 | Timestamp string 19 | Kvs map[string]string 20 | }{job, event, timestamp(), kvs}) 21 | 22 | if err != nil { 23 | return 24 | } 25 | j.Write(b) 26 | } 27 | 28 | func (j *JsonWriterSink) EmitEventErr(job string, event string, err error, kvs map[string]string) { 29 | 30 | b, err := json.Marshal(struct { 31 | Job string 32 | Event string 33 | Timestamp string 34 | Err string 35 | Kvs map[string]string 36 | }{job, event, timestamp(), fmt.Sprint(err), kvs}) 37 | 38 | if err != nil { 39 | return 40 | } 41 | j.Write(b) 42 | } 43 | 44 | func (j *JsonWriterSink) EmitTiming(job string, event string, nanoseconds int64, kvs map[string]string) { 45 | 46 | b, err := json.Marshal(struct { 47 | Job string 48 | Event string 49 | Timestamp string 50 | Nanoseconds int64 51 | Kvs map[string]string 52 | }{job, event, timestamp(), nanoseconds, kvs}) 53 | 54 | if err != nil { 55 | return 56 | } 57 | j.Write(b) 58 | } 59 | 60 | func (j *JsonWriterSink) EmitGauge(job string, event string, value float64, kvs map[string]string) { 61 | 62 | b, err := json.Marshal(struct { 63 | Job string 64 | Event string 65 | Timestamp string 66 | Value float64 67 | Kvs map[string]string 68 | }{job, event, timestamp(), value, kvs}) 69 | 70 | if err != nil { 71 | return 72 | } 73 | j.Write(b) 74 | } 75 | 76 | func (j *JsonWriterSink) EmitComplete(job string, status CompletionStatus, nanoseconds int64, kvs map[string]string) { 77 | 78 | b, err := 
json.Marshal(struct { 79 | Job string 80 | Status string 81 | Timestamp string 82 | Nanoseconds int64 83 | Kvs map[string]string 84 | }{job, status.String(), timestamp(), nanoseconds, kvs}) 85 | 86 | if err != nil { 87 | return 88 | } 89 | j.Write(b) 90 | } 91 | -------------------------------------------------------------------------------- /interval_aggregation_merge.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | // Merge merges intAgg into ia, mutating ia. 4 | // Requires that ia and intAgg are a fully valid with no nil maps. 5 | func (ia *IntervalAggregation) Merge(intAgg *IntervalAggregation) { 6 | ia.aggregationMaps.merge(&intAgg.aggregationMaps) 7 | 8 | for k, v := range intAgg.Jobs { 9 | if existingJob, ok := ia.Jobs[k]; ok { 10 | existingJob.merge(v) 11 | } else { 12 | ia.Jobs[k] = v.Clone() 13 | } 14 | } 15 | 16 | ia.SerialNumber++ 17 | } 18 | 19 | func (intoJob *JobAggregation) merge(fromJob *JobAggregation) { 20 | intoJob.aggregationMaps.merge(&fromJob.aggregationMaps) 21 | intoJob.TimerAggregation.merge(&fromJob.TimerAggregation) 22 | intoJob.CountSuccess += fromJob.CountSuccess 23 | intoJob.CountValidationError += fromJob.CountValidationError 24 | intoJob.CountPanic += fromJob.CountPanic 25 | intoJob.CountError += fromJob.CountError 26 | intoJob.CountJunk += fromJob.CountJunk 27 | } 28 | 29 | func (intoTa *TimerAggregation) merge(fromTa *TimerAggregation) { 30 | intoTa.Count += fromTa.Count 31 | intoTa.NanosSum += fromTa.NanosSum 32 | intoTa.NanosSumSquares += fromTa.NanosSumSquares 33 | if fromTa.NanosMin < intoTa.NanosMin { 34 | intoTa.NanosMin = fromTa.NanosMin 35 | } 36 | if fromTa.NanosMax > intoTa.NanosMax { 37 | intoTa.NanosMax = fromTa.NanosMax 38 | } 39 | } 40 | 41 | func (intoAm *aggregationMaps) merge(fromAm *aggregationMaps) { 42 | for k, v := range fromAm.Events { 43 | intoAm.Events[k] += v 44 | } 45 | 46 | for k, v := range fromAm.Gauges { 47 | intoAm.Gauges[k] = v 48 | } 49 | 50 | for k, v := range fromAm.Timers { 51 | if existingTimer, ok := intoAm.Timers[k]; ok { 52 | existingTimer.merge(v) 53 | } else { 54 | intoAm.Timers[k] = v.Clone() 55 | } 56 | } 57 | 58 | for k, v := range fromAm.EventErrs { 59 | if existingErrCounter, ok := intoAm.EventErrs[k]; ok { 60 | existingErrCounter.Count += v.Count 61 | 62 | // merging two ring buffers given our shitty implementation is problematic. 63 | for _, err := range v.errorSamples { 64 | if err != nil { 65 | existingErrCounter.addError(err) 66 | } 67 | } 68 | } else { 69 | intoAm.EventErrs[k] = v.Clone() 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /sinks/bugsnag/sink.go: -------------------------------------------------------------------------------- 1 | package bugsnag 2 | 3 | import ( 4 | "fmt" 5 | "github.com/gocraft/health" 6 | "os" 7 | ) 8 | 9 | // This sink emits to a StatsD deaemon by sending it a UDP packet. 
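// In practice this sink queues unmuted, not-yet-emitted event errors on a channel, and a background goroutine reports each one to the Bugsnag notify API.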
10 | type Sink struct { 11 | *Config 12 | cmdChan chan *cmdEventErr 13 | doneChan chan int 14 | } 15 | 16 | type cmdEventErr struct { 17 | Job string 18 | Event string 19 | Err *health.UnmutedError 20 | Kvs map[string]string 21 | } 22 | 23 | func NewSink(config *Config) *Sink { 24 | const maxChanSize = 25 25 | 26 | if config.Endpoint == "" { 27 | config.Endpoint = "https://notify.bugsnag.com/" 28 | } 29 | 30 | s := &Sink{ 31 | Config: config, 32 | cmdChan: make(chan *cmdEventErr, maxChanSize), 33 | doneChan: make(chan int), 34 | } 35 | 36 | go errorProcessingLoop(s) 37 | 38 | return s 39 | } 40 | 41 | func (s *Sink) EmitEvent(job string, event string, kvs map[string]string) { 42 | // no-op 43 | } 44 | 45 | func (s *Sink) EmitEventErr(job string, event string, inputErr error, kvs map[string]string) { 46 | switch inputErr := inputErr.(type) { 47 | case *health.UnmutedError: 48 | if !inputErr.Emitted { 49 | s.cmdChan <- &cmdEventErr{Job: job, Event: event, Err: inputErr, Kvs: kvs} 50 | } 51 | case *health.MutedError: 52 | // Do nothing! 53 | default: // eg, case error: 54 | // This shouldn't happen, all errors passed in here should be wrapped. 55 | } 56 | } 57 | 58 | func (s *Sink) EmitTiming(job string, event string, nanos int64, kvs map[string]string) { 59 | // no-op 60 | } 61 | 62 | func (s *Sink) EmitGauge(job string, event string, value float64, kvs map[string]string) { 63 | // no-op 64 | } 65 | 66 | func (s *Sink) EmitComplete(job string, status health.CompletionStatus, nanos int64, kvs map[string]string) { 67 | // no-op 68 | } 69 | 70 | func (s *Sink) ShutdownServer() { 71 | s.doneChan <- 1 72 | } 73 | 74 | func errorProcessingLoop(sink *Sink) { 75 | cmdChan := sink.cmdChan 76 | doneChan := sink.doneChan 77 | 78 | PROCESSING_LOOP: 79 | for { 80 | select { 81 | case <-doneChan: 82 | break PROCESSING_LOOP 83 | case cmd := <-cmdChan: 84 | if err := Notify(sink.Config, cmd.Job, cmd.Event, cmd.Err, cmd.Err.Stack, cmd.Kvs); err != nil { 85 | fmt.Fprintf(os.Stderr, "bugsnag.Notify: could not notify bugsnag. err=%v\n", err) 86 | } 87 | } 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /stack/stack_test.go: -------------------------------------------------------------------------------- 1 | package stack 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | "testing" 7 | // "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func level2() *Trace { 11 | return NewTrace(0) 12 | } 13 | 14 | func level1() *Trace { 15 | return level2() 16 | } 17 | 18 | func level0() *Trace { 19 | return level1() 20 | } 21 | 22 | func assertFrame(t *testing.T, frame *Frame, file string, line int, fun string) { 23 | testName := fmt.Sprintf("[file: %s line: %d fun: %s]", file, line, fun) 24 | 25 | if !regexp.MustCompile(file).MatchString(frame.File) { 26 | t.Errorf("assertFrame: %s didn't match file in %v", testName, frame) 27 | } 28 | 29 | if frame.LineNumber != line { 30 | t.Errorf("assertFrame: %s didn't match line in %v", testName, frame) 31 | } 32 | 33 | if frame.Name != fun { 34 | t.Errorf("assertFrame: %s didn't match function name in %v", testName, frame) 35 | } 36 | } 37 | 38 | func TestNewTrace(t *testing.T) { 39 | trace := level0() 40 | 41 | frames := trace.Frames() 42 | 43 | // Yes, this is a persnickety test that will fail as the file is modified. Sorry guise. 
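// The expected line numbers are the lines in this file where level2/level1/level0 make their calls, plus the level0() call in TestNewTrace.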
44 | assertFrame(t, &frames[0], "stack_test\\.go", 11, "level2") 45 | assertFrame(t, &frames[1], "stack_test\\.go", 15, "level1") 46 | assertFrame(t, &frames[2], "stack_test\\.go", 19, "level0") 47 | assertFrame(t, &frames[3], "stack_test\\.go", 39, "TestNewTrace") 48 | } 49 | 50 | type someT struct{} 51 | 52 | func (s someT) level2() *Trace { 53 | return NewTrace(0) 54 | } 55 | 56 | func (s someT) level1() *Trace { 57 | return s.level2() 58 | } 59 | 60 | func (s someT) level0() *Trace { 61 | return s.level1() 62 | } 63 | 64 | func TestNewTraceWithTypes(t *testing.T) { 65 | obj := &someT{} 66 | trace := obj.level0() 67 | 68 | frames := trace.Frames() 69 | 70 | // Yes, this is a persnickety test that will fail as the file is modified. Sorry guise. 71 | assertFrame(t, &frames[0], "stack_test\\.go", 53, "someT.level2") 72 | assertFrame(t, &frames[1], "stack_test\\.go", 57, "someT.level1") 73 | assertFrame(t, &frames[2], "stack_test\\.go", 61, "someT.level0") 74 | assertFrame(t, &frames[3], "stack_test\\.go", 66, "TestNewTraceWithTypes") 75 | } 76 | 77 | func TestStackPrint(t *testing.T) { 78 | trace := level0() 79 | stack := trace.Stack() 80 | reg := regexp.MustCompile("stack_test\\.go:11 level2\n.+stack_test\\.go:15 level1\n.+stack_test\\.go:19 level0") 81 | 82 | if !reg.Match(trace.Stack()) { 83 | t.Errorf("trace didn't match. Got:\n%s\n", string(stack)) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /sinks/librato/sink_test.go: -------------------------------------------------------------------------------- 1 | package librato 2 | 3 | import ( 4 | "fmt" 5 | "github.com/gocraft/health" 6 | "github.com/stretchr/testify/assert" 7 | "testing" 8 | "time" 9 | ) 10 | 11 | func TestNewShutdown(t *testing.T) { 12 | s := New("a", "b", "c") 13 | defer s.Stop() 14 | 15 | assert.Equal(t, "a", s.libratoUser) 16 | assert.Equal(t, "b", s.libratoApiKey) 17 | assert.Equal(t, "c", s.prefix) 18 | } 19 | 20 | func TestEmit(t *testing.T) { 21 | s := New("a", "b", "c") 22 | 23 | s.EmitEvent("cool", "story", nil) 24 | s.EmitEvent("cool", "story", nil) 25 | s.EmitEvent("cool", "story", nil) 26 | 27 | s.EmitEventErr("sad", "day", fmt.Errorf("ok"), nil) 28 | s.EmitEventErr("sad", "day", fmt.Errorf("ok"), nil) 29 | 30 | s.EmitTiming("rad", "dino", 6000000, nil) 31 | s.EmitTiming("bad", "dino", 12000000, nil) 32 | 33 | s.EmitComplete("tylersmith", health.Success, 22000000, nil) 34 | s.EmitComplete("tylersmart", health.Junk, 8000000, nil) 35 | 36 | time.Sleep(3 * time.Millisecond) 37 | s.Stop() 38 | 39 | assert.Equal(t, int64(3), s.counters["c.story.count"]) 40 | assert.Equal(t, int64(3), s.counters["c.cool.story.count"]) 41 | assert.Equal(t, int64(2), s.counters["c.day.error.count"]) 42 | assert.Equal(t, int64(2), s.counters["c.sad.day.error.count"]) 43 | 44 | g := s.timers["c.dino.timing"] 45 | assert.Equal(t, int64(2), g.Count) 46 | assert.Equal(t, 18.0, g.Sum) 47 | assert.Equal(t, 6.0, g.Min) 48 | assert.Equal(t, 12.0, g.Max) 49 | assert.Equal(t, 180.0, g.SumSquares) 50 | assert.Equal(t, defaultTimerAttributes, g.Attributes) 51 | 52 | g = s.timers["c.rad.dino.timing"] 53 | assert.Equal(t, int64(1), g.Count) 54 | assert.Equal(t, 6.0, g.Sum) 55 | assert.Equal(t, 6.0, g.Min) 56 | assert.Equal(t, 6.0, g.Max) 57 | assert.Equal(t, 36.0, g.SumSquares) 58 | 59 | g = s.timers["c.bad.dino.timing"] 60 | assert.Equal(t, int64(1), g.Count) 61 | assert.Equal(t, 12.0, g.Sum) 62 | assert.Equal(t, 12.0, g.Min) 63 | assert.Equal(t, 12.0, g.Max) 64 | assert.Equal(t, 144.0, 
g.SumSquares) 65 | 66 | g = s.timers["c.tylersmith.success.timing"] 67 | assert.Equal(t, int64(1), g.Count) 68 | assert.Equal(t, 22.0, g.Sum) 69 | assert.Equal(t, 22.0, g.Min) 70 | assert.Equal(t, 22.0, g.Max) 71 | assert.Equal(t, 484.0, g.SumSquares) 72 | 73 | g = s.timers["c.tylersmart.junk.timing"] 74 | assert.Equal(t, int64(1), g.Count) 75 | assert.Equal(t, 8.0, g.Sum) 76 | assert.Equal(t, 8.0, g.Min) 77 | assert.Equal(t, 8.0, g.Max) 78 | assert.Equal(t, 64.0, g.SumSquares) 79 | 80 | } 81 | -------------------------------------------------------------------------------- /json_polling_sink.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | type JsonPollingSink struct { 8 | intervalDuration time.Duration 9 | cmdChan chan *emitCmd 10 | doneChan chan int 11 | doneDoneChan chan int 12 | intervalsChanChan chan chan []*IntervalAggregation 13 | } 14 | 15 | type cmdKind int 16 | 17 | const ( 18 | cmdKindEvent cmdKind = iota 19 | cmdKindEventErr 20 | cmdKindTiming 21 | cmdKindGauge 22 | cmdKindComplete 23 | ) 24 | 25 | type emitCmd struct { 26 | Kind cmdKind 27 | Job string 28 | Event string 29 | Err error 30 | Nanos int64 31 | Value float64 32 | Status CompletionStatus 33 | } 34 | 35 | func NewJsonPollingSink(intervalDuration time.Duration, retain time.Duration) *JsonPollingSink { 36 | const buffSize = 4096 // random-ass-guess 37 | 38 | s := &JsonPollingSink{ 39 | intervalDuration: intervalDuration, 40 | cmdChan: make(chan *emitCmd, buffSize), 41 | doneChan: make(chan int), 42 | doneDoneChan: make(chan int), 43 | intervalsChanChan: make(chan chan []*IntervalAggregation), 44 | } 45 | 46 | go startAggregator(intervalDuration, retain, s) 47 | 48 | return s 49 | } 50 | 51 | func (s *JsonPollingSink) ShutdownServer() { 52 | s.doneChan <- 1 53 | <-s.doneDoneChan 54 | } 55 | 56 | func (s *JsonPollingSink) EmitEvent(job string, event string, kvs map[string]string) { 57 | s.cmdChan <- &emitCmd{Kind: cmdKindEvent, Job: job, Event: event} 58 | } 59 | 60 | func (s *JsonPollingSink) EmitEventErr(job string, event string, inputErr error, kvs map[string]string) { 61 | s.cmdChan <- &emitCmd{Kind: cmdKindEventErr, Job: job, Event: event, Err: inputErr} 62 | } 63 | 64 | func (s *JsonPollingSink) EmitTiming(job string, event string, nanos int64, kvs map[string]string) { 65 | s.cmdChan <- &emitCmd{Kind: cmdKindTiming, Job: job, Event: event, Nanos: nanos} 66 | } 67 | 68 | func (s *JsonPollingSink) EmitGauge(job string, event string, value float64, kvs map[string]string) { 69 | s.cmdChan <- &emitCmd{Kind: cmdKindGauge, Job: job, Event: event, Value: value} 70 | } 71 | 72 | func (s *JsonPollingSink) EmitComplete(job string, status CompletionStatus, nanos int64, kvs map[string]string) { 73 | s.cmdChan <- &emitCmd{Kind: cmdKindComplete, Job: job, Status: status, Nanos: nanos} 74 | } 75 | 76 | func (s *JsonPollingSink) GetMetrics() []*IntervalAggregation { 77 | intervalsChan := make(chan []*IntervalAggregation) 78 | s.intervalsChanChan <- intervalsChan 79 | return <-intervalsChan 80 | } 81 | -------------------------------------------------------------------------------- /error_test.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "fmt" 5 | "github.com/stretchr/testify/assert" 6 | "testing" 7 | ) 8 | 9 | func TestUnmutedErrors(t *testing.T) { 10 | stream := NewStream() 11 | sink := &testSink{} 12 | stream.AddSink(sink) 13 | job := 
stream.NewJob("myjob") 14 | 15 | origErr := fmt.Errorf("wat") 16 | retErr := job.EventErr("abcd", origErr) 17 | 18 | // retErr is an UnmutedError with Emitted=true 19 | if retErr, ok := retErr.(*UnmutedError); ok { 20 | assert.True(t, retErr.Emitted) 21 | assert.Equal(t, retErr.Err, origErr) 22 | } else { 23 | t.Errorf("expected retErr to be an *UnmutedError") 24 | } 25 | 26 | // LastErr has Emitted=false, WasUnmuted=true 27 | assert.NotNil(t, sink.LastErr) 28 | assert.True(t, sink.LastErrUnmuted) 29 | assert.False(t, sink.LastErrEmitted) 30 | 31 | // Log it again! 32 | retErr2 := job.EventErr("abcdefg", retErr) 33 | 34 | // retErr is an UnmutedError with Emitted=true 35 | if retErr2, ok := retErr2.(*UnmutedError); ok { 36 | assert.True(t, retErr2.Emitted) 37 | assert.Equal(t, retErr2.Err, origErr) // We don't endlessly wrap UnmutedErrors inside UnmutedErrors 38 | } else { 39 | t.Errorf("expected retErr to be an *UnmutedError") 40 | } 41 | 42 | // LastErr has Emitted=false, WasUnmuted=true 43 | assert.NotNil(t, sink.LastErr) 44 | assert.True(t, sink.LastErrUnmuted) 45 | assert.True(t, sink.LastErrEmitted) 46 | } 47 | 48 | func TestMutedErrors(t *testing.T) { 49 | stream := NewStream() 50 | sink := &testSink{} 51 | stream.AddSink(sink) 52 | job := stream.NewJob("myjob") 53 | 54 | origErr := fmt.Errorf("wat") 55 | mutedOrig := Mute(origErr) 56 | retErr := job.EventErr("abcd", mutedOrig) 57 | 58 | // retErr is an UnmutedError with Emitted=true 59 | if retErr, ok := retErr.(*MutedError); ok { 60 | assert.Equal(t, retErr.Err, origErr) 61 | } else { 62 | t.Errorf("expected retErr to be an *MutedError") 63 | } 64 | 65 | // LastErr has Emitted=false, WasUnmuted=true 66 | assert.NotNil(t, sink.LastErr) 67 | assert.True(t, sink.LastErrMuted) 68 | 69 | // Log it again! 70 | retErr2 := job.EventErr("abcdefg", retErr) 71 | 72 | // retErr is an UnmutedError with Emitted=true 73 | if retErr2, ok := retErr2.(*MutedError); ok { 74 | assert.Equal(t, retErr2.Err, origErr) // We don't endlessly wrap MutedErrors inside MutedErrors 75 | } else { 76 | t.Errorf("expected retErr to be an *MutedError") 77 | } 78 | 79 | // LastErr has Emitted=false, WasUnmuted=true 80 | assert.NotNil(t, sink.LastErr) 81 | assert.True(t, sink.LastErrMuted) 82 | } 83 | -------------------------------------------------------------------------------- /interval_aggregation_merge_test.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | // Let's leverage clone's fixture data and make sure we can merge into a new blank aggregation to get the same data. 
11 | func TestMergeBasic(t *testing.T) { 12 | setNowMock("2011-09-09T23:36:13Z") 13 | defer resetNowMock() 14 | 15 | a := aggregatorWithData() 16 | intAgg := a.intervalAggregations[0] 17 | assertAggregationData(t, intAgg) 18 | newAgg := NewIntervalAggregation(intAgg.IntervalStart) 19 | newAgg.Merge(intAgg) 20 | assertAggregationData(t, newAgg) 21 | } 22 | 23 | func TestMerge(t *testing.T) { 24 | setNowMock("2011-09-09T23:36:13Z") 25 | defer resetNowMock() 26 | 27 | // Make two aggregations, merge together: 28 | a := aggregatorWithData() 29 | intAgg := a.intervalAggregations[0] 30 | a2 := aggregatorWithData() 31 | intAgg2 := a2.intervalAggregations[0] 32 | 33 | // Modify a gauge: 34 | a2.EmitGauge("job0", "gauge1", 5.5) 35 | 36 | intAgg.Merge(intAgg2) 37 | 38 | // same number of events: 39 | assert.Equal(t, 300, len(intAgg.Jobs)) 40 | assert.Equal(t, 1200, len(intAgg.Events)) 41 | assert.Equal(t, 1200, len(intAgg.Timers)) 42 | assert.Equal(t, 1200, len(intAgg.Gauges)) 43 | assert.Equal(t, 1200, len(intAgg.EventErrs)) 44 | 45 | // Spot-check events: 46 | assert.EqualValues(t, 2, intAgg.Events["event0"]) 47 | 48 | // Spot-check gauges: 49 | assert.EqualValues(t, 3.14, intAgg.Gauges["gauge0"]) 50 | assert.EqualValues(t, 5.5, intAgg.Gauges["gauge1"]) // 5.5 takes precedence over 3.14 (argument to merge takes precedence.) 51 | 52 | // Spot-check timings: 53 | assert.EqualValues(t, 2, intAgg.Timers["timing0"].Count) 54 | assert.EqualValues(t, 24, intAgg.Timers["timing0"].NanosSum) 55 | 56 | // Spot-check event-errs: 57 | assert.EqualValues(t, 2, intAgg.EventErrs["err0"].Count) 58 | assert.EqualValues(t, []error{fmt.Errorf("wat")}, intAgg.EventErrs["err0"].getErrorSamples()) 59 | 60 | // Spot-check jobs: 61 | job := intAgg.Jobs["job0"] 62 | assert.EqualValues(t, 2, job.CountSuccess) 63 | assert.EqualValues(t, 0, job.CountError) 64 | assert.EqualValues(t, 2, job.Events["event0"]) 65 | assert.EqualValues(t, 0, job.Events["event4"]) 66 | assert.EqualValues(t, 3.14, job.Gauges["gauge0"]) 67 | assert.EqualValues(t, 2, job.Timers["timing0"].Count) 68 | assert.EqualValues(t, 24, job.Timers["timing0"].NanosSum) 69 | assert.EqualValues(t, 2, job.EventErrs["err0"].Count) 70 | assert.Equal(t, []error{fmt.Errorf("wat")}, job.EventErrs["err0"].getErrorSamples()) 71 | } 72 | -------------------------------------------------------------------------------- /cmd/healthd/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/gocraft/health" 6 | "github.com/gocraft/health/healthd" 7 | "os" 8 | "strings" 9 | "time" 10 | ) 11 | 12 | // TODO's: 13 | // - in /overall, make interval_start and interval_duration make sense. 14 | // - maybe add interval_end? 15 | // - some easy way of exposing errors 16 | // - add to health the concept of a daemon/service name. 17 | // - so getting /health gives you: 18 | // - name (eg, metroid_api), version?, git sha?, process, time ranges, and then the metrics. 19 | 20 | // nice to have's 21 | // - handle the case when time goes backwards. 22 | // - we need some way to not return all data from clients on every request. Necessary? 23 | func main() { 24 | // Get inputs. Read from env variables for now (command line options?) 25 | monitoredHostPorts := getMonitoredHostPorts() 26 | serverHostPort := getServerHostPort() 27 | healthHostPort := getHealthHostPort() 28 | 29 | // Monitor ourselves. 
This will make our own instrumentation show up in the healthd output 30 | // I'm not totally sure we want to do this, but (shrug) seems reasonable right now. 31 | monitoredHostPorts = append(monitoredHostPorts, healthHostPort) 32 | 33 | // Setup our health stream. 34 | // Log to stdout and a setup an polling sink 35 | stream := health.NewStream() 36 | stream.AddSink(&health.WriterSink{os.Stdout}) 37 | jsonPollingSink := health.NewJsonPollingSink(time.Minute, time.Minute*5) 38 | jsonPollingSink.StartServer(healthHostPort) 39 | stream.AddSink(jsonPollingSink) 40 | 41 | // Say we're starting! 42 | stream.EventKv("starting", health.Kvs{ 43 | "monitored_host_ports": strings.Join(monitoredHostPorts, ","), 44 | "server_host_port": serverHostPort, 45 | "health_host_port": healthHostPort, 46 | }) 47 | 48 | // Start the healthd aggregators in a goroutine(s) 49 | healthd.StartNewHealthD(monitoredHostPorts, serverHostPort, stream) 50 | 51 | // Block 52 | select {} 53 | } 54 | 55 | func getHealthHostPort() string { 56 | ret := os.Getenv("HEALTH_HOSTPORT") 57 | if ret == "" { 58 | ret = ":5030" 59 | } 60 | return ret 61 | } 62 | 63 | func getMonitoredHostPorts() []string { 64 | hps := os.Getenv("HEALTHD_MONITORED_HOSTPORTS") 65 | if hps == "" { 66 | fmt.Println("no hosts to monitor. Pass them in with the environment variable HEALTHD_MONITORED_HOSTPORTS") 67 | fmt.Println("example: $ HEALTHD_MONITORED_HOSTPORTS=web31:5020,web32:5020 ./healthd") 68 | os.Exit(1) 69 | } 70 | return strings.Split(hps, ",") 71 | } 72 | 73 | func getServerHostPort() string { 74 | ret := os.Getenv("HEALTHD_SERVER_HOSTPORT") 75 | if ret == "" { 76 | ret = ":5031" 77 | } 78 | return ret 79 | } 80 | -------------------------------------------------------------------------------- /runtime_metrics/runtime_metrics.go: -------------------------------------------------------------------------------- 1 | package runtime_metrics 2 | 3 | import ( 4 | "github.com/gocraft/health" 5 | "runtime" 6 | "runtime/debug" 7 | "time" 8 | ) 9 | 10 | type RuntimeMetrics struct { 11 | stream health.EventReceiver 12 | options Options 13 | stopChan chan bool 14 | stopStopChan chan bool 15 | } 16 | 17 | type Options struct { 18 | Interval time.Duration 19 | 20 | Memory bool 21 | GC bool 22 | GCQuantile bool 23 | Goroutines bool 24 | Cgo bool 25 | FDs bool 26 | } 27 | 28 | func NewRuntimeMetrics(stream health.EventReceiver, options *Options) *RuntimeMetrics { 29 | rm := &RuntimeMetrics{ 30 | stream: stream, 31 | stopChan: make(chan bool), 32 | stopStopChan: make(chan bool), 33 | } 34 | 35 | if options != nil { 36 | rm.options = *options 37 | } else { 38 | rm.options = Options{time.Second * 5, true, true, true, true, true, true} 39 | } 40 | 41 | return rm 42 | } 43 | 44 | func (rm *RuntimeMetrics) Start() { 45 | go rm.metricsPoller() 46 | } 47 | 48 | func (rm *RuntimeMetrics) Stop() { 49 | rm.stopChan <- true 50 | <-rm.stopStopChan 51 | } 52 | 53 | func (rm *RuntimeMetrics) metricsPoller() { 54 | ticker := time.NewTicker(rm.options.Interval) 55 | 56 | METRICS_POOLER_LOOP: 57 | for { 58 | select { 59 | case <-rm.stopChan: 60 | break METRICS_POOLER_LOOP 61 | case <-ticker.C: 62 | rm.Report() 63 | } 64 | } 65 | 66 | ticker.Stop() 67 | rm.stopStopChan <- true 68 | } 69 | 70 | func (rm *RuntimeMetrics) Report() { 71 | var mem runtime.MemStats 72 | runtime.ReadMemStats(&mem) 73 | 74 | if rm.options.Memory { 75 | // bytes allocated and not yet freed 76 | rm.reportGauge("alloc", float64(mem.Alloc)) 77 | 78 | // total number of allocated objects 79 | 
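// (HeapObjects counts live heap objects, i.e. objects allocated and not yet freed)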
rm.reportGauge("heap_objects", float64(mem.HeapObjects)) 80 | } 81 | 82 | if rm.options.GC { 83 | rm.reportGauge("pause_total_ns", float64(mem.PauseTotalNs)) 84 | rm.reportGauge("num_gc", float64(mem.NumGC)) 85 | rm.reportGauge("next_gc", float64(mem.NextGC)) 86 | rm.reportGauge("gc_cpu_fraction", mem.GCCPUFraction) 87 | } 88 | 89 | if rm.options.GCQuantile { 90 | var gc debug.GCStats 91 | gc.PauseQuantiles = make([]time.Duration, 3) 92 | debug.ReadGCStats(&gc) 93 | rm.reportGauge("gc_pause_quantile_50", float64(gc.PauseQuantiles[1]/1000)/1000.0) 94 | rm.reportGauge("gc_pause_quantile_max", float64(gc.PauseQuantiles[2]/1000)/1000.0) 95 | } 96 | 97 | if rm.options.Goroutines { 98 | rm.reportGauge("num_goroutines", float64(runtime.NumGoroutine())) 99 | } 100 | 101 | if rm.options.Cgo { 102 | rm.reportGauge("num_cgo_call", float64(runtime.NumCgoCall())) 103 | } 104 | 105 | if rm.options.FDs { 106 | if num, err := getFDUsage(); err == nil { 107 | rm.reportGauge("num_fds_used", float64(num)) 108 | } 109 | } 110 | } 111 | 112 | func (rm *RuntimeMetrics) reportGauge(key string, val float64) { 113 | rm.stream.Gauge(key, val) 114 | } 115 | -------------------------------------------------------------------------------- /cmd/healthtop/hosts.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/buger/goterm" 7 | "github.com/gocraft/health/healthd" 8 | "io/ioutil" 9 | "net/http" 10 | "strings" 11 | "time" 12 | ) 13 | 14 | func hostsLoop() { 15 | secondTicker := time.Tick(1 * time.Second) 16 | 17 | var lastApiResponse *healthd.ApiResponseHosts 18 | var hStatus healthdStatus 19 | 20 | responses := make(chan *healthd.ApiResponseHosts) 21 | errors := make(chan error) 22 | 23 | go pollHealthDHosts(responses, errors) 24 | for { 25 | select { 26 | case <-secondTicker: 27 | go pollHealthDHosts(responses, errors) 28 | printHosts(lastApiResponse, &hStatus) 29 | case resp := <-responses: 30 | lastApiResponse = resp 31 | hStatus.lastSuccessAt = time.Now() 32 | printHosts(lastApiResponse, &hStatus) 33 | case err := <-errors: 34 | hStatus.lastErrorAt = time.Now() 35 | hStatus.lastError = err 36 | } 37 | } 38 | } 39 | 40 | func pollHealthDHosts(responses chan *healthd.ApiResponseHosts, errors chan error) { 41 | var body []byte 42 | 43 | uri := "http://" + sourceHostPort + "/healthd/hosts" 44 | 45 | resp, err := http.Get(uri) 46 | if err != nil { 47 | errors <- err 48 | return 49 | } 50 | defer resp.Body.Close() 51 | body, err = ioutil.ReadAll(resp.Body) 52 | if err != nil { 53 | errors <- err 54 | return 55 | } 56 | 57 | var response healthd.ApiResponseHosts 58 | if err := json.Unmarshal(body, &response); err != nil { 59 | errors <- err 60 | return 61 | } 62 | 63 | responses <- &response 64 | } 65 | 66 | func printHosts(lastApiResponse *healthd.ApiResponseHosts, status *healthdStatus) { 67 | goterm.Clear() // Clear current screen 68 | goterm.MoveCursor(1, 1) 69 | defer goterm.Flush() 70 | goterm.Println("Current Time:", status.FmtNow(), " Status:", status.FmtStatus()) 71 | 72 | // 73 | if lastApiResponse == nil { 74 | goterm.Println("no data yet") 75 | return 76 | } 77 | 78 | columns := []string{ 79 | "Host:Port", 80 | "Status", 81 | "Last Checked", 82 | "Last Response Time", 83 | } 84 | 85 | for i, s := range columns { 86 | columns[i] = goterm.Bold(goterm.Color(s, goterm.BLACK)) 87 | } 88 | 89 | table := goterm.NewTable(0, goterm.Width()-1, 5, ' ', 0) 90 | fmt.Fprintf(table, "%s\n", strings.Join(columns, 
"\t")) 91 | 92 | for _, host := range lastApiResponse.Hosts { 93 | printHost(table, host) 94 | } 95 | 96 | goterm.Println(table) 97 | } 98 | 99 | func printHost(table *goterm.Table, host *healthd.HostStatus) { 100 | success := host.LastCode == 200 && host.LastErr == "" 101 | var status string 102 | if success { 103 | status = "Success" 104 | } else if host.LastCheckTime.IsZero() { 105 | status = "Unknown" 106 | } else { 107 | status = "Failure: " + host.LastErr 108 | } 109 | 110 | printCellString(host.HostPort, table, true, false, false) 111 | printCellString(status, table, false, success, !success) 112 | printCellString(host.LastCheckTime.Format(time.RFC1123), table, false, false, false) 113 | printCellNanos(int64(host.LastNanos), table, false, false, false) 114 | fmt.Fprintf(table, "\n") 115 | } 116 | -------------------------------------------------------------------------------- /health_test.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "errors" 5 | "github.com/stretchr/testify/assert" 6 | "testing" 7 | ) 8 | 9 | type testSink struct { 10 | LastEmitKind string // "Event", "EventErr", ..., "Complete" 11 | LastJob string 12 | LastEvent string 13 | 14 | LastErr error 15 | LastErrEmitted bool 16 | LastErrUnmuted bool 17 | LastErrMuted bool 18 | LastErrRaw bool 19 | 20 | LastNanos int64 21 | LastValue float64 22 | LastKvs map[string]string 23 | LastStatus CompletionStatus 24 | } 25 | 26 | func (s *testSink) EmitEvent(job string, event string, kvs map[string]string) { 27 | s.LastEmitKind = "Event" 28 | s.LastJob = job 29 | s.LastEvent = event 30 | s.LastKvs = kvs 31 | } 32 | 33 | func (s *testSink) EmitEventErr(job string, event string, inputErr error, kvs map[string]string) { 34 | s.LastEmitKind = "EventErr" 35 | s.LastJob = job 36 | s.LastEvent = event 37 | s.LastKvs = kvs 38 | s.LastErr = inputErr 39 | 40 | switch inputErr := inputErr.(type) { 41 | case *UnmutedError: 42 | s.LastErrUnmuted = true 43 | s.LastErrEmitted = inputErr.Emitted 44 | case *MutedError: 45 | s.LastErrMuted = true 46 | default: // eg, case error: 47 | s.LastErrRaw = true 48 | } 49 | } 50 | func (s *testSink) EmitTiming(job string, event string, nanos int64, kvs map[string]string) { 51 | s.LastEmitKind = "Timing" 52 | s.LastJob = job 53 | s.LastEvent = event 54 | s.LastKvs = kvs 55 | s.LastNanos = nanos 56 | } 57 | func (s *testSink) EmitGauge(job string, event string, value float64, kvs map[string]string) { 58 | s.LastEmitKind = "Gauge" 59 | s.LastJob = job 60 | s.LastEvent = event 61 | s.LastKvs = kvs 62 | s.LastValue = value 63 | } 64 | func (s *testSink) EmitComplete(job string, status CompletionStatus, nanos int64, kvs map[string]string) { 65 | s.LastEmitKind = "Complete" 66 | s.LastJob = job 67 | s.LastKvs = kvs 68 | s.LastNanos = nanos 69 | s.LastStatus = status 70 | } 71 | 72 | func successFunc() error { 73 | return nil 74 | } 75 | 76 | func errorFunc() error { 77 | return errors.New("sad_day") 78 | } 79 | 80 | func panicFunc() error { 81 | panic("wat") 82 | return nil 83 | } 84 | 85 | func TestRun(t *testing.T) { 86 | s := NewStream() 87 | 88 | ts := &testSink{} 89 | s.AddSink(ts) 90 | 91 | err := s.Run("foo", successFunc) 92 | assert.NoError(t, err) 93 | 94 | assert.Equal(t, "Complete", ts.LastEmitKind) 95 | assert.Equal(t, "foo", ts.LastJob) 96 | assert.Equal(t, Success, ts.LastStatus) 97 | 98 | err = s.Run("foo", errorFunc) 99 | assert.Equal(t, "sad_day", err.Error()) 100 | 101 | assert.Equal(t, "Complete", ts.LastEmitKind) 102 
| assert.Equal(t, "foo", ts.LastJob) 103 | assert.Equal(t, Error, ts.LastStatus) 104 | 105 | err = s.Run("foo", panicFunc) 106 | assert.Equal(t, "wat", err.Error()) 107 | 108 | assert.Equal(t, "Complete", ts.LastEmitKind) 109 | assert.Equal(t, "foo", ts.LastJob) 110 | assert.Equal(t, Panic, ts.LastStatus) 111 | 112 | // Panicing will fire an EventErr and then a Complete(Panic) 113 | // This test relies on the fact that LastErr isn't cleared when a Complete comes in 114 | assert.Equal(t, "wat", ts.LastErr.Error()) 115 | 116 | // Now just make sure that job also has a similar Run function: 117 | j := s.NewJob("bob") 118 | err = j.Run(successFunc) 119 | assert.NoError(t, err) 120 | 121 | assert.Equal(t, "Complete", ts.LastEmitKind) 122 | assert.Equal(t, "bob", ts.LastJob) 123 | assert.Equal(t, Success, ts.LastStatus) 124 | } 125 | -------------------------------------------------------------------------------- /writer_sink.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "sort" 8 | "time" 9 | ) 10 | 11 | // This sink writes bytes in a format that a human might like to read in a logfile 12 | // This can be used to log to Stdout: 13 | // .AddSink(&WriterSink{os.Stdout}) 14 | // And to a file: 15 | // f, err := os.OpenFile(fname, os.O_RDWR|os.O_CREATE|os.O_APPEND, 0666) 16 | // .AddSink(&WriterSink{f}) 17 | // And to syslog: 18 | // w, err := syslog.New(LOG_INFO, "wat") 19 | // .AddSink(&WriterSink{w}) 20 | type WriterSink struct { 21 | io.Writer 22 | } 23 | 24 | func (s *WriterSink) EmitEvent(job string, event string, kvs map[string]string) { 25 | var b bytes.Buffer 26 | b.WriteRune('[') 27 | b.WriteString(timestamp()) 28 | b.WriteString("]: job:") 29 | b.WriteString(job) 30 | b.WriteString(" event:") 31 | b.WriteString(event) 32 | writeMapConsistently(&b, kvs) 33 | b.WriteRune('\n') 34 | s.Writer.Write(b.Bytes()) 35 | } 36 | 37 | func (s *WriterSink) EmitEventErr(job string, event string, inputErr error, kvs map[string]string) { 38 | var b bytes.Buffer 39 | b.WriteRune('[') 40 | b.WriteString(timestamp()) 41 | b.WriteString("]: job:") 42 | b.WriteString(job) 43 | b.WriteString(" event:") 44 | b.WriteString(event) 45 | b.WriteString(" err:") 46 | b.WriteString(inputErr.Error()) 47 | writeMapConsistently(&b, kvs) 48 | b.WriteRune('\n') 49 | s.Writer.Write(b.Bytes()) 50 | } 51 | 52 | func (s *WriterSink) EmitTiming(job string, event string, nanos int64, kvs map[string]string) { 53 | var b bytes.Buffer 54 | b.WriteRune('[') 55 | b.WriteString(timestamp()) 56 | b.WriteString("]: job:") 57 | b.WriteString(job) 58 | b.WriteString(" event:") 59 | b.WriteString(event) 60 | b.WriteString(" time:") 61 | writeNanoseconds(&b, nanos) 62 | writeMapConsistently(&b, kvs) 63 | b.WriteRune('\n') 64 | s.Writer.Write(b.Bytes()) 65 | } 66 | 67 | func (s *WriterSink) EmitGauge(job string, event string, value float64, kvs map[string]string) { 68 | var b bytes.Buffer 69 | b.WriteRune('[') 70 | b.WriteString(timestamp()) 71 | b.WriteString("]: job:") 72 | b.WriteString(job) 73 | b.WriteString(" event:") 74 | b.WriteString(event) 75 | b.WriteString(" gauge:") 76 | fmt.Fprintf(&b, "%g", value) 77 | writeMapConsistently(&b, kvs) 78 | b.WriteRune('\n') 79 | s.Writer.Write(b.Bytes()) 80 | } 81 | 82 | func (s *WriterSink) EmitComplete(job string, status CompletionStatus, nanos int64, kvs map[string]string) { 83 | var b bytes.Buffer 84 | b.WriteRune('[') 85 | b.WriteString(timestamp()) 86 | b.WriteString("]: job:") 87 | 
b.WriteString(job) 88 | b.WriteString(" status:") 89 | b.WriteString(status.String()) 90 | b.WriteString(" time:") 91 | writeNanoseconds(&b, nanos) 92 | writeMapConsistently(&b, kvs) 93 | b.WriteRune('\n') 94 | s.Writer.Write(b.Bytes()) 95 | } 96 | 97 | func timestamp() string { 98 | return time.Now().UTC().Format(time.RFC3339Nano) 99 | } 100 | 101 | func writeMapConsistently(b *bytes.Buffer, kvs map[string]string) { 102 | if kvs == nil { 103 | return 104 | } 105 | keys := make([]string, 0, len(kvs)) 106 | for k := range kvs { 107 | keys = append(keys, k) 108 | } 109 | sort.Strings(keys) 110 | keysLenMinusOne := len(keys) - 1 111 | 112 | b.WriteString(" kvs:[") 113 | for i, k := range keys { 114 | b.WriteString(k) 115 | b.WriteRune(':') 116 | b.WriteString(kvs[k]) 117 | 118 | if i != keysLenMinusOne { 119 | b.WriteRune(' ') 120 | } 121 | } 122 | b.WriteRune(']') 123 | } 124 | 125 | func writeNanoseconds(b *bytes.Buffer, nanos int64) { 126 | switch { 127 | case nanos > 2000000: 128 | fmt.Fprintf(b, "%d ms", nanos/1000000) 129 | case nanos > 2000: 130 | fmt.Fprintf(b, "%d μs", nanos/1000) 131 | default: 132 | fmt.Fprintf(b, "%d ns", nanos) 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /json_writer_sink_test.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "errors" 7 | "testing" 8 | 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | type testJsonEvent struct { 13 | Job string 14 | Event string 15 | Timestamp string 16 | Err string 17 | Nanoseconds int64 18 | Value float64 19 | Status string 20 | Kvs map[string]string 21 | } 22 | 23 | func TestJsonWriterSinkEvent(t *testing.T) { 24 | var buf bytes.Buffer 25 | someKvs := map[string]string{"foo": "bar", "qux": "dog"} 26 | sink := JsonWriterSink{&buf} 27 | sink.EmitEvent("myjob", "myevent", someKvs) 28 | 29 | dec := json.NewDecoder(&buf) 30 | event := &testJsonEvent{} 31 | err := dec.Decode(event) 32 | 33 | assert.NoError(t, err) 34 | assert.Equal(t, "bar", event.Kvs["foo"]) 35 | assert.Equal(t, "myjob", event.Job) 36 | assert.Equal(t, "myevent", event.Event) 37 | } 38 | 39 | func TestJsonWriterSinkEventErr(t *testing.T) { 40 | var buf bytes.Buffer 41 | sink := JsonWriterSink{&buf} 42 | someKvs := map[string]string{"foo": "bar", "qux": "dog"} 43 | sink.EmitEventErr("myjob", "myevent", errors.New("test err"), someKvs) 44 | 45 | dec := json.NewDecoder(&buf) 46 | event := &testJsonEvent{} 47 | err := dec.Decode(event) 48 | 49 | assert.NoError(t, err) 50 | assert.Equal(t, "bar", event.Kvs["foo"]) 51 | assert.Equal(t, "myjob", event.Job) 52 | assert.Equal(t, "myevent", event.Event) 53 | assert.Equal(t, "test err", event.Err) 54 | } 55 | 56 | func TestJsonWriterSinkEventTiming(t *testing.T) { 57 | var buf bytes.Buffer 58 | sink := JsonWriterSink{&buf} 59 | someKvs := map[string]string{"foo": "bar", "qux": "dog"} 60 | sink.EmitTiming("myjob", "myevent", 34567890, someKvs) 61 | 62 | event := &testJsonEvent{} 63 | dec := json.NewDecoder(&buf) 64 | err := dec.Decode(event) 65 | 66 | assert.NoError(t, err) 67 | assert.Equal(t, "bar", event.Kvs["foo"]) 68 | assert.Equal(t, "myjob", event.Job) 69 | assert.Equal(t, "myevent", event.Event) 70 | assert.EqualValues(t, 34567890, event.Nanoseconds) 71 | } 72 | 73 | func TestJsonWriterSinkEventGauge(t *testing.T) { 74 | var buf bytes.Buffer 75 | sink := JsonWriterSink{&buf} 76 | someKvs := map[string]string{"foo": "bar", "qux": "dog"} 77 | 
sink.EmitGauge("myjob", "myevent", 3.14, someKvs) 78 | 79 | event := &testJsonEvent{} 80 | dec := json.NewDecoder(&buf) 81 | err := dec.Decode(event) 82 | 83 | assert.NoError(t, err) 84 | assert.Equal(t, "bar", event.Kvs["foo"]) 85 | assert.Equal(t, "myjob", event.Job) 86 | assert.Equal(t, "myevent", event.Event) 87 | assert.EqualValues(t, 3.14, event.Value) 88 | } 89 | 90 | func TestJsonWriterSinkEventComplete(t *testing.T) { 91 | var buf bytes.Buffer 92 | dec := json.NewDecoder(&buf) 93 | for kind, kindStr := range completionStatusToString { 94 | sink := JsonWriterSink{&buf} 95 | sink.EmitComplete("myjob", kind, 1204000, nil) 96 | 97 | event := &testJsonEvent{} 98 | err := dec.Decode(event) 99 | 100 | assert.NoError(t, err) 101 | 102 | assert.Equal(t, "myjob", event.Job) 103 | assert.Equal(t, kindStr, event.Status) 104 | assert.EqualValues(t, 1204000, event.Nanoseconds) 105 | buf.Reset() 106 | } 107 | } 108 | 109 | func BenchmarkJsonWriterSinkEmitBlankEvent(b *testing.B) { 110 | var buf bytes.Buffer 111 | sink := JsonWriterSink{&buf} 112 | b.ResetTimer() 113 | for i := 0; i < b.N; i++ { 114 | buf.Reset() 115 | sink.EmitEvent("myjob", "myevent", nil) 116 | } 117 | b.ReportAllocs() 118 | } 119 | 120 | func BenchmarkJsonWriterSinkEmitSmallEvent(b *testing.B) { 121 | var buf bytes.Buffer 122 | someKvs := map[string]string{"foo": "bar", "qux": "dog"} 123 | sink := JsonWriterSink{&buf} 124 | b.ResetTimer() 125 | for i := 0; i < b.N; i++ { 126 | buf.Reset() 127 | sink.EmitEvent("myjob", "myevent", someKvs) 128 | } 129 | b.ReportAllocs() 130 | } 131 | -------------------------------------------------------------------------------- /healthd/healthd_test.go: -------------------------------------------------------------------------------- 1 | package healthd 2 | 3 | import ( 4 | "encoding/json" 5 | "net/http" 6 | "net/http/httptest" 7 | "testing" 8 | "time" 9 | 10 | "github.com/gocraft/health" 11 | "github.com/stretchr/testify/assert" 12 | ) 13 | 14 | func TestHealthD(t *testing.T) { 15 | // Make two sinks: 16 | sink := health.NewJsonPollingSink(time.Minute, time.Minute*5) 17 | sink.StartServer(":6050") 18 | sink.EmitEvent("foo", "bar", nil) 19 | sink.EmitTiming("foo", "baz", 1234, nil) 20 | sink.EmitComplete("foo", health.Success, 5678, nil) 21 | 22 | sink2 := health.NewJsonPollingSink(time.Minute, time.Minute*5) 23 | sink2.StartServer(":6051") 24 | sink2.EmitEvent("foo", "bar", nil) 25 | sink2.EmitTiming("foo", "baz", 4321, nil) 26 | sink2.EmitComplete("foo", health.ValidationError, 8765, nil) 27 | 28 | hd := StartNewHealthD([]string{":6050", ":6051"}, ":6060", health.NewStream()) 29 | 30 | defer func() { 31 | hd.Stop() 32 | time.Sleep(time.Millisecond) 33 | }() 34 | 35 | time.Sleep(time.Millisecond * 15) 36 | 37 | testAggregations(t, hd) 38 | testAggregationsOverall(t, hd) 39 | testJobs(t, hd) 40 | testHosts(t, hd) 41 | 42 | } 43 | 44 | func testAggregations(t *testing.T, hd *HealthD) { 45 | recorder := httptest.NewRecorder() 46 | request, _ := http.NewRequest("GET", "/healthd/aggregations", nil) 47 | hd.apiRouter().ServeHTTP(recorder, request) 48 | assert.Equal(t, 200, recorder.Code) 49 | 50 | var resp ApiResponseAggregations 51 | err := json.Unmarshal(recorder.Body.Bytes(), &resp) 52 | 53 | assert.NoError(t, err) 54 | assert.Equal(t, len(resp.Aggregations), 1) 55 | assertFooBarAggregation(t, resp.Aggregations[0]) 56 | } 57 | 58 | func testAggregationsOverall(t *testing.T, hd *HealthD) { 59 | recorder := httptest.NewRecorder() 60 | request, _ := http.NewRequest("GET", 
"/healthd/aggregations/overall", nil) 61 | hd.apiRouter().ServeHTTP(recorder, request) 62 | assert.Equal(t, 200, recorder.Code) 63 | 64 | var resp ApiResponseAggregationsOverall 65 | err := json.Unmarshal(recorder.Body.Bytes(), &resp) 66 | 67 | assert.NoError(t, err) 68 | assert.NotNil(t, resp.Overall) 69 | assertFooBarAggregation(t, resp.Overall) 70 | } 71 | 72 | func testJobs(t *testing.T, hd *HealthD) { 73 | recorder := httptest.NewRecorder() 74 | request, _ := http.NewRequest("GET", "/healthd/jobs", nil) 75 | hd.apiRouter().ServeHTTP(recorder, request) 76 | assert.Equal(t, 200, recorder.Code) 77 | 78 | var resp ApiResponseJobs 79 | err := json.Unmarshal(recorder.Body.Bytes(), &resp) 80 | 81 | assert.NoError(t, err) 82 | assert.Equal(t, len(resp.Jobs), 1) 83 | job := resp.Jobs[0] 84 | assert.Equal(t, job.Name, "foo") 85 | assert.EqualValues(t, job.Count, 2) 86 | assert.EqualValues(t, job.CountSuccess, 1) 87 | assert.EqualValues(t, job.CountValidationError, 1) 88 | assert.EqualValues(t, job.CountError, 0) 89 | assert.EqualValues(t, job.CountPanic, 0) 90 | assert.EqualValues(t, job.CountJunk, 0) 91 | assert.EqualValues(t, job.NanosSum, 14443) 92 | assert.EqualValues(t, job.NanosMin, 5678) 93 | assert.EqualValues(t, job.NanosMax, 8765) 94 | assert.InDelta(t, job.NanosAvg, 7221.5, 0.01) 95 | assert.InDelta(t, job.NanosSumSquares, 1.09064909e+08, 0.01) 96 | assert.InDelta(t, job.NanosStdDev, 2182.8386, 0.01) 97 | } 98 | 99 | func testHosts(t *testing.T, hd *HealthD) { 100 | recorder := httptest.NewRecorder() 101 | request, _ := http.NewRequest("GET", "/healthd/hosts", nil) 102 | hd.apiRouter().ServeHTTP(recorder, request) 103 | assert.Equal(t, 200, recorder.Code) 104 | 105 | var resp ApiResponseHosts 106 | err := json.Unmarshal(recorder.Body.Bytes(), &resp) 107 | 108 | assert.NoError(t, err) 109 | assert.Equal(t, len(resp.Hosts), 2) 110 | assert.Equal(t, resp.Hosts[0].HostPort, ":6050") 111 | assert.Equal(t, resp.Hosts[1].HostPort, ":6051") 112 | 113 | for _, hs := range resp.Hosts { 114 | assert.WithinDuration(t, hs.LastCheckTime, time.Now(), time.Second*2) 115 | assert.WithinDuration(t, hs.FirstSuccessfulResponse, time.Now(), time.Second*2) 116 | assert.WithinDuration(t, hs.LastSuccessfulResponse, time.Now(), time.Second*2) 117 | assert.EqualValues(t, hs.LastInstanceId, health.Identifier) 118 | assert.EqualValues(t, hs.LastIntervalDuration, time.Minute) 119 | assert.EqualValues(t, hs.LastCode, 200) 120 | assert.Equal(t, hs.LastErr, "") 121 | } 122 | } 123 | 124 | // assertFooBarAggregation asserts that intAgg is the aggregation (generally) of the stuff created in TestHealthD 125 | func assertFooBarAggregation(t *testing.T, intAgg *health.IntervalAggregation) { 126 | assert.EqualValues(t, intAgg.Events["bar"], 2) 127 | assert.EqualValues(t, intAgg.Timers["baz"].Count, 2) 128 | 129 | job := intAgg.Jobs["foo"] 130 | assert.NotNil(t, job) 131 | assert.EqualValues(t, job.Count, 2) 132 | assert.EqualValues(t, job.CountSuccess, 1) 133 | assert.EqualValues(t, job.CountValidationError, 1) 134 | } 135 | -------------------------------------------------------------------------------- /interval_aggregation_clone_test.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | type eventErr struct { 12 | event string 13 | err error 14 | } 15 | 16 | func TestClone(t *testing.T) { 17 | setNowMock("2011-09-09T23:36:13Z") 18 | defer resetNowMock() 19 | 
20 | a := aggregatorWithData() 21 | intAgg := a.intervalAggregations[0] 22 | assertAggregationData(t, intAgg) 23 | clonedAgg := intAgg.Clone() 24 | assertAggregationData(t, clonedAgg) 25 | 26 | // Let's add some data to intAgg and make sure it doesn't propagate to clonedAgg 27 | a.EmitEvent("foo", "bar") 28 | a.EmitTiming("foo", "bar", 100) 29 | a.EmitEventErr("foo", "bar", fmt.Errorf("hi")) 30 | a.EmitGauge("foo", "bar", 3.14) 31 | a.EmitComplete("foo", Error, 99) 32 | 33 | assert.Equal(t, 301, len(intAgg.Jobs)) 34 | 35 | assertAggregationData(t, clonedAgg) 36 | } 37 | 38 | func BenchmarkClone(b *testing.B) { 39 | setNowMock("2011-09-09T23:36:13Z") 40 | defer resetNowMock() 41 | 42 | a := aggregatorWithData() 43 | intAgg := a.intervalAggregations[0] 44 | 45 | b.ResetTimer() 46 | for i := 0; i < b.N; i++ { 47 | intAgg.Clone() 48 | } 49 | } 50 | 51 | func assertAggregationData(t *testing.T, intAgg *IntervalAggregation) { 52 | assert.Equal(t, 300, len(intAgg.Jobs)) 53 | assert.Equal(t, 1200, len(intAgg.Events)) 54 | assert.Equal(t, 1200, len(intAgg.Timers)) 55 | assert.Equal(t, 1200, len(intAgg.Gauges)) 56 | assert.Equal(t, 1200, len(intAgg.EventErrs)) 57 | 58 | // Spot-check events: 59 | assert.EqualValues(t, 1, intAgg.Events["event0"]) 60 | 61 | // Spot check gauges: 62 | assert.EqualValues(t, 3.14, intAgg.Gauges["gauge0"]) 63 | 64 | // Spot-check timings: 65 | assert.EqualValues(t, 1, intAgg.Timers["timing0"].Count) 66 | assert.EqualValues(t, 12, intAgg.Timers["timing0"].NanosSum) 67 | 68 | // Spot-check event-errs: 69 | assert.EqualValues(t, 1, intAgg.EventErrs["err0"].Count) 70 | assert.Equal(t, []error{fmt.Errorf("wat")}, intAgg.EventErrs["err0"].getErrorSamples()) 71 | 72 | // Spot-check jobs: 73 | job := intAgg.Jobs["job0"] 74 | assert.EqualValues(t, 1, job.CountSuccess) 75 | assert.EqualValues(t, 0, job.CountError) 76 | assert.EqualValues(t, 1, job.Events["event0"]) 77 | assert.EqualValues(t, 0, job.Events["event4"]) 78 | assert.EqualValues(t, 3.14, job.Gauges["gauge0"]) 79 | assert.EqualValues(t, 0.0, job.Gauges["gauge4"]) 80 | assert.EqualValues(t, 1, job.Timers["timing0"].Count) 81 | assert.EqualValues(t, 12, job.Timers["timing0"].NanosSum) 82 | assert.EqualValues(t, 1, job.EventErrs["err0"].Count) 83 | assert.Equal(t, []error{fmt.Errorf("wat")}, job.EventErrs["err0"].getErrorSamples()) 84 | 85 | // Nothing foo or bar related 86 | _, ok := intAgg.Jobs["foo"] 87 | assert.False(t, ok) 88 | assert.EqualValues(t, 0, intAgg.Events["bar"]) 89 | assert.Nil(t, intAgg.Timers["bar"]) 90 | assert.Nil(t, intAgg.EventErrs["bar"]) 91 | 92 | } 93 | 94 | func aggregatorWithData() *aggregator { 95 | a := newAggregator(time.Minute, time.Minute*5) 96 | 97 | // We want 300 jobs 98 | // Each job will have 5 events, but we want 1200 events total 99 | // Each job will have 5 timers, but we want 1200 timers total 100 | // Each job will have 5 gauges, but we want 1200 gauges total 101 | // Each job will have 5 errs, but we want 1200 errs total 102 | // Given this 300/1200 dichotomy, 103 | // - the first job will have 4 events, the next job 4 events, etc. 
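// (In the loops below each job therefore emits 4 of each kind — the 1200 names are
// spread evenly over the 300 jobs — plus one Complete per job.)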
104 | 105 | jobs := []string{} 106 | for i := 0; i < 300; i++ { 107 | jobs = append(jobs, fmt.Sprintf("job%d", i)) 108 | } 109 | 110 | events := []string{} 111 | for i := 0; i < 1200; i++ { 112 | events = append(events, fmt.Sprintf("event%d", i)) 113 | } 114 | 115 | timings := []string{} 116 | for i := 0; i < 1200; i++ { 117 | timings = append(timings, fmt.Sprintf("timing%d", i)) 118 | } 119 | 120 | gauges := []string{} 121 | for i := 0; i < 1200; i++ { 122 | gauges = append(gauges, fmt.Sprintf("gauge%d", i)) 123 | } 124 | 125 | eventErrs := []eventErr{} 126 | for i := 0; i < 1200; i++ { 127 | eventErrs = append(eventErrs, eventErr{ 128 | event: fmt.Sprintf("err%d", i), 129 | err: fmt.Errorf("wat"), 130 | }) 131 | } 132 | 133 | cur := 0 134 | for _, j := range jobs { 135 | for i := 0; i < 4; i++ { 136 | a.EmitEvent(j, events[cur]) 137 | cur++ 138 | } 139 | } 140 | 141 | cur = 0 142 | for _, j := range jobs { 143 | for i := 0; i < 4; i++ { 144 | a.EmitEventErr(j, eventErrs[cur].event, eventErrs[cur].err) 145 | cur++ 146 | } 147 | } 148 | 149 | cur = 0 150 | for _, j := range jobs { 151 | for i := 0; i < 4; i++ { 152 | a.EmitTiming(j, timings[cur], 12) 153 | cur++ 154 | } 155 | } 156 | 157 | cur = 0 158 | for _, j := range jobs { 159 | for i := 0; i < 4; i++ { 160 | a.EmitGauge(j, gauges[cur], 3.14) 161 | cur++ 162 | } 163 | } 164 | 165 | for _, j := range jobs { 166 | a.EmitComplete(j, Success, 12) 167 | } 168 | 169 | return a 170 | } 171 | -------------------------------------------------------------------------------- /sinks/bugsnag/api.go: -------------------------------------------------------------------------------- 1 | package bugsnag 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "github.com/gocraft/health/stack" 8 | "io/ioutil" 9 | "net/http" 10 | ) 11 | 12 | type Config struct { 13 | // Your Bugsnag API key, e.g. "c9d60ae4c7e70c4b6c4ebd3e8056d2b8". You can 14 | // find this by clicking Settings on https://bugsnag.com/. 15 | APIKey string 16 | 17 | // The Endpoint to notify about crashes. This defaults to 18 | // "https://notify.bugsnag.com/", if you're using Bugsnag Enterprise then 19 | // set it to your internal Bugsnag endpoint. 20 | Endpoint string 21 | 22 | // The current release stage. This defaults to "production" and is used to 23 | // filter errors in the Bugsnag dashboard. 24 | ReleaseStage string 25 | 26 | // The currently running version of the app. This is used to filter errors 27 | // in the Bugsnag dasboard. If you set this then Bugsnag will only re-open 28 | // resolved errors if they happen in different app versions. 29 | AppVersion string 30 | 31 | // The hostname of the current server. This defaults to the return value of 32 | // os.Hostname() and is graphed in the Bugsnag dashboard. 
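// newPayload copies this value into each event's device.hostname field.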
33 | Hostname string 34 | } 35 | 36 | type payload struct { 37 | APIKey string `json:"apiKey"` 38 | 39 | Notifier struct { 40 | Name string `json:"name"` 41 | Version string `json:"version"` 42 | URL string `json:"url"` 43 | } `json:"notifier"` 44 | 45 | Events []payloadEvent `json:"events"` 46 | } 47 | 48 | type payloadEvent struct { 49 | PayloadVersion string `json:"payloadVersion"` 50 | Exceptions []payloadException `json:"exceptions"` 51 | 52 | // threads 53 | 54 | Context string `json:"context"` 55 | 56 | // groupingHash 57 | // severity 58 | // user 59 | 60 | App struct { 61 | // version 62 | ReleaseStage string `json:"releaseStage"` 63 | } `json:"app"` 64 | 65 | Device struct { 66 | //osVersion 67 | Hostname string `json:"hostname"` 68 | } `json:"device"` 69 | 70 | // meta data 71 | 72 | Metadata struct { 73 | Request request `json:"request"` 74 | Kvs map[string]string `json:"kvs"` 75 | } `json:"metaData"` 76 | } 77 | 78 | type payloadException struct { 79 | ErrorClass string `json:"errorClass"` 80 | Message string `json:"message"` 81 | Stacktrace []payloadFrame `json:"stacktrace"` 82 | } 83 | 84 | type payloadFrame struct { 85 | File string `json:"file"` 86 | LineNumber int `json:"lineNumber"` 87 | Method string `json:"method"` 88 | InProject bool `json:"inProject"` 89 | //code 90 | } 91 | 92 | type request struct { 93 | Url string `json:"url"` 94 | Parameters string `json:"parameters"` 95 | } 96 | 97 | // Notify will send the error and stack trace to Bugsnag. Note that this doesn't take advantage of all of Bugsnag's capabilities. 98 | func Notify(config *Config, jobName string, eventName string, err error, trace *stack.Trace, kvs map[string]string) error { 99 | 100 | // Make a struct that serializes to the JSON needed for the API request to bugsnag 101 | p := newPayload(config, jobName, eventName, err, trace, kvs) 102 | 103 | // JSON serialize it 104 | data, err := json.MarshalIndent(p, "", "\t") 105 | if err != nil { 106 | return err 107 | } 108 | 109 | // Post it to the server: 110 | client := http.Client{} 111 | resp, err := client.Post(config.Endpoint, "application/json", bytes.NewBuffer(data)) 112 | if err != nil { 113 | return err 114 | } 115 | body, err := ioutil.ReadAll(resp.Body) 116 | if err != nil { 117 | return err 118 | } 119 | if string(body) != "OK" { 120 | return fmt.Errorf("response from bugsnag wasn't 'OK'") 121 | } 122 | 123 | return nil 124 | } 125 | 126 | func newPayload(config *Config, jobName string, eventName string, err error, trace *stack.Trace, kvs map[string]string) *payload { 127 | except := payloadException{ 128 | ErrorClass: eventName, 129 | Message: err.Error(), 130 | } 131 | for _, frame := range trace.Frames() { 132 | pf := payloadFrame{ 133 | File: frame.File, 134 | LineNumber: frame.LineNumber, 135 | Method: frame.Package + ":" + frame.Name, 136 | InProject: !frame.IsSystemPackage, 137 | } 138 | except.Stacktrace = append(except.Stacktrace, pf) 139 | } 140 | 141 | evt := payloadEvent{ 142 | PayloadVersion: "2", 143 | Exceptions: []payloadException{except}, 144 | Context: jobName, 145 | } 146 | evt.App.ReleaseStage = config.ReleaseStage 147 | evt.Device.Hostname = config.Hostname 148 | evt.Metadata.Kvs = kvs 149 | 150 | if requestUrl, requestUrlExists := kvs["request"]; requestUrlExists { 151 | evt.Metadata.Request.Url = requestUrl 152 | } 153 | 154 | if formData, formDataExists := kvs["formdata"]; formDataExists { 155 | evt.Metadata.Request.Parameters = formData 156 | } 157 | 158 | p := payload{ 159 | APIKey: config.APIKey, 160 | Events: 
[]payloadEvent{evt}, 161 | } 162 | p.Notifier.Name = "health" 163 | p.Notifier.Version = "1.0" 164 | p.Notifier.URL = "https://www.github.com/gocraft/health" 165 | 166 | return &p 167 | } 168 | -------------------------------------------------------------------------------- /interval_aggregation.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "reflect" 5 | "time" 6 | ) 7 | 8 | // IntervalAggregation will hold data for a given aggregation interval. 9 | type IntervalAggregation struct { 10 | // The start time of the interval 11 | IntervalStart time.Time `json:"interval_start"` 12 | 13 | // SerialNumber increments every time the aggregation changes. It does not increment if the aggregation does not change. 14 | SerialNumber int64 `json:"serial_number"` 15 | 16 | // Jobs hold a map of job name -> data about that job. 17 | // This includes both primary-job information (success vs error, et all) as well as 18 | // scoping timers/counters by the job. 19 | Jobs map[string]*JobAggregation `json:"jobs"` 20 | 21 | // aggregationMaps will hold event/timer information that is not nested per-job. 22 | aggregationMaps 23 | } 24 | 25 | type aggregationMaps struct { 26 | Timers map[string]*TimerAggregation `json:"timers"` 27 | Gauges map[string]float64 `json:"gauges"` 28 | Events map[string]int64 `json:"events"` 29 | EventErrs map[string]*ErrorCounter `json:"event_errs"` 30 | } 31 | 32 | type JobAggregation struct { 33 | aggregationMaps 34 | TimerAggregation 35 | 36 | CountSuccess int64 `json:"count_success"` 37 | CountValidationError int64 `json:"count_validation_error"` 38 | CountPanic int64 `json:"count_panic"` 39 | CountError int64 `json:"count_error"` 40 | CountJunk int64 `json:"count_junk"` 41 | } 42 | 43 | type TimerAggregation struct { 44 | Count int64 `json:"count"` 45 | NanosSum int64 `json:"nanos_sum"` 46 | NanosSumSquares float64 `json:"nanos_sum_squares"` // 3seconds^2 overflows an int64 47 | NanosMin int64 `json:"nanos_min"` 48 | NanosMax int64 `json:"nanos_max"` 49 | } 50 | 51 | type ErrorCounter struct { 52 | Count int64 `json:"count"` 53 | 54 | // Let's keep a ring buffer of some errors. I feel like this isn't the best data structure / plan of attack here but works for now. 
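// addError only advances the ring index when the incoming error differs
// (via reflect.DeepEqual) from the most recent sample, so a burst of identical
// errors occupies a single slot instead of evicting older, distinct samples.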
55 | errorSamples [5]error 56 | errorSampleIndex int 57 | } 58 | 59 | func NewIntervalAggregation(intervalStart time.Time) *IntervalAggregation { 60 | intAgg := &IntervalAggregation{ 61 | IntervalStart: intervalStart, 62 | Jobs: make(map[string]*JobAggregation), 63 | } 64 | intAgg.initAggregationMaps() 65 | 66 | return intAgg 67 | } 68 | 69 | func (am *aggregationMaps) initAggregationMaps() { 70 | am.Timers = make(map[string]*TimerAggregation) 71 | am.Gauges = make(map[string]float64) 72 | am.Events = make(map[string]int64) 73 | am.EventErrs = make(map[string]*ErrorCounter) 74 | } 75 | 76 | func (am *aggregationMaps) getCounterErrs(event string) *ErrorCounter { 77 | ce := am.EventErrs[event] 78 | if ce == nil { 79 | ce = &ErrorCounter{} 80 | am.EventErrs[event] = ce 81 | } 82 | return ce 83 | } 84 | 85 | func (am *aggregationMaps) getTimers(event string) *TimerAggregation { 86 | t := am.Timers[event] 87 | if t == nil { 88 | t = &TimerAggregation{} 89 | am.Timers[event] = t 90 | } 91 | return t 92 | } 93 | 94 | func (ec *ErrorCounter) incrementAndAddError(inputErr error) { 95 | ec.Count++ 96 | ec.addError(inputErr) 97 | } 98 | 99 | func (ec *ErrorCounter) addError(inputErr error) { 100 | lastErr := ec.errorSamples[ec.errorSampleIndex] 101 | if lastErr == nil { 102 | ec.errorSamples[ec.errorSampleIndex] = inputErr 103 | } else if !reflect.DeepEqual(lastErr, inputErr) { 104 | n := len(ec.errorSamples) 105 | ec.errorSampleIndex = (ec.errorSampleIndex + 1) % n 106 | ec.errorSamples[ec.errorSampleIndex] = inputErr 107 | } 108 | } 109 | 110 | func (ec *ErrorCounter) getErrorSamples() []error { 111 | // Count how many non-nil errors are there so we can make a slice of the right size 112 | count := 0 113 | for _, e := range ec.errorSamples { 114 | if e != nil { 115 | count++ 116 | } 117 | } 118 | ret := make([]error, 0, count) 119 | 120 | // Put non-nil errors in slice 121 | for _, e := range ec.errorSamples { 122 | if e != nil { 123 | ret = append(ret, e) 124 | } 125 | } 126 | return ret 127 | } 128 | 129 | func (ia *IntervalAggregation) getJobAggregation(job string) *JobAggregation { 130 | jobAgg := ia.Jobs[job] 131 | if jobAgg == nil { 132 | jobAgg = &JobAggregation{} 133 | jobAgg.initAggregationMaps() 134 | ia.Jobs[job] = jobAgg 135 | } 136 | return jobAgg 137 | } 138 | 139 | func (a *TimerAggregation) ingest(nanos int64) { 140 | a.Count++ 141 | a.NanosSum += nanos 142 | floatNano := float64(nanos) 143 | a.NanosSumSquares += (floatNano * floatNano) 144 | if a.Count == 1 || nanos < a.NanosMin { 145 | a.NanosMin = nanos 146 | } 147 | if a.Count == 1 || nanos > a.NanosMax { 148 | a.NanosMax = nanos 149 | } 150 | } 151 | 152 | func (a *JobAggregation) ingest(status CompletionStatus, nanos int64) { 153 | a.TimerAggregation.ingest(nanos) 154 | if status == Success { 155 | a.CountSuccess++ 156 | } else if status == ValidationError { 157 | a.CountValidationError++ 158 | } else if status == Panic { 159 | a.CountPanic++ 160 | } else if status == Error { 161 | a.CountError++ 162 | } else if status == Junk { 163 | a.CountJunk++ 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /aggregator.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "time" 5 | ) 6 | 7 | type aggregator struct { 8 | // How long is each aggregation interval. Eg, 1 minute 9 | intervalDuration time.Duration 10 | 11 | // Retain controls how many metrics interval we keep. 
Eg, 5 minutes 12 | retain time.Duration 13 | 14 | // maxIntervals is the maximum length of intervals. 15 | // It is retain / interval. 16 | maxIntervals int 17 | 18 | // intervals is a slice of the retained intervals 19 | intervalAggregations []*IntervalAggregation 20 | } 21 | 22 | func startAggregator(intervalDuration time.Duration, retain time.Duration, sink *JsonPollingSink) { 23 | cmdChan := sink.cmdChan 24 | doneChan := sink.doneChan 25 | intervalsChanChan := sink.intervalsChanChan 26 | ticker := time.Tick(1 * time.Second) 27 | 28 | agg := newAggregator(intervalDuration, retain) 29 | 30 | AGGREGATE_LOOP: 31 | for { 32 | select { 33 | case <-doneChan: 34 | sink.doneDoneChan <- 1 35 | break AGGREGATE_LOOP 36 | case cmd := <-cmdChan: 37 | if cmd.Kind == cmdKindEvent { 38 | agg.EmitEvent(cmd.Job, cmd.Event) 39 | } else if cmd.Kind == cmdKindEventErr { 40 | agg.EmitEventErr(cmd.Job, cmd.Event, cmd.Err) 41 | } else if cmd.Kind == cmdKindTiming { 42 | agg.EmitTiming(cmd.Job, cmd.Event, cmd.Nanos) 43 | } else if cmd.Kind == cmdKindGauge { 44 | agg.EmitGauge(cmd.Job, cmd.Event, cmd.Value) 45 | } else if cmd.Kind == cmdKindComplete { 46 | agg.EmitComplete(cmd.Job, cmd.Status, cmd.Nanos) 47 | } 48 | case <-ticker: 49 | agg.getIntervalAggregation() // this has the side effect of sliding the interval window if necessary. 50 | case intervalsChan := <-intervalsChanChan: 51 | intervalsChan <- agg.memorySafeIntervals() 52 | } 53 | } 54 | } 55 | 56 | func newAggregator(intervalDuration time.Duration, retain time.Duration) *aggregator { 57 | maxIntervals := int(retain / intervalDuration) 58 | return &aggregator{ 59 | intervalDuration: intervalDuration, 60 | retain: retain, 61 | maxIntervals: maxIntervals, 62 | intervalAggregations: make([]*IntervalAggregation, 0, maxIntervals), 63 | } 64 | } 65 | 66 | func (a *aggregator) memorySafeIntervals() []*IntervalAggregation { 67 | ret := make([]*IntervalAggregation, 0, len(a.intervalAggregations)) 68 | curAgg := a.getIntervalAggregation() 69 | 70 | for _, intAgg := range a.intervalAggregations { 71 | if intAgg == curAgg { 72 | ret = append(ret, intAgg.Clone()) 73 | } else { 74 | ret = append(ret, intAgg) 75 | } 76 | } 77 | 78 | return ret 79 | } 80 | 81 | func (a *aggregator) EmitEvent(job string, event string) { 82 | intAgg := a.getIntervalAggregation() 83 | intAgg.Events[event] = intAgg.Events[event] + 1 84 | jobAgg := intAgg.getJobAggregation(job) 85 | jobAgg.Events[event] = jobAgg.Events[event] + 1 86 | intAgg.SerialNumber++ 87 | } 88 | 89 | func (a *aggregator) EmitEventErr(job string, event string, inputErr error) { 90 | intAgg := a.getIntervalAggregation() 91 | errc := intAgg.getCounterErrs(event) 92 | errc.incrementAndAddError(inputErr) 93 | jobAgg := intAgg.getJobAggregation(job) 94 | jerrc := jobAgg.getCounterErrs(event) 95 | jerrc.incrementAndAddError(inputErr) 96 | intAgg.SerialNumber++ 97 | } 98 | 99 | func (a *aggregator) EmitTiming(job string, event string, nanos int64) { 100 | intAgg := a.getIntervalAggregation() 101 | t := intAgg.getTimers(event) 102 | t.ingest(nanos) 103 | jobAgg := intAgg.getJobAggregation(job) 104 | jt := jobAgg.getTimers(event) 105 | jt.ingest(nanos) 106 | intAgg.SerialNumber++ 107 | } 108 | 109 | func (a *aggregator) EmitGauge(job string, event string, value float64) { 110 | intAgg := a.getIntervalAggregation() 111 | intAgg.Gauges[event] = value 112 | jobAgg := intAgg.getJobAggregation(job) 113 | jobAgg.Gauges[event] = value 114 | intAgg.SerialNumber++ 115 | } 116 | 117 | func (a *aggregator) EmitComplete(job string, 
status CompletionStatus, nanos int64) { 118 | intAgg := a.getIntervalAggregation() 119 | jobAgg := intAgg.getJobAggregation(job) 120 | jobAgg.ingest(status, nanos) 121 | intAgg.SerialNumber++ 122 | } 123 | 124 | func (a *aggregator) getIntervalAggregation() *IntervalAggregation { 125 | intervalStart := now().Truncate(a.intervalDuration) 126 | 127 | n := len(a.intervalAggregations) 128 | if n > 0 && a.intervalAggregations[n-1].IntervalStart == intervalStart { 129 | return a.intervalAggregations[n-1] 130 | } 131 | 132 | return a.createIntervalAggregation(intervalStart) 133 | } 134 | 135 | func (a *aggregator) createIntervalAggregation(interval time.Time) *IntervalAggregation { 136 | // Make new interval: 137 | current := NewIntervalAggregation(interval) 138 | 139 | // If we've reached our max intervals, and we're going to shift everything down, then set the last one 140 | n := len(a.intervalAggregations) 141 | if n == a.maxIntervals { 142 | for i := 1; i < n; i++ { 143 | a.intervalAggregations[i-1] = a.intervalAggregations[i] 144 | } 145 | a.intervalAggregations[n-1] = current 146 | } else { 147 | a.intervalAggregations = append(a.intervalAggregations, current) 148 | } 149 | 150 | return current 151 | } 152 | 153 | var nowMock time.Time 154 | 155 | func now() time.Time { 156 | if nowMock.IsZero() { 157 | return time.Now() 158 | } 159 | return nowMock 160 | } 161 | 162 | func setNowMock(t string) { 163 | var err error 164 | nowMock, err = time.Parse(time.RFC3339, t) 165 | if err != nil { 166 | panic(err) 167 | } 168 | } 169 | 170 | func resetNowMock() { 171 | nowMock = time.Time{} 172 | } 173 | -------------------------------------------------------------------------------- /cmd/healthtop/jobs.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/buger/goterm" 7 | "github.com/gocraft/health/healthd" 8 | "io/ioutil" 9 | "net/http" 10 | "net/url" 11 | "strings" 12 | "time" 13 | ) 14 | 15 | type jobOptions struct { 16 | Sort string 17 | Name string 18 | } 19 | 20 | func jobsLoop(opts *jobOptions) { 21 | secondTicker := time.Tick(1 * time.Second) 22 | 23 | var lastApiResponse *healthd.ApiResponseJobs 24 | var hStatus healthdStatus 25 | 26 | responses := make(chan *healthd.ApiResponseJobs) 27 | errors := make(chan error) 28 | 29 | go pollHealthDJobs(opts, responses, errors) 30 | for { 31 | select { 32 | case <-secondTicker: 33 | go pollHealthDJobs(opts, responses, errors) 34 | printJobs(lastApiResponse, &hStatus) 35 | case resp := <-responses: 36 | lastApiResponse = resp 37 | hStatus.lastSuccessAt = time.Now() 38 | printJobs(lastApiResponse, &hStatus) 39 | case err := <-errors: 40 | hStatus.lastErrorAt = time.Now() 41 | hStatus.lastError = err 42 | } 43 | } 44 | } 45 | 46 | func pollHealthDJobs(opts *jobOptions, responses chan *healthd.ApiResponseJobs, errors chan error) { 47 | var body []byte 48 | 49 | // limit. If name is not set, then limit it to the terminal height. 50 | // if name IS set, then don't limit it b/c we will currently filter in-memory 51 | var limit uint 52 | if opts.Name == "" { 53 | limit = maxRows() 54 | } 55 | 56 | values := url.Values{} 57 | if opts.Sort != "" { 58 | values.Add("sort", opts.Sort) 59 | } 60 | if limit != 0 { 61 | values.Add("limit", fmt.Sprint(limit)) 62 | } 63 | 64 | uri := "http://" + sourceHostPort + "/healthd/jobs" 65 | params := values.Encode() 66 | if params != "" { 67 | uri = uri + "?" 
+ params 68 | } 69 | 70 | resp, err := http.Get(uri) 71 | if err != nil { 72 | errors <- err 73 | return 74 | } 75 | defer resp.Body.Close() 76 | body, err = ioutil.ReadAll(resp.Body) 77 | if err != nil { 78 | errors <- err 79 | return 80 | } 81 | 82 | var response healthd.ApiResponseJobs 83 | if err := json.Unmarshal(body, &response); err != nil { 84 | errors <- err 85 | return 86 | } 87 | 88 | if opts.Name != "" { 89 | filterJobsByName(&response, opts.Name) 90 | } 91 | 92 | responses <- &response 93 | } 94 | 95 | // Given the api response, remove any job entries that don't have 'name' in them. 96 | func filterJobsByName(resp *healthd.ApiResponseJobs, name string) { 97 | filteredSlice := []*healthd.Job{} 98 | 99 | for _, job := range resp.Jobs { 100 | if strings.Contains(job.Name, name) { 101 | filteredSlice = append(filteredSlice, job) 102 | } 103 | } 104 | 105 | resp.Jobs = filteredSlice 106 | } 107 | 108 | func printJobs(lastApiResponse *healthd.ApiResponseJobs, status *healthdStatus) { 109 | goterm.Clear() // Clear current screen 110 | goterm.MoveCursor(1, 1) 111 | defer goterm.Flush() 112 | goterm.Println("Current Time:", status.FmtNow(), " Status:", status.FmtStatus()) 113 | 114 | if lastApiResponse == nil { 115 | goterm.Println("no data yet") 116 | return 117 | } 118 | 119 | columns := []string{ 120 | "Job", 121 | // "Jobs/Second", //minute? flag? 122 | "Total Count", 123 | "Success", 124 | "ValidationError", 125 | "Panic", 126 | "Error", 127 | "Junk", 128 | "Avg Response Time", 129 | "Stddev", 130 | "Min", 131 | "Max", 132 | "Total", 133 | } 134 | 135 | for i, s := range columns { 136 | columns[i] = goterm.Bold(goterm.Color(s, goterm.BLACK)) 137 | } 138 | 139 | table := goterm.NewTable(0, goterm.Width()-1, 5, ' ', 0) 140 | fmt.Fprintf(table, "%s\n", strings.Join(columns, "\t")) 141 | 142 | for _, job := range lastApiResponse.Jobs { 143 | printJob(table, job) 144 | } 145 | 146 | goterm.Println(table) 147 | } 148 | 149 | func printJob(table *goterm.Table, job *healthd.Job) { 150 | fullSuccess := job.Count == job.CountSuccess 151 | printCellString(job.Name, table, true, false, false) 152 | printCellInt64(job.Count, table, false, fullSuccess, false) 153 | printCellInt64(job.CountSuccess, table, fullSuccess, fullSuccess, false) 154 | printCellInt64(job.CountValidationError, table, job.CountValidationError > 0, false, job.CountValidationError > 0) 155 | printCellInt64(job.CountPanic, table, job.CountPanic > 0, false, job.CountPanic > 0) 156 | printCellInt64(job.CountError, table, job.CountError > 0, false, job.CountError > 0) 157 | printCellInt64(job.CountJunk, table, job.CountJunk > 0, false, job.CountJunk > 0) 158 | printCellNanos(int64(job.NanosAvg), table, true, false, false) 159 | printCellNanos(int64(job.NanosStdDev), table, false, false, false) 160 | printCellNanos(job.NanosMin, table, false, false, false) 161 | printCellNanos(job.NanosMax, table, false, false, false) 162 | printCellNanos(job.NanosSum, table, false, false, false) 163 | fmt.Fprintf(table, "\n") 164 | } 165 | 166 | func printCellNanos(nanos int64, table *goterm.Table, isBold, isGreen, isRed bool) { 167 | var units string 168 | switch { 169 | case nanos > 2000000: 170 | units = "ms" 171 | nanos /= 1000000 172 | case nanos > 1000: 173 | units = "μs" 174 | nanos /= 1000 175 | default: 176 | units = "ns" 177 | } 178 | 179 | printCellString(fmt.Sprintf("%d %s", nanos, units), table, isBold, isGreen, isRed) 180 | } 181 | 182 | func printCellInt64(val int64, table *goterm.Table, isBold, isGreen, isRed bool) { 183 | 
printCellString(fmt.Sprint(val), table, isBold, isGreen, isRed) 184 | } 185 | 186 | func printCellString(text string, table *goterm.Table, isBold, isGreen, isRed bool) { 187 | color := goterm.BLACK 188 | if isGreen { 189 | color = goterm.GREEN 190 | } else if isRed { 191 | color = goterm.RED 192 | } 193 | 194 | fmt.Fprintf(table, "%s\t", format(text, color, isBold)) 195 | } 196 | 197 | func format(text string, color int, isBold bool) string { 198 | if isBold { 199 | return goterm.Bold(goterm.Color(text, color)) 200 | } else { 201 | return normal(goterm.Color(text, color)) 202 | } 203 | 204 | } 205 | 206 | func normal(text string) string { 207 | return fmt.Sprintf("\033[0m%s\033[0m", text) 208 | } 209 | 210 | // Returns the max amount of metrics/rows we can display 211 | // This is the # of rows in the terminal minus 2 (for time / stats + grid header) 212 | // To elimate any weird cases where the terminal is super short, we'll return a min rows of 3 213 | func maxRows() uint { 214 | n := goterm.Height() - 2 215 | if n < 3 { 216 | n = 3 217 | } 218 | return uint(n) 219 | } 220 | -------------------------------------------------------------------------------- /health.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "runtime" 7 | "time" 8 | ) 9 | 10 | // This is primarily used as syntactic sugar for libs outside this app for passing in maps easily. 11 | // We don't rely on it internally b/c I don't want to tie interfaces to the 'health' package. 12 | type Kvs map[string]string 13 | 14 | type EventReceiver interface { 15 | Event(eventName string) 16 | EventKv(eventName string, kvs map[string]string) 17 | EventErr(eventName string, err error) error 18 | EventErrKv(eventName string, err error, kvs map[string]string) error 19 | Timing(eventName string, nanoseconds int64) 20 | TimingKv(eventName string, nanoseconds int64, kvs map[string]string) 21 | Gauge(eventName string, value float64) 22 | GaugeKv(eventName string, value float64, kvs map[string]string) 23 | } 24 | 25 | type Stream struct { 26 | Sinks []Sink 27 | KeyValues map[string]string 28 | *Job 29 | } 30 | 31 | type Job struct { 32 | Stream *Stream 33 | JobName string 34 | KeyValues map[string]string 35 | Start time.Time 36 | } 37 | 38 | type CompletionStatus int 39 | 40 | const ( 41 | Success CompletionStatus = iota 42 | ValidationError 43 | Panic 44 | Error 45 | Junk 46 | ) 47 | 48 | var completionStatusToString = map[CompletionStatus]string{ 49 | Success: "success", 50 | ValidationError: "validation_error", 51 | Panic: "panic", 52 | Error: "error", 53 | Junk: "junk", 54 | } 55 | 56 | func (cs CompletionStatus) String() string { 57 | return completionStatusToString[cs] 58 | } 59 | 60 | type Sink interface { 61 | EmitEvent(job string, event string, kvs map[string]string) 62 | EmitEventErr(job string, event string, err error, kvs map[string]string) 63 | EmitTiming(job string, event string, nanoseconds int64, kvs map[string]string) 64 | EmitComplete(job string, status CompletionStatus, nanoseconds int64, kvs map[string]string) 65 | EmitGauge(job string, event string, value float64, kvs map[string]string) 66 | } 67 | 68 | func NewStream() *Stream { 69 | s := &Stream{} 70 | s.Job = s.NewJob("general") 71 | return s 72 | } 73 | 74 | func (s *Stream) AddSink(sink Sink) *Stream { 75 | s.Sinks = append(s.Sinks, sink) 76 | return s 77 | } 78 | 79 | func (s *Stream) KeyValue(key string, value string) *Stream { 80 | if s.KeyValues == nil { 81 | 
s.KeyValues = make(map[string]string) 82 | } 83 | s.KeyValues[key] = value 84 | return s 85 | } 86 | 87 | func (s *Stream) NewJob(name string) *Job { 88 | return &Job{ 89 | Stream: s, 90 | JobName: name, 91 | Start: time.Now(), 92 | } 93 | } 94 | 95 | func (j *Job) KeyValue(key string, value string) *Job { 96 | if j.KeyValues == nil { 97 | j.KeyValues = make(map[string]string) 98 | } 99 | j.KeyValues[key] = value 100 | return j 101 | } 102 | 103 | func (j *Job) Event(eventName string) { 104 | allKvs := j.mergedKeyValues(nil) 105 | for _, sink := range j.Stream.Sinks { 106 | sink.EmitEvent(j.JobName, eventName, allKvs) 107 | } 108 | } 109 | 110 | func (j *Job) EventKv(eventName string, kvs map[string]string) { 111 | allKvs := j.mergedKeyValues(kvs) 112 | for _, sink := range j.Stream.Sinks { 113 | sink.EmitEvent(j.JobName, eventName, allKvs) 114 | } 115 | } 116 | 117 | func (j *Job) EventErr(eventName string, err error) error { 118 | err = wrapErr(err) 119 | allKvs := j.mergedKeyValues(nil) 120 | for _, sink := range j.Stream.Sinks { 121 | sink.EmitEventErr(j.JobName, eventName, err, allKvs) 122 | } 123 | if err, ok := err.(*UnmutedError); ok { 124 | err.Emitted = true 125 | } 126 | return err 127 | } 128 | 129 | func (j *Job) EventErrKv(eventName string, err error, kvs map[string]string) error { 130 | err = wrapErr(err) 131 | allKvs := j.mergedKeyValues(kvs) 132 | for _, sink := range j.Stream.Sinks { 133 | sink.EmitEventErr(j.JobName, eventName, err, allKvs) 134 | } 135 | if err, ok := err.(*UnmutedError); ok { 136 | err.Emitted = true 137 | } 138 | return err 139 | } 140 | 141 | func (j *Job) Timing(eventName string, nanoseconds int64) { 142 | allKvs := j.mergedKeyValues(nil) 143 | for _, sink := range j.Stream.Sinks { 144 | sink.EmitTiming(j.JobName, eventName, nanoseconds, allKvs) 145 | } 146 | } 147 | 148 | func (j *Job) TimingKv(eventName string, nanoseconds int64, kvs map[string]string) { 149 | allKvs := j.mergedKeyValues(kvs) 150 | for _, sink := range j.Stream.Sinks { 151 | sink.EmitTiming(j.JobName, eventName, nanoseconds, allKvs) 152 | } 153 | } 154 | 155 | func (j *Job) Gauge(eventName string, value float64) { 156 | allKvs := j.mergedKeyValues(nil) 157 | for _, sink := range j.Stream.Sinks { 158 | sink.EmitGauge(j.JobName, eventName, value, allKvs) 159 | } 160 | } 161 | 162 | func (j *Job) GaugeKv(eventName string, value float64, kvs map[string]string) { 163 | allKvs := j.mergedKeyValues(kvs) 164 | for _, sink := range j.Stream.Sinks { 165 | sink.EmitGauge(j.JobName, eventName, value, allKvs) 166 | } 167 | } 168 | 169 | func (j *Job) Complete(status CompletionStatus) { 170 | allKvs := j.mergedKeyValues(nil) 171 | for _, sink := range j.Stream.Sinks { 172 | sink.EmitComplete(j.JobName, status, time.Since(j.Start).Nanoseconds(), allKvs) 173 | } 174 | } 175 | 176 | func (j *Job) CompleteKv(status CompletionStatus, kvs map[string]string) { 177 | allKvs := j.mergedKeyValues(kvs) 178 | for _, sink := range j.Stream.Sinks { 179 | sink.EmitComplete(j.JobName, status, time.Since(j.Start).Nanoseconds(), allKvs) 180 | } 181 | } 182 | 183 | func (j *Job) mergedKeyValues(instanceKvs map[string]string) map[string]string { 184 | var allKvs map[string]string 185 | 186 | // Count how many maps actually have contents in them. If it's 0 or 1, we won't allocate a new map. 187 | // Also, optimistically set allKvs. We might use it or we might overwrite the value with a newly made map. 
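// When more than one source has data we build a fresh map below; later writes win,
// so per-call kvs override Job kvs, which override Stream kvs. For example, with
// Stream kvs {"env":"prod"}, Job kvs {"env":"staging"}, and instance kvs {"req":"1"},
// the merged result is {"env":"staging", "req":"1"}.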
188 | var kvCount = 0 189 | if len(j.KeyValues) > 0 { 190 | kvCount += 1 191 | allKvs = j.KeyValues 192 | } 193 | if len(j.Stream.KeyValues) > 0 { 194 | kvCount += 1 195 | allKvs = j.Stream.KeyValues 196 | } 197 | if len(instanceKvs) > 0 { 198 | kvCount += 1 199 | allKvs = instanceKvs 200 | } 201 | 202 | if kvCount > 1 { 203 | allKvs = make(map[string]string) 204 | for k, v := range j.Stream.KeyValues { 205 | allKvs[k] = v 206 | } 207 | for k, v := range j.KeyValues { 208 | allKvs[k] = v 209 | } 210 | for k, v := range instanceKvs { 211 | allKvs[k] = v 212 | } 213 | } 214 | 215 | return allKvs 216 | } 217 | 218 | func (s *Stream) Run(jobName string, f func() error) error { 219 | j := s.NewJob(jobName) 220 | return j.Run(f) 221 | } 222 | 223 | func (j *Job) Run(f func() error) (err error) { 224 | defer func() { 225 | if r := recover(); r != nil { 226 | stack := make([]byte, 4096) 227 | stack = stack[:runtime.Stack(stack, false)] 228 | 229 | // recovered value from panic() is an interface{}, and it might not be `error` 230 | // do not simply type-assert here 231 | err = errors.New(fmt.Sprint(r)) 232 | j.EventErrKv("panic", err, Kvs{"stack": string(stack)}) 233 | j.Complete(Panic) 234 | } 235 | }() 236 | 237 | err = f() 238 | if err != nil { 239 | j.Complete(Error) 240 | } else { 241 | j.Complete(Success) 242 | } 243 | 244 | return 245 | } 246 | -------------------------------------------------------------------------------- /writer_sink_test.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "bytes" 5 | "errors" 6 | "github.com/stretchr/testify/assert" 7 | "regexp" 8 | "testing" 9 | ) 10 | 11 | var basicEventRegexp = regexp.MustCompile("\\[[^\\]]+\\]: job:(.+) event:(.+)") 12 | var kvsEventRegexp = regexp.MustCompile("\\[[^\\]]+\\]: job:(.+) event:(.+) kvs:\\[(.+)\\]") 13 | var basicEventErrRegexp = regexp.MustCompile("\\[[^\\]]+\\]: job:(.+) event:(.+) err:(.+)") 14 | var kvsEventErrRegexp = regexp.MustCompile("\\[[^\\]]+\\]: job:(.+) event:(.+) err:(.+) kvs:\\[(.+)\\]") 15 | var basicTimingRegexp = regexp.MustCompile("\\[[^\\]]+\\]: job:(.+) event:(.+) time:(.+)") 16 | var kvsTimingRegexp = regexp.MustCompile("\\[[^\\]]+\\]: job:(.+) event:(.+) time:(.+) kvs:\\[(.+)\\]") 17 | var basicGaugeRegexp = regexp.MustCompile("\\[[^\\]]+\\]: job:(.+) event:(.+) gauge:(.+)") 18 | var kvsGaugeRegexp = regexp.MustCompile("\\[[^\\]]+\\]: job:(.+) event:(.+) gauge:(.+) kvs:\\[(.+)\\]") 19 | var basicCompletionRegexp = regexp.MustCompile("\\[[^\\]]+\\]: job:(.+) status:(.+) time:(.+)") 20 | var kvsCompletionRegexp = regexp.MustCompile("\\[[^\\]]+\\]: job:(.+) status:(.+) time:(.+) kvs:\\[(.+)\\]") 21 | 22 | var testErr = errors.New("my test error") 23 | 24 | func BenchmarkWriterSinkEmitEvent(b *testing.B) { 25 | var by bytes.Buffer 26 | someKvs := map[string]string{"foo": "bar", "qux": "dog"} 27 | sink := WriterSink{&by} 28 | b.ResetTimer() 29 | for i := 0; i < b.N; i++ { 30 | by.Reset() 31 | sink.EmitEvent("myjob", "myevent", someKvs) 32 | } 33 | } 34 | 35 | func BenchmarkWriterSinkEmitEventErr(b *testing.B) { 36 | var by bytes.Buffer 37 | someKvs := map[string]string{"foo": "bar", "qux": "dog"} 38 | sink := WriterSink{&by} 39 | b.ResetTimer() 40 | for i := 0; i < b.N; i++ { 41 | by.Reset() 42 | sink.EmitEventErr("myjob", "myevent", testErr, someKvs) 43 | } 44 | } 45 | 46 | func BenchmarkWriterSinkEmitTiming(b *testing.B) { 47 | var by bytes.Buffer 48 | someKvs := map[string]string{"foo": "bar", "qux": "dog"} 49 | sink := 
WriterSink{&by} 50 | b.ResetTimer() 51 | for i := 0; i < b.N; i++ { 52 | by.Reset() 53 | sink.EmitTiming("myjob", "myevent", 234203, someKvs) 54 | } 55 | } 56 | 57 | func BenchmarkWriterSinkEmitComplete(b *testing.B) { 58 | var by bytes.Buffer 59 | someKvs := map[string]string{"foo": "bar", "qux": "dog"} 60 | sink := WriterSink{&by} 61 | b.ResetTimer() 62 | for i := 0; i < b.N; i++ { 63 | by.Reset() 64 | sink.EmitComplete("myjob", Success, 234203, someKvs) 65 | } 66 | } 67 | 68 | func TestWriterSinkEmitEventBasic(t *testing.T) { 69 | var b bytes.Buffer 70 | sink := WriterSink{&b} 71 | sink.EmitEvent("myjob", "myevent", nil) 72 | 73 | str := b.String() 74 | 75 | result := basicEventRegexp.FindStringSubmatch(str) 76 | assert.Equal(t, 3, len(result)) 77 | assert.Equal(t, "myjob", result[1]) 78 | assert.Equal(t, "myevent", result[2]) 79 | } 80 | 81 | func TestWriterSinkEmitEventKvs(t *testing.T) { 82 | var b bytes.Buffer 83 | sink := WriterSink{&b} 84 | sink.EmitEvent("myjob", "myevent", map[string]string{"wat": "ok", "another": "thing"}) 85 | 86 | str := b.String() 87 | 88 | result := kvsEventRegexp.FindStringSubmatch(str) 89 | assert.Equal(t, 4, len(result)) 90 | assert.Equal(t, "myjob", result[1]) 91 | assert.Equal(t, "myevent", result[2]) 92 | assert.Equal(t, "another:thing wat:ok", result[3]) 93 | } 94 | 95 | func TestWriterSinkEmitEventErrBasic(t *testing.T) { 96 | var b bytes.Buffer 97 | sink := WriterSink{&b} 98 | sink.EmitEventErr("myjob", "myevent", testErr, nil) 99 | 100 | str := b.String() 101 | 102 | result := basicEventErrRegexp.FindStringSubmatch(str) 103 | assert.Equal(t, 4, len(result)) 104 | assert.Equal(t, "myjob", result[1]) 105 | assert.Equal(t, "myevent", result[2]) 106 | assert.Equal(t, testErr.Error(), result[3]) 107 | } 108 | 109 | func TestWriterSinkEmitEventErrKvs(t *testing.T) { 110 | var b bytes.Buffer 111 | sink := WriterSink{&b} 112 | sink.EmitEventErr("myjob", "myevent", testErr, map[string]string{"wat": "ok", "another": "thing"}) 113 | 114 | str := b.String() 115 | 116 | result := kvsEventErrRegexp.FindStringSubmatch(str) 117 | assert.Equal(t, 5, len(result)) 118 | assert.Equal(t, "myjob", result[1]) 119 | assert.Equal(t, "myevent", result[2]) 120 | assert.Equal(t, testErr.Error(), result[3]) 121 | assert.Equal(t, "another:thing wat:ok", result[4]) 122 | } 123 | 124 | func TestWriterSinkEmitTimingBasic(t *testing.T) { 125 | var b bytes.Buffer 126 | sink := WriterSink{&b} 127 | sink.EmitTiming("myjob", "myevent", 1204000, nil) 128 | 129 | str := b.String() 130 | 131 | result := basicTimingRegexp.FindStringSubmatch(str) 132 | assert.Equal(t, 4, len(result)) 133 | assert.Equal(t, "myjob", result[1]) 134 | assert.Equal(t, "myevent", result[2]) 135 | assert.Equal(t, "1204 μs", result[3]) 136 | } 137 | 138 | func TestWriterSinkEmitTimingKvs(t *testing.T) { 139 | var b bytes.Buffer 140 | sink := WriterSink{&b} 141 | sink.EmitTiming("myjob", "myevent", 34567890, map[string]string{"wat": "ok", "another": "thing"}) 142 | 143 | str := b.String() 144 | 145 | result := kvsTimingRegexp.FindStringSubmatch(str) 146 | assert.Equal(t, 5, len(result)) 147 | assert.Equal(t, "myjob", result[1]) 148 | assert.Equal(t, "myevent", result[2]) 149 | assert.Equal(t, "34 ms", result[3]) 150 | assert.Equal(t, "another:thing wat:ok", result[4]) 151 | } 152 | 153 | func TestWriterSinkEmitGaugeBasic(t *testing.T) { 154 | var b bytes.Buffer 155 | sink := WriterSink{&b} 156 | sink.EmitGauge("myjob", "myevent", 3.14, nil) 157 | 158 | str := b.String() 159 | 160 | result := 
basicGaugeRegexp.FindStringSubmatch(str) 161 | assert.Equal(t, 4, len(result)) 162 | assert.Equal(t, "myjob", result[1]) 163 | assert.Equal(t, "myevent", result[2]) 164 | assert.Equal(t, "3.14", result[3]) 165 | } 166 | 167 | func TestWriterSinkEmitGaugeKvs(t *testing.T) { 168 | var b bytes.Buffer 169 | sink := WriterSink{&b} 170 | sink.EmitGauge("myjob", "myevent", 0.11, map[string]string{"wat": "ok", "another": "thing"}) 171 | 172 | str := b.String() 173 | 174 | result := kvsGaugeRegexp.FindStringSubmatch(str) 175 | assert.Equal(t, 5, len(result)) 176 | assert.Equal(t, "myjob", result[1]) 177 | assert.Equal(t, "myevent", result[2]) 178 | assert.Equal(t, "0.11", result[3]) 179 | assert.Equal(t, "another:thing wat:ok", result[4]) 180 | } 181 | 182 | func TestWriterSinkEmitCompleteBasic(t *testing.T) { 183 | for kind, kindStr := range completionStatusToString { 184 | var b bytes.Buffer 185 | sink := WriterSink{&b} 186 | sink.EmitComplete("myjob", kind, 1204000, nil) 187 | 188 | str := b.String() 189 | 190 | result := basicCompletionRegexp.FindStringSubmatch(str) 191 | assert.Equal(t, 4, len(result)) 192 | assert.Equal(t, "myjob", result[1]) 193 | assert.Equal(t, kindStr, result[2]) 194 | assert.Equal(t, "1204 μs", result[3]) 195 | } 196 | } 197 | 198 | func TestWriterSinkEmitCompleteKvs(t *testing.T) { 199 | for kind, kindStr := range completionStatusToString { 200 | var b bytes.Buffer 201 | sink := WriterSink{&b} 202 | sink.EmitComplete("myjob", kind, 34567890, map[string]string{"wat": "ok", "another": "thing"}) 203 | 204 | str := b.String() 205 | 206 | result := kvsCompletionRegexp.FindStringSubmatch(str) 207 | assert.Equal(t, 5, len(result)) 208 | assert.Equal(t, "myjob", result[1]) 209 | assert.Equal(t, kindStr, result[2]) 210 | assert.Equal(t, "34 ms", result[3]) 211 | assert.Equal(t, "another:thing wat:ok", result[4]) 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /aggregator_test.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "errors" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestNewAggregator(t *testing.T) { 12 | a := newAggregator(time.Minute, time.Minute*5) 13 | assert.Equal(t, time.Minute, a.intervalDuration) 14 | assert.Equal(t, time.Minute*5, a.retain) 15 | assert.Equal(t, 5, a.maxIntervals) 16 | assert.Equal(t, 0, len(a.intervalAggregations)) 17 | assert.NotNil(t, a.intervalAggregations) 18 | } 19 | 20 | func TestEmitEvent(t *testing.T) { 21 | // Set time, and do a single event 22 | setNowMock("2011-09-09T23:36:13Z") 23 | defer resetNowMock() 24 | a := newAggregator(time.Minute, time.Minute*5) 25 | a.EmitEvent("foo", "bar") 26 | 27 | assert.Equal(t, 1, len(a.intervalAggregations)) 28 | 29 | intAgg := a.intervalAggregations[0] 30 | assert.NotNil(t, intAgg.Events) 31 | assert.EqualValues(t, 1, intAgg.Events["bar"]) 32 | assert.EqualValues(t, 1, intAgg.SerialNumber) 33 | 34 | assert.NotNil(t, intAgg.Jobs) 35 | jobAgg := intAgg.Jobs["foo"] 36 | assert.NotNil(t, jobAgg) 37 | assert.NotNil(t, jobAgg.Events) 38 | assert.EqualValues(t, 1, jobAgg.Events["bar"]) 39 | 40 | // Now, without changing the time, we'll do 3 more events: 41 | a.EmitEvent("foo", "bar") // duplicate to above 42 | a.EmitEvent("foo", "baz") // same job, diff event 43 | a.EmitEvent("wat", "bar") // diff job, same event 44 | 45 | assert.Equal(t, 1, len(a.intervalAggregations)) 46 | 47 | intAgg = a.intervalAggregations[0] 48 | 
assert.EqualValues(t, 3, intAgg.Events["bar"]) 49 | assert.EqualValues(t, 4, intAgg.SerialNumber) 50 | 51 | jobAgg = intAgg.Jobs["foo"] 52 | assert.EqualValues(t, 2, jobAgg.Events["bar"]) 53 | assert.EqualValues(t, 1, jobAgg.Events["baz"]) 54 | 55 | jobAgg = intAgg.Jobs["wat"] 56 | assert.NotNil(t, jobAgg) 57 | assert.EqualValues(t, 1, jobAgg.Events["bar"]) 58 | 59 | // Now we'll increment time and do one more event: 60 | setNowMock("2011-09-09T23:37:01Z") 61 | a.EmitEvent("foo", "bar") 62 | 63 | assert.Equal(t, 2, len(a.intervalAggregations)) 64 | 65 | // make sure old values don't change: 66 | intAgg = a.intervalAggregations[0] 67 | assert.EqualValues(t, 3, intAgg.Events["bar"]) 68 | assert.EqualValues(t, 4, intAgg.SerialNumber) 69 | 70 | intAgg = a.intervalAggregations[1] 71 | assert.EqualValues(t, 1, intAgg.Events["bar"]) 72 | assert.EqualValues(t, 1, intAgg.SerialNumber) 73 | } 74 | 75 | func TestEmitEventErr(t *testing.T) { 76 | setNowMock("2011-09-09T23:36:13Z") 77 | defer resetNowMock() 78 | a := newAggregator(time.Minute, time.Minute*5) 79 | a.EmitEventErr("foo", "bar", errors.New("wat")) 80 | 81 | assert.Equal(t, 1, len(a.intervalAggregations)) 82 | 83 | intAgg := a.intervalAggregations[0] 84 | assert.NotNil(t, intAgg.EventErrs) 85 | ce := intAgg.EventErrs["bar"] 86 | assert.NotNil(t, ce) 87 | assert.EqualValues(t, 1, ce.Count) 88 | assert.Equal(t, []error{errors.New("wat")}, ce.getErrorSamples()) 89 | assert.EqualValues(t, 1, intAgg.SerialNumber) 90 | 91 | assert.NotNil(t, intAgg.Jobs) 92 | jobAgg := intAgg.Jobs["foo"] 93 | assert.NotNil(t, jobAgg) 94 | assert.NotNil(t, jobAgg.EventErrs) 95 | ce = jobAgg.EventErrs["bar"] 96 | assert.EqualValues(t, 1, ce.Count) 97 | assert.Equal(t, []error{errors.New("wat")}, ce.getErrorSamples()) 98 | 99 | // One more event with the same error: 100 | a.EmitEventErr("foo", "bar", errors.New("wat")) 101 | 102 | intAgg = a.intervalAggregations[0] 103 | ce = intAgg.EventErrs["bar"] 104 | assert.EqualValues(t, 2, ce.Count) 105 | assert.Equal(t, []error{errors.New("wat")}, ce.getErrorSamples()) // doesn't change 106 | 107 | // One more event with diff error: 108 | a.EmitEventErr("foo", "bar", errors.New("lol")) 109 | 110 | intAgg = a.intervalAggregations[0] 111 | ce = intAgg.EventErrs["bar"] 112 | assert.EqualValues(t, 3, ce.Count) 113 | assert.Equal(t, []error{errors.New("wat"), errors.New("lol")}, ce.getErrorSamples()) // new error added 114 | } 115 | 116 | func TestEmitTiming(t *testing.T) { 117 | setNowMock("2011-09-09T23:36:13Z") 118 | defer resetNowMock() 119 | a := newAggregator(time.Minute, time.Minute*5) 120 | a.EmitTiming("foo", "bar", 100) 121 | 122 | assert.Equal(t, 1, len(a.intervalAggregations)) 123 | 124 | intAgg := a.intervalAggregations[0] 125 | assert.NotNil(t, intAgg.Timers) 126 | assert.EqualValues(t, 1, intAgg.SerialNumber) 127 | tAgg := intAgg.Timers["bar"] 128 | assert.NotNil(t, tAgg) 129 | assert.EqualValues(t, 1, tAgg.Count) 130 | assert.EqualValues(t, 100, tAgg.NanosSum) 131 | assert.EqualValues(t, 10000, tAgg.NanosSumSquares) 132 | assert.EqualValues(t, 100, tAgg.NanosMin) 133 | assert.EqualValues(t, 100, tAgg.NanosMax) 134 | 135 | assert.NotNil(t, intAgg.Jobs) 136 | jobAgg := intAgg.Jobs["foo"] 137 | assert.NotNil(t, jobAgg) 138 | assert.NotNil(t, jobAgg.Timers) 139 | tAgg = jobAgg.Timers["bar"] 140 | assert.EqualValues(t, 1, tAgg.Count) 141 | assert.EqualValues(t, 100, tAgg.NanosSum) 142 | assert.EqualValues(t, 10000, tAgg.NanosSumSquares) 143 | assert.EqualValues(t, 100, tAgg.NanosMin) 144 | assert.EqualValues(t, 100, 
tAgg.NanosMax) 145 | 146 | // Another timing: 147 | a.EmitTiming("baz", "bar", 9) // note: diff job 148 | 149 | intAgg = a.intervalAggregations[0] 150 | tAgg = intAgg.Timers["bar"] 151 | assert.NotNil(t, tAgg) 152 | assert.EqualValues(t, 2, tAgg.Count) 153 | assert.EqualValues(t, 109, tAgg.NanosSum) 154 | assert.EqualValues(t, 10081, tAgg.NanosSumSquares) 155 | assert.EqualValues(t, 9, tAgg.NanosMin) 156 | assert.EqualValues(t, 100, tAgg.NanosMax) 157 | 158 | jobAgg = intAgg.Jobs["baz"] 159 | tAgg = jobAgg.Timers["bar"] 160 | assert.EqualValues(t, 1, tAgg.Count) 161 | assert.EqualValues(t, 9, tAgg.NanosSum) 162 | assert.EqualValues(t, 81, tAgg.NanosSumSquares) 163 | assert.EqualValues(t, 9, tAgg.NanosMin) 164 | assert.EqualValues(t, 9, tAgg.NanosMax) 165 | } 166 | 167 | func TestEmitGauge(t *testing.T) { 168 | setNowMock("2011-09-09T23:36:13Z") 169 | defer resetNowMock() 170 | a := newAggregator(time.Minute, time.Minute*5) 171 | a.EmitGauge("foo", "bar", 100) 172 | 173 | assert.Equal(t, 1, len(a.intervalAggregations)) 174 | 175 | intAgg := a.intervalAggregations[0] 176 | assert.NotNil(t, intAgg.Gauges) 177 | assert.EqualValues(t, 1, intAgg.SerialNumber) 178 | v, ok := intAgg.Gauges["bar"] 179 | assert.True(t, ok) 180 | assert.Equal(t, 100.0, v) 181 | 182 | assert.NotNil(t, intAgg.Jobs) 183 | jobAgg := intAgg.Jobs["foo"] 184 | assert.NotNil(t, jobAgg) 185 | assert.NotNil(t, jobAgg.Gauges) 186 | v, ok = intAgg.Gauges["bar"] 187 | assert.True(t, ok) 188 | assert.Equal(t, 100.0, v) 189 | 190 | // Another gauge: 191 | a.EmitGauge("baz", "bar", 3.14) // note: diff job 192 | 193 | intAgg = a.intervalAggregations[0] 194 | v, ok = intAgg.Gauges["bar"] 195 | assert.True(t, ok) 196 | assert.Equal(t, 3.14, v) 197 | 198 | jobAgg = intAgg.Jobs["baz"] 199 | v, ok = intAgg.Gauges["bar"] 200 | assert.True(t, ok) 201 | assert.Equal(t, 3.14, v) 202 | } 203 | 204 | func TestEmitComplete(t *testing.T) { 205 | setNowMock("2011-09-09T23:36:13Z") 206 | defer resetNowMock() 207 | a := newAggregator(time.Minute, time.Minute*5) 208 | a.EmitComplete("foo", Success, 100) 209 | a.EmitComplete("foo", ValidationError, 5) 210 | a.EmitComplete("foo", Panic, 9) 211 | a.EmitComplete("foo", Error, 7) 212 | a.EmitComplete("foo", Junk, 11) 213 | 214 | assert.Equal(t, 1, len(a.intervalAggregations)) 215 | 216 | intAgg := a.intervalAggregations[0] 217 | assert.EqualValues(t, 5, intAgg.SerialNumber) 218 | jobAgg := intAgg.Jobs["foo"] 219 | assert.NotNil(t, jobAgg) 220 | 221 | assert.EqualValues(t, 5, jobAgg.Count) 222 | assert.EqualValues(t, 1, jobAgg.CountSuccess) 223 | assert.EqualValues(t, 1, jobAgg.CountValidationError) 224 | assert.EqualValues(t, 1, jobAgg.CountPanic) 225 | assert.EqualValues(t, 1, jobAgg.CountError) 226 | assert.EqualValues(t, 1, jobAgg.CountJunk) 227 | assert.EqualValues(t, 132, jobAgg.NanosSum) 228 | assert.EqualValues(t, 10276, jobAgg.NanosSumSquares) 229 | assert.EqualValues(t, 5, jobAgg.NanosMin) 230 | assert.EqualValues(t, 100, jobAgg.NanosMax) 231 | } 232 | 233 | func TestRotation(t *testing.T) { 234 | defer resetNowMock() 235 | a := newAggregator(time.Minute, time.Minute*5) 236 | setNowMock("2011-09-09T23:36:13Z") 237 | a.EmitEvent("foo", "bar") 238 | 239 | setNowMock("2011-09-09T23:37:13Z") 240 | a.EmitEvent("foo", "bar") 241 | a.EmitEvent("foo", "bar") 242 | 243 | setNowMock("2011-09-09T23:38:13Z") 244 | a.EmitEvent("foo", "bar") 245 | a.EmitEvent("foo", "bar") 246 | a.EmitEvent("foo", "bar") 247 | 248 | setNowMock("2011-09-09T23:39:13Z") 249 | a.EmitEvent("foo", "bar") 250 | a.EmitEvent("foo", 
"bar") 251 | a.EmitEvent("foo", "bar") 252 | a.EmitEvent("foo", "bar") 253 | 254 | setNowMock("2011-09-09T23:40:13Z") 255 | a.EmitEvent("foo", "bar") 256 | a.EmitEvent("foo", "bar") 257 | a.EmitEvent("foo", "bar") 258 | a.EmitEvent("foo", "bar") 259 | a.EmitEvent("foo", "bar") 260 | 261 | assert.Equal(t, 5, len(a.intervalAggregations)) 262 | 263 | for i := 0; i < 5; i++ { 264 | intAgg := a.intervalAggregations[i] 265 | assert.EqualValues(t, i+1, intAgg.Events["bar"]) 266 | } 267 | 268 | setNowMock("2011-09-09T23:41:13Z") 269 | a.EmitEvent("foo", "ok") 270 | 271 | assert.Equal(t, 5, len(a.intervalAggregations)) 272 | 273 | for i := 0; i < 4; i++ { 274 | intAgg := a.intervalAggregations[i] 275 | assert.EqualValues(t, i+2, intAgg.Events["bar"]) 276 | } 277 | intAgg := a.intervalAggregations[4] 278 | assert.EqualValues(t, 0, intAgg.Events["bar"]) 279 | assert.EqualValues(t, 1, intAgg.Events["ok"]) 280 | 281 | } 282 | -------------------------------------------------------------------------------- /sinks/librato/sink.go: -------------------------------------------------------------------------------- 1 | package librato 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "fmt" 7 | "net/http" 8 | "os" 9 | "time" 10 | 11 | "github.com/gocraft/health" 12 | ) 13 | 14 | type SanitizationFunc func(string) string 15 | 16 | type Sink struct { 17 | SanitizationFunc 18 | Source string 19 | FlushPeriod time.Duration 20 | 21 | cmdChan chan *emitCmd 22 | doneChan chan int 23 | doneDoneChan chan int 24 | httpClient *http.Client 25 | 26 | libratoUser string 27 | libratoApiKey string 28 | 29 | // Prefix is something like "metroid" 30 | // Events emitted to StatsD would be metroid.myevent.wat 31 | // Eg, don't include a trailing dot in the prefix. 32 | // It can be "", that's fine. 
33 | prefix string 34 | 35 | timers map[string]*gauge 36 | counters map[string]int64 37 | } 38 | 39 | type gauge struct { 40 | Count int64 `json:"count"` 41 | Sum float64 `json:"sum"` 42 | Min float64 `json:"min"` 43 | Max float64 `json:"max"` 44 | SumSquares float64 `json:"sum_squares"` 45 | Attributes gaugeAttributes `json:"attributes"` 46 | } 47 | 48 | type gaugeAttributes struct { 49 | Aggregate bool `json:"aggregate"` 50 | DisplayUnitsShort string `json:"display_units_long"` 51 | } 52 | 53 | type libratoCounterValue struct { 54 | Value int64 `json:"value"` 55 | Attributes gaugeAttributes `json:"attributes"` 56 | } 57 | 58 | type libratoMetricsPost struct { 59 | MeasureTime int64 `json:"measure_time"` 60 | Period int64 `json:"period"` 61 | Source string `json:"source,omitempty"` 62 | Gauges map[string]interface{} `json:"gauges,omitempty"` 63 | } 64 | 65 | var defaultTimerAttributes gaugeAttributes = gaugeAttributes{true, "ms"} 66 | var defaultCounterAttributes gaugeAttributes = gaugeAttributes{true, "count"} 67 | var libratoRequestPath string = "https://metrics-api.librato.com/v1/metrics" 68 | 69 | type cmdKind int 70 | 71 | const ( 72 | cmdKindEvent cmdKind = iota 73 | cmdKindEventErr 74 | cmdKindTiming 75 | cmdKindGauge 76 | cmdKindComplete 77 | ) 78 | 79 | type emitCmd struct { 80 | Kind cmdKind 81 | Job string 82 | Event string 83 | Err error 84 | Nanos int64 85 | Value float64 86 | Status health.CompletionStatus 87 | } 88 | 89 | func New(user, apiKey, prefix string) *Sink { 90 | const buffSize = 4096 // random-ass-guess 91 | 92 | s := &Sink{ 93 | SanitizationFunc: sanitizeKey, 94 | FlushPeriod: 15 * time.Second, 95 | cmdChan: make(chan *emitCmd, buffSize), 96 | doneChan: make(chan int), 97 | doneDoneChan: make(chan int), 98 | httpClient: &http.Client{}, 99 | libratoUser: user, 100 | libratoApiKey: apiKey, 101 | prefix: prefix, 102 | timers: make(map[string]*gauge), 103 | counters: make(map[string]int64), 104 | } 105 | 106 | s.Source, _ = os.Hostname() 107 | 108 | go s.start() 109 | 110 | return s 111 | } 112 | 113 | func (s *Sink) Stop() { 114 | s.doneChan <- 1 115 | <-s.doneDoneChan 116 | } 117 | 118 | func (s *Sink) EmitEvent(job string, event string, kvs map[string]string) { 119 | s.cmdChan <- &emitCmd{Kind: cmdKindEvent, Job: job, Event: event} 120 | } 121 | 122 | func (s *Sink) EmitEventErr(job string, event string, inputErr error, kvs map[string]string) { 123 | s.cmdChan <- &emitCmd{Kind: cmdKindEventErr, Job: job, Event: event, Err: inputErr} 124 | } 125 | 126 | func (s *Sink) EmitTiming(job string, event string, nanos int64, kvs map[string]string) { 127 | s.cmdChan <- &emitCmd{Kind: cmdKindTiming, Job: job, Event: event, Nanos: nanos} 128 | } 129 | 130 | func (s *Sink) EmitGauge(job string, event string, value float64, kvs map[string]string) { 131 | s.cmdChan <- &emitCmd{Kind: cmdKindGauge, Job: job, Event: event, Value: value} 132 | } 133 | 134 | func (s *Sink) EmitComplete(job string, status health.CompletionStatus, nanos int64, kvs map[string]string) { 135 | s.cmdChan <- &emitCmd{Kind: cmdKindComplete, Job: job, Status: status, Nanos: nanos} 136 | } 137 | 138 | func (s *Sink) start() { 139 | cmdChan := s.cmdChan 140 | doneChan := s.doneChan 141 | ticker := time.Tick(s.FlushPeriod) 142 | 143 | LIBRATO_LOOP: 144 | for { 145 | select { 146 | case <-doneChan: 147 | s.doneDoneChan <- 1 148 | break LIBRATO_LOOP 149 | case cmd := <-cmdChan: 150 | if cmd.Kind == cmdKindEvent { 151 | s.processEvent(cmd.Job, cmd.Event) 152 | } else if cmd.Kind == cmdKindEventErr { 153 | 
s.processEventErr(cmd.Job, cmd.Event, cmd.Err) 154 | } else if cmd.Kind == cmdKindTiming { 155 | s.processTiming(cmd.Job, cmd.Event, cmd.Nanos) 156 | } else if cmd.Kind == cmdKindGauge { 157 | s.processGauge(cmd.Job, cmd.Event, cmd.Value) 158 | } else if cmd.Kind == cmdKindComplete { 159 | s.processComplete(cmd.Job, cmd.Status, cmd.Nanos) 160 | } 161 | case <-ticker: 162 | s.purge() 163 | } 164 | } 165 | } 166 | 167 | func (s *Sink) processEvent(job string, event string) { 168 | key1, key2 := s.eventKeys(job, event, "count") 169 | s.inc(key1) 170 | s.inc(key2) 171 | } 172 | 173 | func (s *Sink) processEventErr(job string, event string, err error) { 174 | key1, key2 := s.eventKeys(job, event, "error.count") 175 | s.inc(key1) 176 | s.inc(key2) 177 | } 178 | 179 | func (s *Sink) processTiming(job string, event string, nanos int64) { 180 | key1, key2 := s.eventKeys(job, event, "timing") 181 | ms := float64(nanos) / float64(time.Millisecond) 182 | s.measure(key1, ms) 183 | s.measure(key2, ms) 184 | } 185 | 186 | func (s *Sink) processGauge(job string, event string, value float64) { 187 | key1, key2 := s.eventKeys(job, event, "gauge") 188 | s.measure(key1, value) 189 | s.measure(key2, value) 190 | } 191 | 192 | func (s *Sink) processComplete(job string, status health.CompletionStatus, nanos int64) { 193 | var b bytes.Buffer 194 | 195 | if s.prefix != "" { 196 | b.WriteString(s.prefix) 197 | b.WriteRune('.') 198 | } 199 | b.WriteString(s.SanitizationFunc(job)) 200 | b.WriteRune('.') 201 | b.WriteString(status.String()) 202 | b.WriteString(".timing") 203 | 204 | ms := float64(nanos) / float64(time.Millisecond) 205 | s.measure(b.String(), ms) 206 | } 207 | 208 | func (s *Sink) eventKeys(job, event, suffix string) (string, string) { 209 | var key1 bytes.Buffer // event 210 | var key2 bytes.Buffer // job.event 211 | 212 | if s.prefix != "" { 213 | key1.WriteString(s.prefix) 214 | key1.WriteRune('.') 215 | key2.WriteString(s.prefix) 216 | key2.WriteRune('.') 217 | } 218 | 219 | key1.WriteString(s.SanitizationFunc(event)) 220 | key2.WriteString(s.SanitizationFunc(job)) 221 | key2.WriteRune('.') 222 | key2.WriteString(s.SanitizationFunc(event)) 223 | 224 | if suffix != "" { 225 | key1.WriteRune('.') 226 | key1.WriteString(suffix) 227 | key2.WriteRune('.') 228 | key2.WriteString(suffix) 229 | } 230 | 231 | return key1.String(), key2.String() 232 | } 233 | 234 | func (s *Sink) inc(key string) { 235 | s.counters[key] += 1 236 | } 237 | 238 | func (s *Sink) measure(key string, value float64) { 239 | g, ok := s.timers[key] 240 | if !ok { 241 | g = &gauge{Min: value, Max: value, Sum: value, Count: 1, SumSquares: value * value, Attributes: defaultTimerAttributes} 242 | s.timers[key] = g 243 | } else { 244 | g.Count++ 245 | g.Sum += value 246 | g.SumSquares += value * value 247 | 248 | if value < g.Min { 249 | g.Min = value 250 | } 251 | if value > g.Max { 252 | g.Max = value 253 | } 254 | } 255 | } 256 | 257 | func (s *Sink) purge() { 258 | if err := s.send(); err != nil { 259 | fmt.Println("Error sending to librato: ", err) 260 | } 261 | s.timers = make(map[string]*gauge) 262 | s.counters = make(map[string]int64) 263 | } 264 | 265 | func (s *Sink) send() error { 266 | 267 | // no data? 
don't send anything to librato 268 | if len(s.timers) == 0 && len(s.counters) == 0 { 269 | return nil 270 | } 271 | 272 | body := libratoMetricsPost{ 273 | MeasureTime: time.Now().Unix(), 274 | Period: int64(s.FlushPeriod / time.Second), 275 | Source: s.Source, 276 | } 277 | 278 | gauges := make(map[string]interface{}) 279 | 280 | for k, v := range s.timers { 281 | gauges[k] = v 282 | } 283 | 284 | for k, v := range s.counters { 285 | gauges[k] = libratoCounterValue{v, defaultCounterAttributes} 286 | } 287 | body.Gauges = gauges 288 | 289 | b, err := json.Marshal(body) 290 | if nil != err { 291 | return err 292 | } 293 | 294 | fmt.Println(string(b)) 295 | 296 | req, err := http.NewRequest( 297 | "POST", 298 | libratoRequestPath, 299 | bytes.NewBuffer(b), 300 | ) 301 | if nil != err { 302 | return err 303 | } 304 | req.Header.Add("Content-Type", "application/json") 305 | req.SetBasicAuth(s.libratoUser, s.libratoApiKey) 306 | _, err = s.httpClient.Do(req) 307 | 308 | //fmt.Println(resp.Status) 309 | 310 | return err 311 | } 312 | 313 | // valid librato charactors: A-Za-z0-9.:-_ 314 | func shouldSanitize(r rune) bool { 315 | switch { 316 | case 'A' <= r && r <= 'Z': 317 | fallthrough 318 | case 'a' <= r && r <= 'z': 319 | fallthrough 320 | case '0' <= r && r <= '9': 321 | fallthrough 322 | case r == '.': 323 | fallthrough 324 | case r == ':': 325 | fallthrough 326 | case r == '-': 327 | fallthrough 328 | case r == '_': 329 | return false 330 | } 331 | return true 332 | } 333 | 334 | func sanitizeKey(k string) string { 335 | for _, r := range k { 336 | if shouldSanitize(r) { 337 | goto SANITIZE 338 | } 339 | } 340 | return k 341 | SANITIZE: 342 | var key bytes.Buffer 343 | for _, r := range k { 344 | if shouldSanitize(r) { 345 | key.WriteRune('_') 346 | } else { 347 | key.WriteRune(r) 348 | } 349 | } 350 | return key.String() 351 | } 352 | -------------------------------------------------------------------------------- /statsd_sink.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "bytes" 5 | "net" 6 | "strconv" 7 | "time" 8 | ) 9 | 10 | type StatsDSinkSanitizationFunc func(*bytes.Buffer, string) 11 | 12 | type eventKey struct { 13 | job string 14 | event string 15 | suffix string 16 | } 17 | 18 | type prefixBuffer struct { 19 | *bytes.Buffer 20 | prefixLen int 21 | } 22 | 23 | type StatsDSinkOptions struct { 24 | // Prefix is something like "metroid" 25 | // Events emitted to StatsD would be metroid.myevent.wat 26 | // Eg, don't include a trailing dot in the prefix. 27 | // It can be "", that's fine. 28 | Prefix string 29 | 30 | // SanitizationFunc sanitizes jobs and events before sending them to statsd 31 | SanitizationFunc StatsDSinkSanitizationFunc 32 | 33 | // SkipNestedEvents will skip {events,timers,gauges} from sending the job.event version 34 | // and will only send the event version. 35 | SkipNestedEvents bool 36 | 37 | // SkipTopLevelEvents will skip {events,timers,gauges} from sending the event version 38 | // and will only send the job.event version. 
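// If both SkipNestedEvents and SkipTopLevelEvents are set, events, timers, and
// gauges are dropped entirely; completions are unaffected and are always emitted
// as job.status timings (see processComplete below).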
39 | SkipTopLevelEvents bool 40 | } 41 | 42 | var defaultStatsDOptions = StatsDSinkOptions{SanitizationFunc: sanitizeKey} 43 | 44 | type StatsDSink struct { 45 | options StatsDSinkOptions 46 | 47 | cmdChan chan statsdEmitCmd 48 | drainDoneChan chan struct{} 49 | stopDoneChan chan struct{} 50 | 51 | flushPeriod time.Duration 52 | 53 | udpBuf bytes.Buffer 54 | timingBuf []byte 55 | 56 | udpConn *net.UDPConn 57 | udpAddr *net.UDPAddr 58 | 59 | // map of {job,event,suffix} to a re-usable buffer prefixed with the key. 60 | // Since each timing/gauge has a unique component (the time), we'll truncate to the prefix, write the timing, 61 | // and write the statsD suffix (eg, "|ms\n"). Then copy that to the UDP buffer. 62 | prefixBuffers map[eventKey]prefixBuffer 63 | } 64 | 65 | type statsdCmdKind int 66 | 67 | const ( 68 | statsdCmdKindEvent statsdCmdKind = iota 69 | statsdCmdKindEventErr 70 | statsdCmdKindTiming 71 | statsdCmdKindGauge 72 | statsdCmdKindComplete 73 | statsdCmdKindFlush 74 | statsdCmdKindDrain 75 | statsdCmdKindStop 76 | ) 77 | 78 | type statsdEmitCmd struct { 79 | Kind statsdCmdKind 80 | Job string 81 | Event string 82 | Nanos int64 83 | Value float64 84 | Status CompletionStatus 85 | } 86 | 87 | const cmdChanBuffSize = 8192 // random-ass-guess 88 | const maxUdpBytes = 1440 // 1500(Ethernet MTU) - 60(Max UDP header size 89 | 90 | func NewStatsDSink(addr string, options *StatsDSinkOptions) (*StatsDSink, error) { 91 | c, err := net.ListenPacket("udp", ":0") 92 | if err != nil { 93 | return nil, err 94 | } 95 | 96 | ra, err := net.ResolveUDPAddr("udp", addr) 97 | if err != nil { 98 | return nil, err 99 | } 100 | 101 | s := &StatsDSink{ 102 | udpConn: c.(*net.UDPConn), 103 | udpAddr: ra, 104 | cmdChan: make(chan statsdEmitCmd, cmdChanBuffSize), 105 | drainDoneChan: make(chan struct{}), 106 | stopDoneChan: make(chan struct{}), 107 | flushPeriod: 100 * time.Millisecond, 108 | prefixBuffers: make(map[eventKey]prefixBuffer), 109 | } 110 | 111 | if options != nil { 112 | s.options = *options 113 | if s.options.SanitizationFunc == nil { 114 | s.options.SanitizationFunc = sanitizeKey 115 | } 116 | } else { 117 | s.options = defaultStatsDOptions 118 | } 119 | 120 | go s.loop() 121 | 122 | return s, nil 123 | } 124 | 125 | func (s *StatsDSink) Stop() { 126 | s.cmdChan <- statsdEmitCmd{Kind: statsdCmdKindStop} 127 | <-s.stopDoneChan 128 | } 129 | 130 | func (s *StatsDSink) Drain() { 131 | s.cmdChan <- statsdEmitCmd{Kind: statsdCmdKindDrain} 132 | <-s.drainDoneChan 133 | } 134 | 135 | func (s *StatsDSink) EmitEvent(job string, event string, kvs map[string]string) { 136 | s.cmdChan <- statsdEmitCmd{Kind: statsdCmdKindEvent, Job: job, Event: event} 137 | } 138 | 139 | func (s *StatsDSink) EmitEventErr(job string, event string, inputErr error, kvs map[string]string) { 140 | s.cmdChan <- statsdEmitCmd{Kind: statsdCmdKindEventErr, Job: job, Event: event} 141 | } 142 | 143 | func (s *StatsDSink) EmitTiming(job string, event string, nanos int64, kvs map[string]string) { 144 | s.cmdChan <- statsdEmitCmd{Kind: statsdCmdKindTiming, Job: job, Event: event, Nanos: nanos} 145 | } 146 | 147 | func (s *StatsDSink) EmitGauge(job string, event string, value float64, kvs map[string]string) { 148 | s.cmdChan <- statsdEmitCmd{Kind: statsdCmdKindGauge, Job: job, Event: event, Value: value} 149 | } 150 | 151 | func (s *StatsDSink) EmitComplete(job string, status CompletionStatus, nanos int64, kvs map[string]string) { 152 | s.cmdChan <- statsdEmitCmd{Kind: statsdCmdKindComplete, Job: job, Status: status, Nanos: 
nanos} 153 | } 154 | 155 | func (s *StatsDSink) loop() { 156 | cmdChan := s.cmdChan 157 | 158 | ticker := time.NewTicker(s.flushPeriod) 159 | go func() { 160 | for _ = range ticker.C { 161 | cmdChan <- statsdEmitCmd{Kind: statsdCmdKindFlush} 162 | } 163 | }() 164 | 165 | LOOP: 166 | for cmd := range cmdChan { 167 | switch cmd.Kind { 168 | case statsdCmdKindDrain: 169 | DRAIN_LOOP: 170 | for { 171 | select { 172 | case cmd := <-cmdChan: 173 | s.processCmd(&cmd) 174 | default: 175 | s.flush() 176 | s.drainDoneChan <- struct{}{} 177 | break DRAIN_LOOP 178 | } 179 | } 180 | case statsdCmdKindStop: 181 | s.stopDoneChan <- struct{}{} 182 | break LOOP 183 | case statsdCmdKindFlush: 184 | s.flush() 185 | default: 186 | s.processCmd(&cmd) 187 | } 188 | } 189 | 190 | ticker.Stop() 191 | } 192 | 193 | func (s *StatsDSink) processCmd(cmd *statsdEmitCmd) { 194 | switch cmd.Kind { 195 | case statsdCmdKindEvent: 196 | s.processEvent(cmd.Job, cmd.Event) 197 | case statsdCmdKindEventErr: 198 | s.processEventErr(cmd.Job, cmd.Event) 199 | case statsdCmdKindTiming: 200 | s.processTiming(cmd.Job, cmd.Event, cmd.Nanos) 201 | case statsdCmdKindGauge: 202 | s.processGauge(cmd.Job, cmd.Event, cmd.Value) 203 | case statsdCmdKindComplete: 204 | s.processComplete(cmd.Job, cmd.Status, cmd.Nanos) 205 | } 206 | } 207 | 208 | func (s *StatsDSink) processEvent(job string, event string) { 209 | if !s.options.SkipTopLevelEvents { 210 | pb := s.getPrefixBuffer("", event, "") 211 | pb.WriteString("1|c\n") 212 | s.writeStatsDMetric(pb.Bytes()) 213 | } 214 | 215 | if !s.options.SkipNestedEvents { 216 | pb := s.getPrefixBuffer(job, event, "") 217 | pb.WriteString("1|c\n") 218 | s.writeStatsDMetric(pb.Bytes()) 219 | } 220 | } 221 | 222 | func (s *StatsDSink) processEventErr(job string, event string) { 223 | if !s.options.SkipTopLevelEvents { 224 | pb := s.getPrefixBuffer("", event, "error") 225 | pb.WriteString("1|c\n") 226 | s.writeStatsDMetric(pb.Bytes()) 227 | } 228 | 229 | if !s.options.SkipNestedEvents { 230 | pb := s.getPrefixBuffer(job, event, "error") 231 | pb.WriteString("1|c\n") 232 | s.writeStatsDMetric(pb.Bytes()) 233 | } 234 | } 235 | 236 | func (s *StatsDSink) processTiming(job string, event string, nanos int64) { 237 | s.writeNanosToTimingBuf(nanos) 238 | 239 | if !s.options.SkipTopLevelEvents { 240 | pb := s.getPrefixBuffer("", event, "") 241 | pb.Write(s.timingBuf) 242 | pb.WriteString("|ms\n") 243 | s.writeStatsDMetric(pb.Bytes()) 244 | } 245 | 246 | if !s.options.SkipNestedEvents { 247 | pb := s.getPrefixBuffer(job, event, "") 248 | pb.Write(s.timingBuf) 249 | pb.WriteString("|ms\n") 250 | s.writeStatsDMetric(pb.Bytes()) 251 | } 252 | } 253 | 254 | func (s *StatsDSink) processGauge(job string, event string, value float64) { 255 | s.timingBuf = s.timingBuf[0:0] 256 | prec := 2 257 | if (value < 0.1) && (value > -0.1) { 258 | prec = -1 259 | } 260 | s.timingBuf = strconv.AppendFloat(s.timingBuf, value, 'f', prec, 64) 261 | 262 | if !s.options.SkipTopLevelEvents { 263 | pb := s.getPrefixBuffer("", event, "") 264 | pb.Write(s.timingBuf) 265 | pb.WriteString("|g\n") 266 | s.writeStatsDMetric(pb.Bytes()) 267 | } 268 | 269 | if !s.options.SkipNestedEvents { 270 | pb := s.getPrefixBuffer(job, event, "") 271 | pb.Write(s.timingBuf) 272 | pb.WriteString("|g\n") 273 | s.writeStatsDMetric(pb.Bytes()) 274 | } 275 | } 276 | 277 | func (s *StatsDSink) processComplete(job string, status CompletionStatus, nanos int64) { 278 | s.writeNanosToTimingBuf(nanos) 279 | statusString := status.String() 280 | 281 | pb := 
s.getPrefixBuffer(job, "", statusString) 282 | pb.Write(s.timingBuf) 283 | pb.WriteString("|ms\n") 284 | s.writeStatsDMetric(pb.Bytes()) 285 | } 286 | 287 | func (s *StatsDSink) flush() { 288 | if s.udpBuf.Len() > 0 { 289 | s.udpConn.WriteToUDP(s.udpBuf.Bytes(), s.udpAddr) 290 | s.udpBuf.Truncate(0) 291 | } 292 | } 293 | 294 | // assumes b is a well-formed statsd metric like "job.event:1|c\n" (including newline) 295 | func (s *StatsDSink) writeStatsDMetric(b []byte) { 296 | lenb := len(b) 297 | 298 | if lenb == 0 { 299 | return 300 | } 301 | 302 | // single metric exceeds limit. sad day. 303 | if lenb > maxUdpBytes { 304 | return 305 | } 306 | 307 | lenUdpBuf := s.udpBuf.Len() 308 | 309 | if (lenb + lenUdpBuf) > maxUdpBytes { 310 | s.udpConn.WriteToUDP(s.udpBuf.Bytes(), s.udpAddr) 311 | s.udpBuf.Truncate(0) 312 | } 313 | 314 | s.udpBuf.Write(b) 315 | } 316 | 317 | func (s *StatsDSink) getPrefixBuffer(job, event, suffix string) prefixBuffer { 318 | key := eventKey{job, event, suffix} 319 | 320 | b, ok := s.prefixBuffers[key] 321 | if !ok { 322 | b.Buffer = &bytes.Buffer{} 323 | s.writeSanitizedKeys(b.Buffer, s.options.Prefix, job, event, suffix) 324 | b.WriteByte(':') 325 | b.prefixLen = b.Len() 326 | 327 | // 123456789.99|ms\n 16 bytes. timing value represents 11 days max 328 | b.Grow(16) 329 | s.prefixBuffers[key] = b 330 | } else { 331 | b.Truncate(b.prefixLen) 332 | } 333 | 334 | return b 335 | } 336 | 337 | func (s *StatsDSink) writeSanitizedKeys(b *bytes.Buffer, keys ...string) { 338 | needDot := false 339 | for _, k := range keys { 340 | if k != "" { 341 | if needDot { 342 | b.WriteByte('.') 343 | } 344 | s.options.SanitizationFunc(b, k) 345 | needDot = true 346 | } 347 | } 348 | } 349 | 350 | func (s *StatsDSink) writeNanosToTimingBuf(nanos int64) { 351 | s.timingBuf = s.timingBuf[0:0] 352 | if nanos >= 10e6 { 353 | // More than 10 milliseconds. We'll just print as an integer 354 | s.timingBuf = strconv.AppendInt(s.timingBuf, nanos/1e6, 10) 355 | } else { 356 | s.timingBuf = strconv.AppendFloat(s.timingBuf, float64(nanos)/float64(time.Millisecond), 'f', 2, 64) 357 | } 358 | } 359 | 360 | func sanitizeKey(b *bytes.Buffer, s string) { 361 | b.Grow(len(s) + 1) 362 | for i := 0; i < len(s); i++ { 363 | si := s[i] 364 | if ('A' <= si && si <= 'Z') || ('a' <= si && si <= 'z') || ('0' <= si && s[i] <= '9') || si == '_' || si == '.' 
{ 365 | b.WriteByte(si) 366 | } else { 367 | b.WriteByte('$') 368 | } 369 | } 370 | } 371 | -------------------------------------------------------------------------------- /healthd/api.go: -------------------------------------------------------------------------------- 1 | package healthd 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "github.com/braintree/manners" 7 | "github.com/gocraft/health" 8 | "github.com/gocraft/web" 9 | "math" 10 | "net/http" 11 | "sort" 12 | "strconv" 13 | "time" 14 | ) 15 | 16 | // Job represents a health.JobAggregation, but designed for JSON-ization without all the nested counters/timers 17 | type Job struct { 18 | Name string `json:"name"` 19 | Count int64 `json:"count"` 20 | CountSuccess int64 `json:"count_success"` 21 | CountValidationError int64 `json:"count_validation_error"` 22 | CountPanic int64 `json:"count_panic"` 23 | CountError int64 `json:"count_error"` 24 | CountJunk int64 `json:"count_junk"` 25 | 26 | NanosSum int64 `json:"nanos_sum"` 27 | NanosSumSquares float64 `json:"nanos_sum_squares"` 28 | NanosMin int64 `json:"nanos_min"` 29 | NanosMax int64 `json:"nanos_max"` 30 | NanosAvg float64 `json:"nanos_avg"` 31 | NanosStdDev float64 `json:"nanos_std_dev"` 32 | } 33 | 34 | type apiResponse struct { 35 | InstanceId string `json:"instance_id"` 36 | IntervalDuration time.Duration `json:"interval_duration"` 37 | } 38 | 39 | type ApiResponseJobs struct { 40 | apiResponse 41 | Jobs []*Job `json:"jobs"` 42 | } 43 | 44 | type ApiResponseAggregations struct { 45 | apiResponse 46 | Aggregations []*health.IntervalAggregation `json:"aggregations"` 47 | } 48 | 49 | type ApiResponseAggregationsOverall struct { 50 | apiResponse 51 | Overall *health.IntervalAggregation `json:"overall"` 52 | } 53 | 54 | type ApiResponseHosts struct { 55 | apiResponse 56 | Hosts []*HostStatus `json:"hosts"` 57 | } 58 | 59 | type apiContext struct { 60 | hd *HealthD 61 | *health.Job 62 | } 63 | 64 | func (hd *HealthD) apiRouter() http.Handler { 65 | router := web.New(apiContext{}) 66 | router.NotFound(func(rw web.ResponseWriter, req *web.Request) { 67 | renderNotFound(rw) 68 | }) 69 | 70 | healthdRouter := router.Subrouter(apiContext{}, "/healthd") 71 | 72 | healthdRouter.Middleware(func(c *apiContext, rw web.ResponseWriter, req *web.Request, next web.NextMiddlewareFunc) { 73 | c.hd = hd 74 | next(rw, req) 75 | }) 76 | 77 | healthdRouter.Middleware((*apiContext).SetContentType). 78 | Middleware((*apiContext).HealthMiddleware). 79 | Get("/aggregations", (*apiContext).Aggregations). 80 | Get("/aggregations/overall", (*apiContext).Overall). 81 | Get("/jobs", (*apiContext).Jobs). 
82 | Get("/hosts", (*apiContext).Hosts) 83 | 84 | return router 85 | } 86 | 87 | func (hd *HealthD) startHttpServer(hostPort string, done chan bool) { 88 | server := manners.NewWithServer(&http.Server{ 89 | Addr: hostPort, 90 | Handler: hd.apiRouter(), 91 | }) 92 | hd.stopHTTP = server.Close 93 | done <- true 94 | server.ListenAndServe() 95 | } 96 | 97 | func (c *apiContext) SetContentType(rw web.ResponseWriter, req *web.Request, next web.NextMiddlewareFunc) { 98 | rw.Header().Set("Content-Type", "application/json; charset=utf-8") 99 | next(rw, req) 100 | } 101 | 102 | func (c *apiContext) HealthMiddleware(rw web.ResponseWriter, r *web.Request, next web.NextMiddlewareFunc) { 103 | c.Job = c.hd.stream.NewJob(r.RoutePath()) 104 | 105 | path := r.URL.Path 106 | c.EventKv("starting_request", health.Kvs{"path": path}) 107 | 108 | next(rw, r) 109 | 110 | code := rw.StatusCode() 111 | kvs := health.Kvs{ 112 | "code": fmt.Sprint(code), 113 | "path": path, 114 | } 115 | 116 | // Map HTTP status code to category. 117 | var status health.CompletionStatus 118 | // if c.Panic { 119 | // status = health.Panic 120 | // } else 121 | if code < 400 { 122 | status = health.Success 123 | } else if code == 422 { 124 | status = health.ValidationError 125 | } else if code < 500 { 126 | status = health.Junk // 404, 401 127 | } else { 128 | status = health.Error 129 | } 130 | c.CompleteKv(status, kvs) 131 | } 132 | 133 | func (c *apiContext) Aggregations(rw web.ResponseWriter, r *web.Request) { 134 | aggregations := c.hd.getAggregationSequence() 135 | resp := &ApiResponseAggregations{ 136 | apiResponse: getApiResponse(c.hd.intervalDuration), 137 | Aggregations: aggregations, 138 | } 139 | renderJson(rw, resp) 140 | } 141 | 142 | func (c *apiContext) Overall(rw web.ResponseWriter, r *web.Request) { 143 | aggregations := c.hd.getAggregationSequence() 144 | overall := combineAggregations(aggregations) 145 | resp := &ApiResponseAggregationsOverall{ 146 | apiResponse: getApiResponse(c.hd.intervalDuration), 147 | Overall: overall, 148 | } 149 | renderJson(rw, resp) 150 | } 151 | 152 | func (c *apiContext) Jobs(rw web.ResponseWriter, r *web.Request) { 153 | sort := getSort(r) 154 | limit := getLimit(r) 155 | aggregations := c.hd.getAggregationSequence() 156 | overall := combineAggregations(aggregations) 157 | jobs := filterJobs(overall, sort, limit) 158 | resp := &ApiResponseJobs{ 159 | apiResponse: getApiResponse(c.hd.intervalDuration), 160 | Jobs: jobs, 161 | } 162 | renderJson(rw, resp) 163 | } 164 | 165 | func (c *apiContext) Hosts(rw web.ResponseWriter, r *web.Request) { 166 | hosts := c.hd.getHosts() 167 | sort.Sort(HostStatusByHostPort(hosts)) 168 | resp := &ApiResponseHosts{ 169 | apiResponse: getApiResponse(c.hd.intervalDuration), 170 | Hosts: hosts, 171 | } 172 | renderJson(rw, resp) 173 | } 174 | 175 | func getApiResponse(duration time.Duration) apiResponse { 176 | return apiResponse{ 177 | InstanceId: health.Identifier, 178 | IntervalDuration: duration, 179 | } 180 | } 181 | 182 | func renderJson(rw http.ResponseWriter, data interface{}) { 183 | jsonData, err := json.MarshalIndent(data, "", "\t") 184 | if err != nil { 185 | renderError(rw, err) 186 | return 187 | } 188 | fmt.Fprintf(rw, string(jsonData)) 189 | } 190 | 191 | func renderNotFound(rw http.ResponseWriter) { 192 | rw.WriteHeader(404) 193 | fmt.Fprintf(rw, `{"error": "not_found"}`) 194 | } 195 | 196 | func renderError(rw http.ResponseWriter, err error) { 197 | rw.WriteHeader(500) 198 | fmt.Fprintf(rw, `{"error": "%s"}`, err.Error()) 199 | } 200 | 
201 | func combineAggregations(aggregations []*health.IntervalAggregation) *health.IntervalAggregation { 202 | if len(aggregations) == 0 { 203 | return nil 204 | } 205 | 206 | overallAgg := health.NewIntervalAggregation(aggregations[0].IntervalStart) 207 | for _, ia := range aggregations { 208 | overallAgg.Merge(ia) 209 | } 210 | return overallAgg 211 | } 212 | 213 | func getSort(r *web.Request) string { 214 | return r.URL.Query().Get("sort") 215 | } 216 | 217 | func getLimit(r *web.Request) int { 218 | limit := r.URL.Query().Get("limit") 219 | if limit == "" { 220 | return 0 221 | } 222 | 223 | n, err := strconv.ParseInt(limit, 10, 0) 224 | if err != nil { 225 | return 0 226 | } 227 | return int(n) 228 | } 229 | 230 | // By is the type of a "less" function that defines the ordering of its Planet arguments. 231 | type By func(j1, j2 *Job) bool 232 | 233 | // Sort is a method on the function type, By, that sorts the argument slice according to the function. 234 | func (by By) Sort(jobs []*Job) { 235 | js := &jobSorter{ 236 | jobs: jobs, 237 | by: by, // The Sort method's receiver is the function (closure) that defines the sort order. 238 | } 239 | sort.Sort(js) 240 | } 241 | 242 | // planetSorter joins a By function and a slice of Planets to be sorted. 243 | type jobSorter struct { 244 | jobs []*Job 245 | by By 246 | } 247 | 248 | // Len is part of sort.Interface. 249 | func (s *jobSorter) Len() int { 250 | return len(s.jobs) 251 | } 252 | 253 | // Swap is part of sort.Interface. 254 | func (s *jobSorter) Swap(i, j int) { 255 | s.jobs[i], s.jobs[j] = s.jobs[j], s.jobs[i] 256 | } 257 | 258 | // Less is part of sort.Interface. It is implemented by calling the "by" closure in the sorter. 259 | func (s *jobSorter) Less(i, j int) bool { 260 | return s.by(s.jobs[i], s.jobs[j]) 261 | } 262 | 263 | var jobSorters = map[string]By{ 264 | "name": func(j1, j2 *Job) bool { 265 | return j1.Name < j2.Name 266 | }, 267 | "count": func(j1, j2 *Job) bool { 268 | return j1.Count > j2.Count 269 | }, 270 | "count_success": func(j1, j2 *Job) bool { 271 | return j1.CountSuccess > j2.CountSuccess 272 | }, 273 | "count_validation_error": func(j1, j2 *Job) bool { 274 | return j1.CountValidationError > j2.CountValidationError 275 | }, 276 | "count_panic": func(j1, j2 *Job) bool { 277 | return j1.CountPanic > j2.CountPanic 278 | }, 279 | "count_error": func(j1, j2 *Job) bool { 280 | return j1.CountError > j2.CountError 281 | }, 282 | "count_junk": func(j1, j2 *Job) bool { 283 | return j1.CountJunk > j2.CountJunk 284 | }, 285 | "total_time": func(j1, j2 *Job) bool { 286 | return j1.NanosSum > j2.NanosSum 287 | }, 288 | "avg": func(j1, j2 *Job) bool { 289 | return j1.NanosAvg > j2.NanosAvg 290 | }, 291 | "min": func(j1, j2 *Job) bool { 292 | return j1.NanosMin > j2.NanosMin 293 | }, 294 | "max": func(j1, j2 *Job) bool { 295 | return j1.NanosMax > j2.NanosMax 296 | }, 297 | "stddev": func(j1, j2 *Job) bool { 298 | return j1.NanosStdDev > j2.NanosStdDev 299 | }, 300 | } 301 | 302 | func sortJobs(jobs []*Job, sort string) { 303 | if by, ok := jobSorters[sort]; ok { 304 | by.Sort(jobs) 305 | } 306 | } 307 | 308 | func filterJobs(overall *health.IntervalAggregation, sort string, limit int) []*Job { 309 | if overall == nil { 310 | return nil 311 | } 312 | jobs := make([]*Job, 0, len(overall.Jobs)) 313 | 314 | for k, j := range overall.Jobs { 315 | var avg, stddev float64 316 | if j.Count == 0 { 317 | avg = 0 318 | stddev = 0 319 | } else { 320 | avg = float64(j.NanosSum) / float64(j.Count) 321 | if j.Count == 1 { 322 | stddev 
= 0 323 | } else { 324 | num := (float64(j.Count) * j.NanosSumSquares) - math.Pow(float64(j.NanosSum), 2) 325 | div := float64(j.Count * (j.Count - 1)) 326 | stddev = math.Sqrt(num / div) 327 | } 328 | } 329 | job := &Job{ 330 | Name: k, 331 | Count: j.Count, 332 | CountSuccess: j.CountSuccess, 333 | CountValidationError: j.CountValidationError, 334 | CountPanic: j.CountPanic, 335 | CountError: j.CountError, 336 | CountJunk: j.CountJunk, 337 | NanosSum: j.NanosSum, 338 | NanosSumSquares: j.NanosSumSquares, 339 | NanosMin: j.NanosMin, 340 | NanosMax: j.NanosMax, 341 | NanosAvg: avg, 342 | NanosStdDev: stddev, 343 | } 344 | jobs = append(jobs, job) 345 | } 346 | 347 | sortJobs(jobs, sort) 348 | 349 | if limit > 0 { 350 | max := len(jobs) 351 | if limit > max { 352 | limit = max 353 | } 354 | jobs = jobs[0:limit] 355 | } 356 | 357 | return jobs 358 | } 359 | 360 | type HostStatusByHostPort []*HostStatus 361 | 362 | func (a HostStatusByHostPort) Len() int { return len(a) } 363 | func (a HostStatusByHostPort) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 364 | func (a HostStatusByHostPort) Less(i, j int) bool { return a[i].HostPort < a[j].HostPort } 365 | -------------------------------------------------------------------------------- /healthd/healthd.go: -------------------------------------------------------------------------------- 1 | package healthd 2 | 3 | import ( 4 | "fmt" 5 | "github.com/gocraft/health" 6 | "sort" 7 | "sync" 8 | "sync/atomic" 9 | "time" 10 | ) 11 | 12 | type HealthD struct { 13 | stream *health.Stream 14 | 15 | // How long is each aggregation interval. Eg, 1 minute 16 | intervalDuration time.Duration 17 | 18 | // Retain controls how many metrics interval we keep. Eg, 5 minutes 19 | retain time.Duration 20 | 21 | // maxIntervals is the maximum length of intervals. 22 | // It is retain / interval. 23 | maxIntervals int 24 | 25 | // These guys are the real aggregated deal 26 | intervalAggregations []*health.IntervalAggregation 27 | 28 | // let's keep the last 5 minutes worth of data from each host 29 | hostAggregations map[hostAggregationKey]*health.IntervalAggregation 30 | 31 | // intervalsNeedingRecalculation is a set of intervals that need to be recalculated. It is cleared when they are recalculated. 
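// It is populated in consumePollResponse whenever a host reports a new or changed
// interval (detected via SerialNumber), and drained by recalculateIntervals, which
// runs either once every host has reported in or when the 2-second debouncer fires.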
32 | intervalsNeedingRecalculation map[time.Time]struct{} 33 | 34 | // map from HostPort to status 35 | hostStatus map[string]*HostStatus 36 | 37 | intervalsChanChan chan chan []*health.IntervalAggregation 38 | hostsChanChan chan chan []*HostStatus 39 | 40 | stopFlag int64 41 | stopAggregator chan bool 42 | stopStopAggregator chan bool 43 | stopHTTP func() bool 44 | } 45 | 46 | type HostStatus struct { 47 | HostPort string `json:"host_port"` 48 | 49 | LastCheckTime time.Time `json:"last_check_time"` 50 | LastInstanceId string `json:"last_instance_id"` 51 | LastIntervalDuration time.Duration `json:"last_interval_duration"` 52 | LastErr string `json:"last_err"` 53 | LastNanos int64 `json:"last_nanos"` 54 | LastCode int `json:"last_code"` // http status code of last response 55 | 56 | FirstSuccessfulResponse time.Time `json:"first_successful_response"` 57 | LastSuccessfulResponse time.Time `json:"last_successful_response"` 58 | } 59 | 60 | type hostAggregationKey struct { 61 | Time time.Time 62 | InstanceId string 63 | HostPort string 64 | } 65 | 66 | func StartNewHealthD(monitoredHostPorts []string, serverHostPort string, stream *health.Stream) *HealthD { 67 | hd := &HealthD{} 68 | hd.stream = stream 69 | hd.intervalsChanChan = make(chan chan []*health.IntervalAggregation, 16) 70 | hd.hostsChanChan = make(chan chan []*HostStatus, 16) 71 | hd.hostStatus = make(map[string]*HostStatus) 72 | hd.hostAggregations = make(map[hostAggregationKey]*health.IntervalAggregation) 73 | hd.intervalsNeedingRecalculation = make(map[time.Time]struct{}) 74 | hd.retain = time.Hour * 2 // In the future this should be configurable 75 | hd.intervalDuration = 0 // We don't know this yet. Will be configured from polled hosts. 76 | hd.maxIntervals = 0 // We don't know this yet. See above. 77 | hd.stopAggregator = make(chan bool) 78 | hd.stopStopAggregator = make(chan bool) 79 | 80 | for _, hp := range monitoredHostPorts { 81 | hd.hostStatus[hp] = &HostStatus{ 82 | HostPort: hp, 83 | } 84 | } 85 | 86 | go hd.pollAndAggregate() 87 | 88 | httpStarted := make(chan bool) 89 | go hd.startHttpServer(serverHostPort, httpStarted) 90 | <-httpStarted 91 | 92 | return hd 93 | } 94 | 95 | func (hd *HealthD) Stop() { 96 | atomic.StoreInt64(&hd.stopFlag, 1) 97 | hd.stopAggregator <- true 98 | <-hd.stopStopAggregator 99 | hd.stopHTTP() 100 | } 101 | 102 | // shouldStop returns true if we've been flagged to stop 103 | func (hd *HealthD) shouldStop() bool { 104 | v := atomic.LoadInt64(&hd.stopFlag) 105 | return v == 1 106 | } 107 | 108 | func (hd *HealthD) pollAndAggregate() { 109 | ticker := time.Tick(10 * time.Second) 110 | 111 | responses := make(chan *pollResponse, 64) 112 | recalcIntervals := make(chan struct{}) 113 | recalcIntervalsRequest := make(chan struct{}, 64) 114 | intervalsChanChan := hd.intervalsChanChan 115 | hostsChanChan := hd.hostsChanChan 116 | 117 | go debouncer(recalcIntervals, recalcIntervalsRequest, time.Second*2, time.Millisecond*300) 118 | 119 | // Immediately poll for servers on healthd startup 120 | go hd.poll(responses) 121 | 122 | AGGREGATE_LOOP: 123 | for { 124 | // Usual flow: 125 | // 1. ticker ticks. Poll each host. 126 | // 2. Get responses in. Trigger debouncer 127 | // 3. If we get all responses quickly, we'll get a nil, and then recalc. 128 | // 4. The debouncer will fire in 2 seconds and do a partial calc or full recalc. 129 | // 5. Repeat 2-4 until all resonses are in and everything settles down. 130 | // At any time, we could get: 131 | // - A requset for metrics. 
We'll get a channel and send response back on that channel. 132 | // - A requset to shut down. 133 | select { 134 | case <-ticker: 135 | go hd.poll(responses) 136 | hd.purge() 137 | case resp := <-responses: 138 | if resp == nil { 139 | // nil is a sentinel value that is sent when all hosts have reported in. 140 | hd.recalculateIntervals() 141 | } else { 142 | hd.consumePollResponse(resp) 143 | recalcIntervalsRequest <- struct{}{} 144 | } 145 | case <-recalcIntervals: 146 | hd.recalculateIntervals() 147 | case intervalsChan := <-intervalsChanChan: 148 | intervalsChan <- hd.memorySafeIntervals() 149 | case hostsChan := <-hostsChanChan: 150 | hostsChan <- hd.memorySafeHosts() 151 | case <-hd.stopAggregator: 152 | hd.stopStopAggregator <- true 153 | break AGGREGATE_LOOP 154 | } 155 | } 156 | } 157 | 158 | // poll is meant to be alled in a new goroutine. 159 | // It will poll each managed host in a new goroutine. 160 | // When everything has finished, it will send nil to responses to signal that we have all data. 161 | func (hd *HealthD) poll(responses chan *pollResponse) { 162 | var wg sync.WaitGroup 163 | for _, hs := range hd.hostStatus { 164 | wg.Add(1) 165 | go func(hs *HostStatus) { 166 | defer wg.Done() 167 | poll(hd.stream, hs.HostPort, responses) 168 | }(hs) 169 | } 170 | wg.Wait() 171 | responses <- nil 172 | } 173 | 174 | func (hd *HealthD) getAggregationSequence() []*health.IntervalAggregation { 175 | if hd.shouldStop() { 176 | return nil 177 | } 178 | intervalsChan := make(chan []*health.IntervalAggregation) 179 | hd.intervalsChanChan <- intervalsChan 180 | return <-intervalsChan 181 | } 182 | 183 | func (hd *HealthD) getHosts() []*HostStatus { 184 | if hd.shouldStop() { 185 | return nil 186 | } 187 | hostsChan := make(chan []*HostStatus) 188 | hd.hostsChanChan <- hostsChan 189 | return <-hostsChan 190 | } 191 | 192 | func (agg *HealthD) memorySafeIntervals() []*health.IntervalAggregation { 193 | ret := make([]*health.IntervalAggregation, 0, len(agg.intervalAggregations)) 194 | 195 | for _, intAgg := range agg.intervalAggregations { 196 | ret = append(ret, intAgg.Clone()) 197 | } 198 | 199 | return ret 200 | } 201 | 202 | func (hd *HealthD) memorySafeHosts() []*HostStatus { 203 | ret := make([]*HostStatus, 0, len(hd.hostStatus)) 204 | 205 | for _, hs := range hd.hostStatus { 206 | var host = *hs // copy mem 207 | ret = append(ret, &host) 208 | } 209 | 210 | return ret 211 | } 212 | 213 | func (hd *HealthD) consumePollResponse(resp *pollResponse) { 214 | if hs, ok := hd.hostStatus[resp.HostPort]; ok { 215 | hs.LastCheckTime = resp.Timestamp 216 | hs.LastNanos = resp.Nanos 217 | hs.LastInstanceId = resp.InstanceId 218 | hs.LastIntervalDuration = resp.IntervalDuration 219 | hs.LastCode = resp.Code 220 | if resp.Err == nil { 221 | hs.LastErr = "" 222 | } else { 223 | hs.LastErr = resp.Err.Error() 224 | } 225 | 226 | if resp.Code == 200 && resp.Err == nil { 227 | if hs.FirstSuccessfulResponse.IsZero() { 228 | hs.FirstSuccessfulResponse = now() 229 | } 230 | hs.LastSuccessfulResponse = now() 231 | } 232 | } else { 233 | // BUG 234 | // TODO: log that we got an unknown hostPort. 
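// One possible shape for that logging (a sketch; it assumes *health.Stream
// exposes EventKv the way Job does):
//   hd.stream.EventKv("healthd.unknown_host_port", health.Kvs{"host_port": resp.HostPort})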
235 | } 236 | 237 | // Add resp to hostAggregations 238 | if resp.Code == 200 && resp.Err == nil { 239 | if hd.intervalDuration == 0 { 240 | hd.intervalDuration = resp.IntervalDuration // TODO: validate this 241 | hd.maxIntervals = int(hd.retain / hd.intervalDuration) 242 | } else if hd.intervalDuration != resp.IntervalDuration { 243 | fmt.Println("interval duration mismatch: agg.intervalDuration=", hd.intervalDuration, " but resp.IntervalDuration=", resp.IntervalDuration) 244 | return 245 | } 246 | 247 | for _, intAgg := range resp.IntervalAggregations { 248 | key := hostAggregationKey{ 249 | Time: intAgg.IntervalStart, 250 | InstanceId: resp.InstanceId, 251 | HostPort: resp.HostPort, 252 | } 253 | 254 | existingIntAgg, ok := hd.hostAggregations[key] 255 | if ok && existingIntAgg.SerialNumber == intAgg.SerialNumber { 256 | // ignore; we already have this data 257 | } else { 258 | hd.hostAggregations[key] = intAgg 259 | hd.intervalsNeedingRecalculation[intAgg.IntervalStart] = struct{}{} 260 | } 261 | } 262 | } 263 | } 264 | 265 | // purge purges old hostAggregations older than 5 intervals 266 | func (agg *HealthD) purge() { 267 | var threshold = agg.intervalDuration * 5 // NOTE: this is arbitrary. 268 | for k, _ := range agg.hostAggregations { 269 | if time.Since(k.Time) > threshold { 270 | delete(agg.hostAggregations, k) 271 | } 272 | } 273 | 274 | n := len(agg.intervalAggregations) 275 | if n > agg.maxIntervals { 276 | agg.intervalAggregations = agg.intervalAggregations[(n - agg.maxIntervals):] 277 | } 278 | } 279 | 280 | func (hd *HealthD) recalculateIntervals() { 281 | job := hd.stream.NewJob("recalculate") 282 | 283 | for k, _ := range hd.intervalsNeedingRecalculation { 284 | intAggsAtTime := []*health.IntervalAggregation{} 285 | 286 | for key, intAgg := range hd.hostAggregations { 287 | if key.Time == k { 288 | intAggsAtTime = append(intAggsAtTime, intAgg) 289 | } 290 | } 291 | 292 | overallAgg := health.NewIntervalAggregation(k) 293 | for _, ia := range intAggsAtTime { 294 | overallAgg.Merge(ia) 295 | } 296 | hd.setAggregation(overallAgg) 297 | } 298 | 299 | // Reset everything: 300 | hd.intervalsNeedingRecalculation = make(map[time.Time]struct{}) 301 | 302 | job.Complete(health.Success) 303 | } 304 | 305 | type ByInterval []*health.IntervalAggregation 306 | 307 | func (a ByInterval) Len() int { return len(a) } 308 | func (a ByInterval) Swap(i, j int) { a[i], a[j] = a[j], a[i] } 309 | func (a ByInterval) Less(i, j int) bool { return a[i].IntervalStart.Before(a[j].IntervalStart) } 310 | 311 | func (agg *HealthD) setAggregation(intAgg *health.IntervalAggregation) { 312 | // If we already have the intAgg, replace it. 313 | for i, existingAgg := range agg.intervalAggregations { 314 | if existingAgg.IntervalStart == intAgg.IntervalStart { 315 | agg.intervalAggregations[i] = intAgg 316 | return 317 | } 318 | } 319 | 320 | // Otherwise, just append it and sort to get ordering right. 
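// (The slice is bounded by maxIntervals, see the truncation below, so sorting on
// every insert stays cheap.)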
321 | agg.intervalAggregations = append(agg.intervalAggregations, intAgg) 322 | sort.Sort(ByInterval(agg.intervalAggregations)) 323 | 324 | // If we have too many aggregations, truncate 325 | n := len(agg.intervalAggregations) 326 | if n > agg.maxIntervals { 327 | agg.intervalAggregations = agg.intervalAggregations[(n - agg.maxIntervals):] 328 | } 329 | } 330 | -------------------------------------------------------------------------------- /statsd_sink_test.go: -------------------------------------------------------------------------------- 1 | package health 2 | 3 | import ( 4 | "fmt" 5 | "github.com/stretchr/testify/assert" 6 | "net" 7 | "runtime" 8 | "strings" 9 | "sync" 10 | "testing" 11 | "time" 12 | ) 13 | 14 | var testAddr = "127.0.0.1:7890" 15 | 16 | func callerInfo() string { 17 | _, file, line, ok := runtime.Caller(2) 18 | if !ok { 19 | return "" 20 | } 21 | parts := strings.Split(file, "/") 22 | file = parts[len(parts)-1] 23 | return fmt.Sprintf("%s:%d", file, line) 24 | } 25 | 26 | func listenFor(t *testing.T, msgs []string, f func()) { 27 | c, err := net.ListenPacket("udp", testAddr) 28 | defer c.Close() 29 | assert.NoError(t, err) 30 | 31 | f() 32 | 33 | buf := make([]byte, 10000) 34 | for _, msg := range msgs { 35 | err = c.SetReadDeadline(time.Now().Add(1 * time.Millisecond)) 36 | assert.NoError(t, err) 37 | nbytes, _, err := c.ReadFrom(buf) 38 | assert.NoError(t, err) 39 | if err == nil { 40 | gotMsg := string(buf[0:nbytes]) 41 | if gotMsg != msg { 42 | t.Errorf("Expected UPD packet %s but got %s\n", msg, gotMsg) 43 | } 44 | } 45 | } 46 | } 47 | 48 | func TestStatsDSinkPeriodicPurge(t *testing.T) { 49 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 50 | assert.NoError(t, err) 51 | 52 | // Stop the sink, set a smaller flush period, and start it agian 53 | sink.Stop() 54 | sink.flushPeriod = 1 * time.Millisecond 55 | go sink.loop() 56 | defer sink.Stop() 57 | 58 | listenFor(t, []string{"metroid.my.event:1|c\nmetroid.my.job.my.event:1|c\n"}, func() { 59 | sink.EmitEvent("my.job", "my.event", nil) 60 | time.Sleep(10 * time.Millisecond) 61 | }) 62 | } 63 | 64 | func TestStatsDSinkPacketLimit(t *testing.T) { 65 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid", SkipNestedEvents: true}) 66 | assert.NoError(t, err) 67 | 68 | // s is 101 bytes 69 | s := "metroid." 
+ strings.Repeat("a", 88) + ":1|c\n" 70 | 71 | // expect 1 packet that is 14*101=1414 bytes, and the next one to be 101 bytes 72 | listenFor(t, []string{strings.Repeat(s, 14), s}, func() { 73 | for i := 0; i < 15; i++ { 74 | sink.EmitEvent("my.job", strings.Repeat("a", 88), nil) 75 | } 76 | 77 | sink.Drain() 78 | }) 79 | } 80 | 81 | func TestStatsDSinkEmitEventPrefix(t *testing.T) { 82 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 83 | defer sink.Stop() 84 | assert.NoError(t, err) 85 | listenFor(t, []string{"metroid.my.event:1|c\nmetroid.my.job.my.event:1|c\n"}, func() { 86 | sink.EmitEvent("my.job", "my.event", nil) 87 | sink.Drain() 88 | }) 89 | } 90 | 91 | func TestStatsDSinkEmitEventShouldSanitize(t *testing.T) { 92 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 93 | defer sink.Stop() 94 | assert.NoError(t, err) 95 | listenFor(t, []string{"metroid.my$event:1|c\nmetroid.my$job.my$event:1|c\n"}, func() { 96 | sink.EmitEvent("my|job", "my:event", nil) 97 | sink.Drain() 98 | }) 99 | } 100 | 101 | func TestStatsDSinkEmitEventNoPrefix(t *testing.T) { 102 | sink, err := NewStatsDSink(testAddr, nil) 103 | defer sink.Stop() 104 | assert.NoError(t, err) 105 | listenFor(t, []string{"my.event:1|c\nmy.job.my.event:1|c\n"}, func() { 106 | sink.EmitEvent("my.job", "my.event", nil) 107 | sink.Drain() 108 | }) 109 | } 110 | 111 | func TestStatsDSinkEmitEventSkipNested(t *testing.T) { 112 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{SkipNestedEvents: true}) 113 | defer sink.Stop() 114 | assert.NoError(t, err) 115 | listenFor(t, []string{"my.event:1|c\n"}, func() { 116 | sink.EmitEvent("my.job", "my.event", nil) 117 | sink.Drain() 118 | }) 119 | } 120 | 121 | func TestStatsDSinkEmitEventSkipTopLevel(t *testing.T) { 122 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{SkipTopLevelEvents: true}) 123 | defer sink.Stop() 124 | assert.NoError(t, err) 125 | listenFor(t, []string{"my.job.my.event:1|c\n"}, func() { 126 | sink.EmitEvent("my.job", "my.event", nil) 127 | sink.Drain() 128 | }) 129 | } 130 | 131 | func TestStatsDSinkEmitEventErrPrefix(t *testing.T) { 132 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 133 | defer sink.Stop() 134 | assert.NoError(t, err) 135 | listenFor(t, []string{"metroid.my.event.error:1|c\nmetroid.my.job.my.event.error:1|c\n"}, func() { 136 | sink.EmitEventErr("my.job", "my.event", testErr, nil) 137 | sink.Drain() 138 | }) 139 | } 140 | 141 | func TestStatsDSinkEmitEventErrNoPrefix(t *testing.T) { 142 | sink, err := NewStatsDSink(testAddr, nil) 143 | defer sink.Stop() 144 | assert.NoError(t, err) 145 | listenFor(t, []string{"my.event.error:1|c\nmy.job.my.event.error:1|c\n"}, func() { 146 | sink.EmitEventErr("my.job", "my.event", testErr, nil) 147 | sink.Drain() 148 | }) 149 | } 150 | 151 | func TestStatsDSinkEmitEventErrSkipNested(t *testing.T) { 152 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{SkipNestedEvents: true}) 153 | defer sink.Stop() 154 | assert.NoError(t, err) 155 | listenFor(t, []string{"my.event.error:1|c\n"}, func() { 156 | sink.EmitEventErr("my.job", "my.event", testErr, nil) 157 | sink.Drain() 158 | }) 159 | } 160 | 161 | func TestStatsDSinkEmitEventErrSkipTopLevel(t *testing.T) { 162 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{SkipTopLevelEvents: true}) 163 | defer sink.Stop() 164 | assert.NoError(t, err) 165 | listenFor(t, []string{"my.job.my.event.error:1|c\n"}, func() { 166 | sink.EmitEventErr("my.job", "my.event", 
testErr, nil) 167 | sink.Drain() 168 | }) 169 | } 170 | 171 | func TestStatsDSinkEmitTimingPrefix(t *testing.T) { 172 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 173 | defer sink.Stop() 174 | assert.NoError(t, err) 175 | listenFor(t, []string{"metroid.my.event:123|ms\nmetroid.my.job.my.event:123|ms\n"}, func() { 176 | sink.EmitTiming("my.job", "my.event", 123456789, nil) 177 | sink.Drain() 178 | }) 179 | } 180 | 181 | func TestStatsDSinkEmitTimingNoPrefix(t *testing.T) { 182 | sink, err := NewStatsDSink(testAddr, nil) 183 | defer sink.Stop() 184 | assert.NoError(t, err) 185 | listenFor(t, []string{"my.event:123|ms\nmy.job.my.event:123|ms\n"}, func() { 186 | sink.EmitTiming("my.job", "my.event", 123456789, nil) 187 | sink.Drain() 188 | }) 189 | } 190 | 191 | func TestStatsDSinkEmitTimingSkipNested(t *testing.T) { 192 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{SkipNestedEvents: true}) 193 | defer sink.Stop() 194 | assert.NoError(t, err) 195 | listenFor(t, []string{"my.event:123|ms\n"}, func() { 196 | sink.EmitTiming("my.job", "my.event", 123456789, nil) 197 | sink.Drain() 198 | }) 199 | } 200 | 201 | func TestStatsDSinkEmitTimingSkipTopLevel(t *testing.T) { 202 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{SkipTopLevelEvents: true}) 203 | defer sink.Stop() 204 | assert.NoError(t, err) 205 | listenFor(t, []string{"my.job.my.event:123|ms\n"}, func() { 206 | sink.EmitTiming("my.job", "my.event", 123456789, nil) 207 | sink.Drain() 208 | }) 209 | } 210 | 211 | func TestStatsDSinkEmitTimingShort(t *testing.T) { 212 | sink, err := NewStatsDSink(testAddr, nil) 213 | defer sink.Stop() 214 | assert.NoError(t, err) 215 | listenFor(t, []string{"my.event:1.23|ms\nmy.job.my.event:1.23|ms\n"}, func() { 216 | sink.EmitTiming("my.job", "my.event", 1234567, nil) 217 | sink.Drain() 218 | }) 219 | } 220 | 221 | func TestStatsDSinkEmitGaugePrefix(t *testing.T) { 222 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 223 | defer sink.Stop() 224 | assert.NoError(t, err) 225 | listenFor(t, []string{"metroid.my.event:3.14|g\nmetroid.my.job.my.event:3.14|g\n"}, func() { 226 | sink.EmitGauge("my.job", "my.event", 3.14, nil) 227 | sink.Drain() 228 | }) 229 | } 230 | 231 | func TestStatsDSinkEmitGaugeSmall(t *testing.T) { 232 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid", SkipNestedEvents: true}) 233 | defer sink.Stop() 234 | assert.NoError(t, err) 235 | listenFor(t, []string{"metroid.my.event:0.14|g\nmetroid.my.event:0.0401|g\nmetroid.my.event:-0.0001|g\n"}, func() { 236 | sink.EmitGauge("my.job", "my.event", 0.1401, nil) 237 | sink.EmitGauge("my.job", "my.event", 0.0401, nil) 238 | sink.EmitGauge("my.job", "my.event", -0.0001, nil) 239 | sink.Drain() 240 | }) 241 | } 242 | 243 | func TestStatsDSinkEmitGaugeNoPrefix(t *testing.T) { 244 | sink, err := NewStatsDSink(testAddr, nil) 245 | defer sink.Stop() 246 | assert.NoError(t, err) 247 | listenFor(t, []string{"my.event:3.00|g\nmy.job.my.event:3.00|g\n"}, func() { 248 | sink.EmitGauge("my.job", "my.event", 3, nil) 249 | sink.Drain() 250 | }) 251 | } 252 | 253 | func TestStatsDSinkEmitGaugeSkipNested(t *testing.T) { 254 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{SkipNestedEvents: true}) 255 | defer sink.Stop() 256 | assert.NoError(t, err) 257 | listenFor(t, []string{"my.event:3.00|g\n"}, func() { 258 | sink.EmitGauge("my.job", "my.event", 3, nil) 259 | sink.Drain() 260 | }) 261 | } 262 | 263 | func TestStatsDSinkEmitGaugeSkipTopLevel(t 
*testing.T) { 264 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{SkipTopLevelEvents: true}) 265 | defer sink.Stop() 266 | assert.NoError(t, err) 267 | listenFor(t, []string{"my.job.my.event:3.00|g\n"}, func() { 268 | sink.EmitGauge("my.job", "my.event", 3, nil) 269 | sink.Drain() 270 | }) 271 | } 272 | 273 | func TestStatsDSinkEmitCompletePrefix(t *testing.T) { 274 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 275 | defer sink.Stop() 276 | assert.NoError(t, err) 277 | for kind, kindStr := range completionStatusToString { 278 | str := fmt.Sprintf("metroid.my.job.%s:129|ms\n", kindStr) 279 | listenFor(t, []string{str}, func() { 280 | sink.EmitComplete("my.job", kind, 129456789, nil) 281 | sink.Drain() 282 | }) 283 | } 284 | } 285 | 286 | func TestStatsDSinkEmitCompleteNoPrefix(t *testing.T) { 287 | sink, err := NewStatsDSink(testAddr, nil) 288 | defer sink.Stop() 289 | assert.NoError(t, err) 290 | for kind, kindStr := range completionStatusToString { 291 | str := fmt.Sprintf("my.job.%s:129|ms\n", kindStr) 292 | listenFor(t, []string{str}, func() { 293 | sink.EmitComplete("my.job", kind, 129456789, nil) 294 | sink.Drain() 295 | }) 296 | } 297 | } 298 | 299 | func TestStatsDSinkEmitTimingSubMillisecond(t *testing.T) { 300 | sink, err := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 301 | defer sink.Stop() 302 | assert.NoError(t, err) 303 | listenFor(t, []string{"metroid.my.event:0.46|ms\nmetroid.my.job.my.event:0.46|ms\n"}, func() { 304 | sink.EmitTiming("my.job", "my.event", 456789, nil) 305 | sink.Drain() 306 | }) 307 | } 308 | 309 | func BenchmarkStatsDSinkProcessEvent(b *testing.B) { 310 | sink, _ := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 311 | sink.Stop() // Don't do periodic things while we're benching 312 | 313 | b.ResetTimer() 314 | for i := 0; i < b.N; i++ { 315 | sink.processEvent("myjob", "myevent") 316 | } 317 | } 318 | 319 | func BenchmarkStatsDSinkProcessEventErr(b *testing.B) { 320 | sink, _ := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 321 | sink.Stop() // Don't do periodic things while we're benching 322 | 323 | b.ResetTimer() 324 | for i := 0; i < b.N; i++ { 325 | sink.processEventErr("myjob", "myevent") 326 | } 327 | } 328 | 329 | func BenchmarkStatsDSinkProcessTimingBig(b *testing.B) { 330 | sink, _ := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 331 | sink.Stop() // Don't do periodic things while we're benching 332 | 333 | b.ResetTimer() 334 | for i := 0; i < b.N; i++ { 335 | sink.processTiming("myjob", "myevent", 30000000) 336 | } 337 | } 338 | 339 | func BenchmarkStatsDSinkProcessTimingSmall(b *testing.B) { 340 | sink, _ := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 341 | sink.Stop() // Don't do periodic things while we're benching 342 | 343 | b.ResetTimer() 344 | for i := 0; i < b.N; i++ { 345 | sink.processTiming("myjob", "myevent", 1230000) 346 | } 347 | } 348 | 349 | func BenchmarkStatsDSinkProcessGauge(b *testing.B) { 350 | sink, _ := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 351 | sink.Stop() // Don't do periodic things while we're benching 352 | 353 | b.ResetTimer() 354 | for i := 0; i < b.N; i++ { 355 | sink.processGauge("myjob", "myevent", 3.14) 356 | } 357 | } 358 | 359 | func BenchmarkStatsDSinkProcessComplete(b *testing.B) { 360 | sink, _ := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 361 | sink.Stop() // Don't do periodic things while we're benching 362 | 363 | b.ResetTimer() 364 
| for i := 0; i < b.N; i++ { 365 | sink.processComplete("myjob", Success, 1230000) 366 | } 367 | } 368 | 369 | func BenchmarkStatsDSinkOverall(b *testing.B) { 370 | const numGoroutines = 100 371 | var requestsPerGoroutine = b.N / numGoroutines 372 | 373 | stream := NewStream() 374 | sink, _ := NewStatsDSink(testAddr, &StatsDSinkOptions{Prefix: "metroid"}) 375 | stream.AddSink(sink) 376 | job := stream.NewJob("foo") 377 | 378 | wg := sync.WaitGroup{} 379 | for i := 0; i < numGoroutines; i++ { 380 | wg.Add(1) 381 | go func() { 382 | for j := 0; j < requestsPerGoroutine; j++ { 383 | job.Event("evt") 384 | } 385 | wg.Done() 386 | }() 387 | } 388 | 389 | wg.Wait() 390 | sink.Drain() 391 | } 392 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # gocraft/health [![GoDoc](https://godoc.org/github.com/gocraft/health?status.png)](https://godoc.org/github.com/gocraft/health) 2 | 3 | gocraft/health allows you to instrument your service for logging and metrics, and then send that instrumentation to log files, StatsD, Bugsnag, or to be polled and aggregated via a JSON API. 4 | 5 | gocraft/health also ships with a New Relic-like aggregator (called healthd) that shows you your slowest endpoints, top error producers, top throughput endpoints, and so on. 6 | 7 | ## Instrumenting your service 8 | 9 | ### Make a new stream with sinks 10 | 11 | First, you'll want to make a new Stream and attach your sinks to it. Streams are commonly saved in a global variable. 12 | 13 | ```go 14 | import ( 15 | "github.com/gocraft/health" 16 | "github.com/gocraft/health/sinks/bugsnag" 17 | "os" 18 | ) 19 | 20 | // Save the stream as a global variable 21 | var stream = health.NewStream() 22 | 23 | // In your main func, initialize the stream with your sinks. 24 | func main() { 25 | // Log to stdout! (can also use WriterSink to write to a log file, Syslog, etc) 26 | stream.AddSink(&health.WriterSink{os.Stdout}) 27 | 28 | // Log to StatsD! 29 | statsdSink, err := health.NewStatsDSink("127.0.0.1:8125", &health.StatsDSinkOptions{Prefix: "myapp"}) 30 | if err != nil { 31 | stream.EventErr("new_statsd_sink", err) 32 | return 33 | } 34 | stream.AddSink(statsdSink) 35 | 36 | // Expose instrumentation in this app on a JSON endpoint that healthd can poll! 37 | sink := health.NewJsonPollingSink(time.Minute, time.Minute*5) 38 | stream.AddSink(sink) 39 | sink.StartServer(addr) 40 | 41 | // Send errors to bugsnag! 42 | stream.AddSink(bugsnag.NewSink(&bugsnag.Config{APIKey: "myApiKey"})) 43 | 44 | // Now that your stream is set up, start a web server or something... 45 | } 46 | ``` 47 | 48 | ### Jobs 49 | 50 | gocraft/health excels at instrumenting services that perform *jobs*. Examples of jobs: serving an HTTP request, serving an RPC request, or processing a message from a work queue. Jobs are encoded semantically into gocraft/health in order to provide out-of-the-box answers to questions like, "what is my slowest endpoint?" 51 | 52 | Jobs serve three functions: 53 | * Jobs record a timing (eg, it took 21ms to complete this job) 54 | * Jobs record a status (eg, did the job complete successfully or was there an error?) 55 | * Jobs group instrumentation inside that job together so that you can analyze it later. 56 | 57 | Let's say you're writing a web service that processes JSON requests/responses.
You might write something like this: 58 | 59 | ```go 60 | import ( 61 | "github.com/gocraft/health" 62 | "net/http" 63 | ) 64 | var stream = health.NewStream() 65 | func main() { 66 | // set up the stream with sinks 67 | stream.AddSink(&health.WriterSink{os.Stdout}) 68 | http.HandleFunc("/users", getUsers) 69 | } 70 | 71 | func getUsers(rw http.ResponseWriter, r *http.Request) { 72 | // All logging and instrumentation should be within the context of a job! 73 | job := stream.NewJob("get_users") 74 | 75 | err := fetchUsersFromDatabase(r) 76 | if err != nil { 77 | // When in your job's context, you can log errors, events, timings, etc. 78 | job.EventErr("fetch_user_from_database", err) 79 | } 80 | 81 | // When done with the job, call job.Complete with a completion status. 82 | if err == nil { 83 | job.Complete(health.Success) 84 | } else { 85 | job.Complete(health.Error) 86 | } 87 | } 88 | 89 | ``` 90 | 91 | (This example is just used for illustration -- in practice, you'll probably want to use middleware to create your job if you have more than a few endpoints. A minimal middleware sketch follows the status list below.) 92 | 93 | There are five types of completion statuses: 94 | * **Success** - Your job completed successfully. 95 | * **Error** - Some library call resulted in an error that prevented you from successfully completing your job. 96 | * **Panic** - Some code panicked! 97 | * **ValidationError** - Your code was fine, but the user passed in bad inputs, and so the job wasn't completed successfully. 98 | * **Junk** - The job wasn't completed successfully, but not really because of an Error or ValidationError. For instance, maybe there's just a 404 (not found) or 401 (unauthorized) request to your app. This status code might not apply to all apps. 99 |
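As mentioned above, most apps create jobs in middleware rather than in each handler. Here is a minimal sketch of what that can look like with net/http. The `withJob` wrapper, the route-to-job naming, and the panic handling are illustrative assumptions, not part of gocraft/health itself:

```go
package main

import (
	"fmt"
	"net/http"
	"os"

	"github.com/gocraft/health"
)

var stream = health.NewStream()

// withJob wraps a handler so every request runs inside its own job.
// (Hypothetical helper for illustration only.)
func withJob(jobName string, h http.HandlerFunc) http.HandlerFunc {
	return func(rw http.ResponseWriter, r *http.Request) {
		job := stream.NewJob(jobName)
		defer func() {
			if p := recover(); p != nil {
				job.EventErr("panic", fmt.Errorf("%v", p))
				job.Complete(health.Panic)
				rw.WriteHeader(http.StatusInternalServerError)
			}
		}()
		h(rw, r)
		// A real middleware would pick Error/ValidationError/Junk based on the
		// response or a shared context; Success keeps the sketch short.
		job.Complete(health.Success)
	}
}

func main() {
	stream.AddSink(&health.WriterSink{os.Stdout})
	http.HandleFunc("/users", withJob("get_users", getUsers))
	http.ListenAndServe(":8080", nil)
}

func getUsers(rw http.ResponseWriter, r *http.Request) {
	// Handlers only log events/errors; timing and completion come from the middleware.
	rw.Write([]byte("[]"))
}
```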
100 | ### Events, Timings, Gauges, and Errors 101 | 102 | Within jobs, you can emit events, timings, gauges, and errors. The first argument of each of these methods is supposed to be a *key*. Camel case with dots is good because it works with other metrics stores like StatsD. Each method has a basic version as well as a version that accepts keys/values. 103 | 104 | #### Events 105 | 106 | ```go 107 | // Events. Notice the camel case with dots. 108 | // (This is helpful when you want to use StatsD sinks) 109 | job.Event("starting_server") 110 | job.Event("process_user.by_email.gmail") 111 | 112 | // Event with keys and values: 113 | job.EventKv("failover.started", health.Kvs{"from_ip": fmt.Sprint(currentIP)}) 114 | ``` 115 | 116 | * For the WriterSink, an event is just like logging to a file: 117 | ``` 118 | [2015-03-11T22:53:22.115855203Z]: job:/api/v2/user_stories event:starting_request kvs:[path:/api/v2/user_stories request-id:F8a8bQOWmRpO6ky] 119 | ``` 120 | 121 | * For the StatsD sink (and other metrics sinks), an event is like incrementing a counter. 122 | 123 | #### Timings 124 | 125 | ```go 126 | // Timings: 127 | startTime := time.Now() 128 | // Do something... 129 | job.Timing("fetch_user", time.Since(startTime).Nanoseconds()) // NOTE: Nanoseconds! 130 | 131 | // Timings also support keys/values: 132 | job.TimingKv("fetch_user", time.Since(startTime).Nanoseconds(), 133 | health.Kvs{"user_email": userEmail}) 134 | ``` 135 | 136 | * NOTE: All timing values are in nanoseconds. 137 | * For the WriterSink, a timing is just like logging to a file: 138 | ``` 139 | [2014-12-17T20:36:24.136663759Z]: job:/api/v2/user_stories event:dbr.select time:371 μs kvs:[request-id:F8a8bQOWmRpO6ky sql:SELECT COUNT(*) FROM user_stories WHERE (subdomain_id = 1221) AND (deleted_at IS NULL) AND (ticket_id IN (38327))] 140 | ``` 141 | 142 | * For the StatsD sink, we'll send it to StatsD as a timing. 143 | * The JSON polling sink will compute a summary of your timings: min, max, avg, stddev, count, sum. 144 |
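If you want the timing recorded on every return path (including early error returns), you can defer it. This is plain Go rather than a health feature; `fetch_user` is just the example key from above:

```go
// Deferred timing: recorded no matter how the function returns.
startTime := time.Now()
defer func() {
	job.Timing("fetch_user", time.Since(startTime).Nanoseconds())
}()

// ... do the work; an early return still records the timing ...
```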
145 | #### Gauges 146 | 147 | ```go 148 | // Gauges: 149 | job.Gauge("num_goroutines", numRunningGoroutines()) 150 | 151 | // Gauges also support keys/values: 152 | job.GaugeKv("num_goroutines", numRunningGoroutines(), 153 | health.Kvs{"dispatcher": dispatcherStatus()}) 154 | ``` 155 | 156 | * For the WriterSink, a gauge is just like logging to a file: 157 | ``` 158 | [2014-12-17T20:36:24.136663759Z]: job:/api/v2/user_stories event:num_goroutines gauge:17 kvs:[request-id:F8a8bQOWmRpO6ky dispatcher:running] 159 | ``` 160 | 161 | * For the StatsD sink, we'll send it to StatsD as a gauge. 162 | 163 | #### Errors 164 | 165 | ```go 166 | // Errors: 167 | err := someFunc(user.Email) 168 | if err != nil { 169 | return job.EventErr("some_func", err) 170 | } 171 | 172 | // And with keys/values: 173 | job.EventErrKv("some_func", err, health.Kvs{"email": user.Email}) 174 | ``` 175 | 176 | * For the WriterSink, an error will just be logged to the file with the error: 177 | ``` 178 | job:/api/v2/user_stories event:load_session.populate err:not_found kvs:[request-id:F8a8bQOWmRpO6ky] 179 | ``` 180 | 181 | * For metrics sinks, errors are just like events. 182 | * The JSON polling sink and healthd will let you see which errors are trending. 183 | * For the Bugsnag sink, we'll push each error to bugsnag. 184 | 185 | Errors will capture a stacktrace by default so that you can diagnose it in things like Bugsnag. If an error is common or not worth sending to something like Bugsnag, you can mute it. This will cause health to not capture a stack trace or send it to bugsnag: 186 | 187 | ```go 188 | i, err := strconv.ParseInt(userInput, 10, 0) 189 | if err != nil { 190 | // Mute this error! It's pretty common and 191 | // does not indicate a problem with our code! 192 | job.EventErr("myfunc.parse_int", health.Mute(err)) 193 | i = 2 // We have a default anyway. No big deal. 194 | } 195 | ``` 196 | 197 | Since error handling is so prevalent in Go code, you'll have situations where multiple functions have the option of logging the same root error. The best practice that we've identified is to just not think about it and log it on every level of the call stack. Keep in mind that gocraft/health will handle this intelligently and only send one error to Bugsnag, have a correct root backtrace, and so on. 198 | 199 | ```go 200 | func showUser(ctx *Context) error { 201 | user, err := ctx.getUser() 202 | if err != nil { 203 | // But we'll just log it here too! 204 | return ctx.EventErr("show_user.get_user", err) 205 | } 206 | } 207 | 208 | func getUser(ctx *Context) (*User, error) { 209 | var u User 210 | err := ctx.db.Select("SELECT * FROM users WHERE id = ?", ctx.userID).LoadStruct(&u) 211 | if err != nil { 212 | // Original error is here: 213 | return nil, ctx.EventErr("get_user.select", err) 214 | } 215 | return &u, nil 216 | } 217 | ``` 218 | 219 | ### Keys and Values 220 | 221 | Most objects and methods in health work with key/value pairs. Key/value pairs are just maps of strings to strings. Keys and values are only relevant right now for logging sinks: the keys and values will be printed on each line written. 222 | 223 | You can add keys/values to a stream. This is useful for things like hostname or pid. The keys/values will show up on every future event/timing/error. 224 | ```go 225 | stream := health.NewStream() 226 | stream.KeyValue("hostname", hostname) 227 | stream.KeyValue("pid", pid) 228 | ``` 229 | 230 | You can add keys/values to a job. This is useful for things like a request-id or the current user id: 231 | ```go 232 | job.KeyValue("request_id", makeRequestID()) 233 | if user != nil { 234 | job.KeyValue("user_id", fmt.Sprint(user.ID)) 235 | } 236 | ``` 237 | 238 | And as previously discussed, each individual event/timing/error can have its own keys and values. 239 | 240 | ### Writing your own Sink 241 | 242 | If you need a custom sink, you can just implement the Sink interface: 243 | 244 | ```go 245 | type Sink interface { 246 | EmitEvent(job string, event string, kvs map[string]string) 247 | EmitEventErr(job string, event string, err error, kvs map[string]string) 248 | EmitTiming(job string, event string, nanoseconds int64, kvs map[string]string) 249 | EmitGauge(job string, event string, value float64, kvs map[string]string) 250 | EmitComplete(job string, status CompletionStatus, nanoseconds int64, kvs map[string]string) 251 | } 252 | ``` 253 |
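For example, here is a minimal sketch of a custom sink that just counts everything emitted to it. The `CountingSink` name and the atomic counter are illustrative choices, not part of health:

```go
import (
	"sync/atomic"

	"github.com/gocraft/health"
)

// CountingSink counts every emission it receives.
type CountingSink struct {
	count int64
}

func (s *CountingSink) EmitEvent(job string, event string, kvs map[string]string) {
	atomic.AddInt64(&s.count, 1)
}

func (s *CountingSink) EmitEventErr(job string, event string, err error, kvs map[string]string) {
	atomic.AddInt64(&s.count, 1)
}

func (s *CountingSink) EmitTiming(job string, event string, nanoseconds int64, kvs map[string]string) {
	atomic.AddInt64(&s.count, 1)
}

func (s *CountingSink) EmitGauge(job string, event string, value float64, kvs map[string]string) {
	atomic.AddInt64(&s.count, 1)
}

func (s *CountingSink) EmitComplete(job string, status health.CompletionStatus, nanoseconds int64, kvs map[string]string) {
	atomic.AddInt64(&s.count, 1)
}
```

Attach it like any other sink: `stream.AddSink(&CountingSink{})`.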
254 | If you do implement a custom sink that you think would be valuable to other people, I'd be interested in including it in this package. Get in touch via an issue or send a pull request. 255 | 256 | ### Miscellaneous logging 257 | 258 | If you need to, you can log via a stream directly without creating a job. This will emit events under a job named 'general'. This is useful during application initialization: 259 | 260 | ```go 261 | stream := health.NewStream() 262 | stream.EventKv("starting_app", health.Kvs{"listen_ip": listenIP}) 263 | ``` 264 | 265 | ## healthd and healthtop 266 | 267 | We've built a set of tools to give you New Relic-like application performance monitoring for your Go app. It can show you things like your slowest endpoints, top error producers, top throughput endpoints, and so on. 268 | 269 | These tools are completely optional -- health is super useful without them. But with them, it becomes even better. 270 | 271 | 272 | ![Healthtop Screenshot](https://gocraft.github.io/health/images/healthtop.png) 273 | 274 | ### Add a JsonPollingSink to your stream 275 | 276 | ```go 277 | // Make sink and add it to stream: 278 | sink := health.NewJsonPollingSink(time.Minute, time.Minute*5) 279 | stream.AddSink(sink) 280 | 281 | // Start the HTTP server! This will expose metrics via a JSON API. 282 | // NOTE: this won't interfere with your main app (if it also serves HTTP), 283 | // since it starts a separate net/http server. 284 | // In prod, addr should be a private network interface and port, like "10.2.1.4:5020" 285 | // In local dev, it can be something like "127.0.0.1:5020" 286 | sink.StartServer(addr) 287 | ``` 288 | 289 | Once you start your app, you can browse to the ```/health``` endpoint (eg, ```127.0.0.1:5020/health```) to see your metrics. Per the initialization options above, your metrics are aggregated in 1-minute chunks. We'll keep 5 minutes worth of data in memory. Nothing is ever persisted to disk. 290 | 291 | 292 | ### Start healthd 293 | 294 | healthd will poll multiple services that are exposing a ```/health``` endpoint and aggregate that data. It will then expose that data via its own JSON API. You can query the healthd API to answer questions like 'what are my slowest endpoints?' 295 | 296 | Install the healthd binary: 297 | 298 | ```bash 299 | go get github.com/gocraft/health/cmd/healthd 300 | ``` 301 | 302 | Now you can run it. It accepts two main inputs as environment variables: 303 | 304 | * **HEALTHD_MONITORED_HOSTPORTS**: comma separated list of hostports that represent your services running the JsonPollingSink. Example: ```HEALTHD_MONITORED_HOSTPORTS=10.18.23.130:5020,10.18.23.131:5020``` 305 | * **HEALTHD_SERVER_HOSTPORT**: interface and port where you want to expose the healthd endpoints. Example: ```HEALTHD_SERVER_HOSTPORT=10.18.23.132:5032``` 306 | 307 | Putting those together: 308 | ```bash 309 | HEALTHD_MONITORED_HOSTPORTS=10.18.23.130:5020,10.18.23.131:5020 HEALTHD_SERVER_HOSTPORT=10.18.23.132:5032 healthd 310 | ``` 311 | 312 | Of course, in local development mode, you can do something like this: 313 | ```bash 314 | HEALTHD_MONITORED_HOSTPORTS=:5020 HEALTHD_SERVER_HOSTPORT=:5032 healthd 315 | ``` 316 | 317 | Great! To get a sense of the type of data healthd serves, you can manually navigate to: 318 | 319 | * ```/jobs```: Lists top jobs 320 | * ```/aggregations```: Provides a time series of aggregations 321 | * ```/aggregations/overall```: Squishes all time series aggregations into one aggregation. 322 | * ```/hosts```: Lists all monitored hosts and their statuses. 323 | 324 | However, viewing raw JSON is just to give you a sense of the data. See the next section... 325 | 326 | ### Use healthtop to query healthd 327 | 328 | healthtop is a command-line tool that repeatedly queries a healthd and displays the results. 329 | 330 | Install the healthtop binary: 331 | 332 | ```bash 333 | go get github.com/gocraft/health/cmd/healthtop 334 | ``` 335 | 336 | See your top jobs: 337 | 338 | ```bash 339 | healthtop jobs 340 | ``` 341 | 342 | ![Healthtop Screenshot](https://gocraft.github.io/health/images/healthtop.png) 343 | 344 | (By default, healthtop will query healthd on localhost:5032 -- if this is not the case, you can use the source option: ```healthtop --source=10.28.3.132:5032 jobs```) 345 | 346 | You can sort your top jobs by a variety of things: 347 | 348 | ```bash 349 | $ healthtop jobs --sort 350 | Error: flag needs an argument: --sort 351 | Usage of jobs: 352 | -h, --help=false: help for jobs 353 | --name="": name is a partial match on the name 354 | --sort="name": sort ∈ {name, count, count_success, count_XXX, min, max, avg} 355 | --source="localhost:5032": source is the host:port of the healthd to query. ex: localhost:5031 356 | 357 | $ healthtop jobs --sort=count_error 358 | ``` 359 | 360 | 361 | See your hosts: 362 | 363 | ```bash 364 | healthtop hosts 365 | ``` 366 | 367 | ![Healthtop Screenshot](https://gocraft.github.io/health/images/healthtop_hosts.png) 368 | 369 | To get help: 370 | 371 | ```bash 372 | healthtop help 373 | ``` 374 | 375 | ## Current Status and Contributing 376 | 377 | Currently, the core instrumentation component is very solid. Healthd is good. healthtop is functional but could use some love. 378 | 379 | Request for contributions: 380 | 381 | health core: 382 | 383 | * A way to do fine-grained histograms with variable binning.
384 | 385 | healthd & healthtop 386 | 387 | * A web UI that is built into healthd 388 | * Keep track of multiple service types so that we can use one healthd to monitor multiple types of applications 389 | * Ability to drill into specific jobs to see top errors 390 | * tests 391 | * general love 392 | 393 | If anything here interests you, let me know by opening an issue and we can collaborate on it. 394 | 395 | ## gocraft 396 | 397 | gocraft offers a toolkit for building web apps. Currently these packages are available: 398 | 399 | * [gocraft/web](https://github.com/gocraft/web) - Go Router + Middleware. Your Contexts. 400 | * [gocraft/dbr](https://github.com/gocraft/dbr) - Additions to Go's database/sql for super fast performance and convenience. 401 | * [gocraft/health](https://github.com/gocraft/health) - Instrument your web apps with logging and metrics. 402 | * [gocraft/work](https://github.com/gocraft/work) - Process background jobs in Go. 403 | 404 | These packages were developed by the [engineering team](https://eng.uservoice.com) at [UserVoice](https://www.uservoice.com) and currently power much of its infrastructure and tech stack. 405 | 406 | ## Authors 407 | 408 | * Jonathan Novak -- [https://github.com/cypriss](https://github.com/cypriss) 409 | * Sponsored by [UserVoice](https://eng.uservoice.com) 410 | --------------------------------------------------------------------------------