├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── Vagrantfile ├── hystrix ├── circuit.go ├── circuit_test.go ├── doc.go ├── eventstream.go ├── eventstream_test.go ├── hystrix.go ├── hystrix_test.go ├── logger.go ├── metric_collector │ ├── default_metric_collector.go │ └── metric_collector.go ├── metrics.go ├── metrics_test.go ├── pool.go ├── pool_metrics.go ├── pool_test.go ├── rolling │ ├── rolling.go │ ├── rolling_test.go │ ├── rolling_timing.go │ └── rolling_timing_test.go ├── settings.go └── settings_test.go ├── loadtest ├── README.md └── service │ └── main.go ├── plugins ├── datadog_collector.go ├── graphite_aggregator.go ├── statsd_collector.go └── statsd_collector_test.go └── scripts └── vagrant.sh /.gitignore: -------------------------------------------------------------------------------- 1 | .vagrant 2 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: false 2 | language: go 3 | script: 4 | - cd hystrix 5 | - go test -race 6 | go: 7 | - 1.7.x 8 | - 1.8.x 9 | - 1.9.x 10 | - 1.10.x 11 | - tip 12 | env: 13 | global: 14 | - GORACE="halt_on_error=1" 15 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 keith 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | hystrix-go 2 | ========== 3 | 4 | [![Build Status](https://travis-ci.org/afex/hystrix-go.png?branch=master)](https://travis-ci.org/afex/hystrix-go) 5 | [![GoDoc Documentation](http://godoc.org/github.com/afex/hystrix-go/hystrix?status.png)](https://godoc.org/github.com/afex/hystrix-go/hystrix) 6 | 7 | [Hystrix](https://github.com/Netflix/Hystrix) is a great project from Netflix. 8 | 9 | > Hystrix is a latency and fault tolerance library designed to isolate points of access to remote systems, services and 3rd party libraries, stop cascading failure and enable resilience in complex distributed systems where failure is inevitable. 10 | 11 | I think the Hystrix patterns of programmer-defined fallbacks and adaptive health monitoring are good for any distributed system. Go routines and channels are great concurrency primitives, but don't directly help our application stay available during failures. 12 | 13 | hystrix-go aims to allow Go programmers to easily build applications with similar execution semantics of the Java-based Hystrix library. 14 | 15 | For more about how Hystrix works, refer to the [Java Hystrix wiki](https://github.com/Netflix/Hystrix/wiki) 16 | 17 | For API documentation, refer to [GoDoc](https://godoc.org/github.com/afex/hystrix-go/hystrix) 18 | 19 | How to use 20 | ---------- 21 | 22 | ```go 23 | import "github.com/afex/hystrix-go/hystrix" 24 | ``` 25 | 26 | ### Execute code as a Hystrix command 27 | 28 | Define your application logic which relies on external systems, passing your function to ```hystrix.Go```. When that system is healthy this will be the only thing which executes. 29 | 30 | ```go 31 | hystrix.Go("my_command", func() error { 32 | // talk to other services 33 | return nil 34 | }, nil) 35 | ``` 36 | 37 | ### Defining fallback behavior 38 | 39 | If you want code to execute during a service outage, pass in a second function to ```hystrix.Go```. Ideally, the logic here will allow your application to gracefully handle external services being unavailable. 40 | 41 | This triggers when your code returns an error, or whenever it is unable to complete based on a [variety of health checks](https://github.com/Netflix/Hystrix/wiki/How-it-Works). 42 | 43 | ```go 44 | hystrix.Go("my_command", func() error { 45 | // talk to other services 46 | return nil 47 | }, func(err error) error { 48 | // do this when services are down 49 | return nil 50 | }) 51 | ``` 52 | 53 | ### Waiting for output 54 | 55 | Calling ```hystrix.Go``` is like launching a goroutine, except you receive a channel of errors you can choose to monitor. 56 | 57 | ```go 58 | output := make(chan bool, 1) 59 | errors := hystrix.Go("my_command", func() error { 60 | // talk to other services 61 | output <- true 62 | return nil 63 | }, nil) 64 | 65 | select { 66 | case out := <-output: 67 | // success 68 | case err := <-errors: 69 | // failure 70 | } 71 | ``` 72 | 73 | ### Synchronous API 74 | 75 | Since calling a command and immediately waiting for it to finish is a common pattern, a synchronous API is available with the `hystrix.Do` function which returns a single error. 76 | 77 | ```go 78 | err := hystrix.Do("my_command", func() error { 79 | // talk to other services 80 | return nil 81 | }, nil) 82 | ``` 83 | 84 | ### Configure settings 85 | 86 | During application boot, you can call ```hystrix.ConfigureCommand()``` to tweak the settings for each command. 87 | 88 | ```go 89 | hystrix.ConfigureCommand("my_command", hystrix.CommandConfig{ 90 | Timeout: 1000, 91 | MaxConcurrentRequests: 100, 92 | ErrorPercentThreshold: 25, 93 | }) 94 | ``` 95 | 96 | You can also use ```hystrix.Configure()``` which accepts a ```map[string]CommandConfig```. 97 | 98 | ### Enable dashboard metrics 99 | 100 | In your main.go, register the event stream HTTP handler on a port and launch it in a goroutine. Once you configure turbine for your [Hystrix Dashboard](https://github.com/Netflix/Hystrix/tree/master/hystrix-dashboard) to start streaming events, your commands will automatically begin appearing. 101 | 102 | ```go 103 | hystrixStreamHandler := hystrix.NewStreamHandler() 104 | hystrixStreamHandler.Start() 105 | go http.ListenAndServe(net.JoinHostPort("", "81"), hystrixStreamHandler) 106 | ``` 107 | 108 | ### Send circuit metrics to Statsd 109 | 110 | ```go 111 | c, err := plugins.InitializeStatsdCollector(&plugins.StatsdCollectorConfig{ 112 | StatsdAddr: "localhost:8125", 113 | Prefix: "myapp.hystrix", 114 | }) 115 | if err != nil { 116 | log.Fatalf("could not initialize statsd client: %v", err) 117 | } 118 | 119 | metricCollector.Registry.Register(c.NewStatsdCollector) 120 | ``` 121 | 122 | FAQ 123 | --- 124 | 125 | **What happens if my run function panics? Does hystrix-go trigger the fallback?** 126 | 127 | No. hystrix-go does not use ```recover()``` so panics will kill the process like normal. 128 | 129 | Build and Test 130 | -------------- 131 | 132 | - Install vagrant and VirtualBox 133 | - Clone the hystrix-go repository 134 | - Inside the hystrix-go directory, run ```vagrant up```, then ```vagrant ssh``` 135 | - ```cd /go/src/github.com/afex/hystrix-go``` 136 | - ```go test ./...``` 137 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | Vagrant.configure("2") do |config| 2 | config.vm.box = "ubuntu/trusty64" 3 | config.vm.hostname = 'hystrix-go.local' 4 | 5 | config.vm.provision :shell, :path => "scripts/vagrant.sh" 6 | 7 | config.vm.synced_folder ".", "/go/src/github.com/afex/hystrix-go" 8 | 9 | config.vm.provider "virtualbox" do |v| 10 | v.cpus = 3 11 | end 12 | 13 | config.vm.network "forwarded_port", guest: 8888, host: 8888 14 | end 15 | -------------------------------------------------------------------------------- /hystrix/circuit.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | import ( 4 | "fmt" 5 | "sync" 6 | "sync/atomic" 7 | "time" 8 | ) 9 | 10 | // CircuitBreaker is created for each ExecutorPool to track whether requests 11 | // should be attempted, or rejected if the Health of the circuit is too low. 12 | type CircuitBreaker struct { 13 | Name string 14 | open bool 15 | forceOpen bool 16 | mutex *sync.RWMutex 17 | openedOrLastTestedTime int64 18 | 19 | executorPool *executorPool 20 | metrics *metricExchange 21 | } 22 | 23 | var ( 24 | circuitBreakersMutex *sync.RWMutex 25 | circuitBreakers map[string]*CircuitBreaker 26 | ) 27 | 28 | func init() { 29 | circuitBreakersMutex = &sync.RWMutex{} 30 | circuitBreakers = make(map[string]*CircuitBreaker) 31 | } 32 | 33 | // GetCircuit returns the circuit for the given command and whether this call created it. 34 | func GetCircuit(name string) (*CircuitBreaker, bool, error) { 35 | circuitBreakersMutex.RLock() 36 | _, ok := circuitBreakers[name] 37 | if !ok { 38 | circuitBreakersMutex.RUnlock() 39 | circuitBreakersMutex.Lock() 40 | defer circuitBreakersMutex.Unlock() 41 | // because we released the rlock before we obtained the exclusive lock, 42 | // we need to double check that some other thread didn't beat us to 43 | // creation. 44 | if cb, ok := circuitBreakers[name]; ok { 45 | return cb, false, nil 46 | } 47 | circuitBreakers[name] = newCircuitBreaker(name) 48 | } else { 49 | defer circuitBreakersMutex.RUnlock() 50 | } 51 | 52 | return circuitBreakers[name], !ok, nil 53 | } 54 | 55 | // Flush purges all circuit and metric information from memory. 56 | func Flush() { 57 | circuitBreakersMutex.Lock() 58 | defer circuitBreakersMutex.Unlock() 59 | 60 | for name, cb := range circuitBreakers { 61 | cb.metrics.Reset() 62 | cb.executorPool.Metrics.Reset() 63 | delete(circuitBreakers, name) 64 | } 65 | } 66 | 67 | // newCircuitBreaker creates a CircuitBreaker with associated Health 68 | func newCircuitBreaker(name string) *CircuitBreaker { 69 | c := &CircuitBreaker{} 70 | c.Name = name 71 | c.metrics = newMetricExchange(name) 72 | c.executorPool = newExecutorPool(name) 73 | c.mutex = &sync.RWMutex{} 74 | 75 | return c 76 | } 77 | 78 | // toggleForceOpen allows manually causing the fallback logic for all instances 79 | // of a given command. 80 | func (circuit *CircuitBreaker) toggleForceOpen(toggle bool) error { 81 | circuit, _, err := GetCircuit(circuit.Name) 82 | if err != nil { 83 | return err 84 | } 85 | 86 | circuit.forceOpen = toggle 87 | return nil 88 | } 89 | 90 | // IsOpen is called before any Command execution to check whether or 91 | // not it should be attempted. An "open" circuit means it is disabled. 92 | func (circuit *CircuitBreaker) IsOpen() bool { 93 | circuit.mutex.RLock() 94 | o := circuit.forceOpen || circuit.open 95 | circuit.mutex.RUnlock() 96 | 97 | if o { 98 | return true 99 | } 100 | 101 | if uint64(circuit.metrics.Requests().Sum(time.Now())) < getSettings(circuit.Name).RequestVolumeThreshold { 102 | return false 103 | } 104 | 105 | if !circuit.metrics.IsHealthy(time.Now()) { 106 | // too many failures, open the circuit 107 | circuit.setOpen() 108 | return true 109 | } 110 | 111 | return false 112 | } 113 | 114 | // AllowRequest is checked before a command executes, ensuring that circuit state and metric health allow it. 115 | // When the circuit is open, this call will occasionally return true to measure whether the external service 116 | // has recovered. 117 | func (circuit *CircuitBreaker) AllowRequest() bool { 118 | return !circuit.IsOpen() || circuit.allowSingleTest() 119 | } 120 | 121 | func (circuit *CircuitBreaker) allowSingleTest() bool { 122 | circuit.mutex.RLock() 123 | defer circuit.mutex.RUnlock() 124 | 125 | now := time.Now().UnixNano() 126 | openedOrLastTestedTime := atomic.LoadInt64(&circuit.openedOrLastTestedTime) 127 | if circuit.open && now > openedOrLastTestedTime+getSettings(circuit.Name).SleepWindow.Nanoseconds() { 128 | swapped := atomic.CompareAndSwapInt64(&circuit.openedOrLastTestedTime, openedOrLastTestedTime, now) 129 | if swapped { 130 | log.Printf("hystrix-go: allowing single test to possibly close circuit %v", circuit.Name) 131 | } 132 | return swapped 133 | } 134 | 135 | return false 136 | } 137 | 138 | func (circuit *CircuitBreaker) setOpen() { 139 | circuit.mutex.Lock() 140 | defer circuit.mutex.Unlock() 141 | 142 | if circuit.open { 143 | return 144 | } 145 | 146 | log.Printf("hystrix-go: opening circuit %v", circuit.Name) 147 | 148 | circuit.openedOrLastTestedTime = time.Now().UnixNano() 149 | circuit.open = true 150 | } 151 | 152 | func (circuit *CircuitBreaker) setClose() { 153 | circuit.mutex.Lock() 154 | defer circuit.mutex.Unlock() 155 | 156 | if !circuit.open { 157 | return 158 | } 159 | 160 | log.Printf("hystrix-go: closing circuit %v", circuit.Name) 161 | 162 | circuit.open = false 163 | circuit.metrics.Reset() 164 | } 165 | 166 | // ReportEvent records command metrics for tracking recent error rates and exposing data to the dashboard. 167 | func (circuit *CircuitBreaker) ReportEvent(eventTypes []string, start time.Time, runDuration time.Duration) error { 168 | if len(eventTypes) == 0 { 169 | return fmt.Errorf("no event types sent for metrics") 170 | } 171 | 172 | circuit.mutex.RLock() 173 | o := circuit.open 174 | circuit.mutex.RUnlock() 175 | if eventTypes[0] == "success" && o { 176 | circuit.setClose() 177 | } 178 | 179 | var concurrencyInUse float64 180 | if circuit.executorPool.Max > 0 { 181 | concurrencyInUse = float64(circuit.executorPool.ActiveCount()) / float64(circuit.executorPool.Max) 182 | } 183 | 184 | select { 185 | case circuit.metrics.Updates <- &commandExecution{ 186 | Types: eventTypes, 187 | Start: start, 188 | RunDuration: runDuration, 189 | ConcurrencyInUse: concurrencyInUse, 190 | }: 191 | default: 192 | return CircuitError{Message: fmt.Sprintf("metrics channel (%v) is at capacity", circuit.Name)} 193 | } 194 | 195 | return nil 196 | } 197 | -------------------------------------------------------------------------------- /hystrix/circuit_test.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | import ( 4 | "sync" 5 | "sync/atomic" 6 | "testing" 7 | "time" 8 | 9 | "math/rand" 10 | "testing/quick" 11 | 12 | . "github.com/smartystreets/goconvey/convey" 13 | ) 14 | 15 | func TestGetCircuit(t *testing.T) { 16 | defer Flush() 17 | 18 | Convey("when calling GetCircuit", t, func() { 19 | var created bool 20 | var err error 21 | _, created, err = GetCircuit("foo") 22 | 23 | Convey("once, the circuit should be created", func() { 24 | So(err, ShouldBeNil) 25 | So(created, ShouldEqual, true) 26 | }) 27 | 28 | Convey("twice, the circuit should be reused", func() { 29 | _, created, err = GetCircuit("foo") 30 | So(err, ShouldBeNil) 31 | So(created, ShouldEqual, false) 32 | }) 33 | }) 34 | } 35 | 36 | func TestMultithreadedGetCircuit(t *testing.T) { 37 | defer Flush() 38 | 39 | Convey("calling GetCircuit", t, func() { 40 | numThreads := 100 41 | var numCreates int32 42 | var numRunningRoutines int32 43 | var startingLine sync.WaitGroup 44 | var finishLine sync.WaitGroup 45 | startingLine.Add(1) 46 | finishLine.Add(numThreads) 47 | 48 | for i := 0; i < numThreads; i++ { 49 | go func() { 50 | if atomic.AddInt32(&numRunningRoutines, 1) == int32(numThreads) { 51 | startingLine.Done() 52 | } else { 53 | startingLine.Wait() 54 | } 55 | 56 | _, created, _ := GetCircuit("foo") 57 | 58 | if created { 59 | atomic.AddInt32(&numCreates, 1) 60 | } 61 | 62 | finishLine.Done() 63 | }() 64 | } 65 | 66 | finishLine.Wait() 67 | 68 | Convey("should be threadsafe", func() { 69 | So(numCreates, ShouldEqual, int32(1)) 70 | }) 71 | }) 72 | } 73 | 74 | func TestReportEventOpenThenClose(t *testing.T) { 75 | Convey("when a circuit is closed", t, func() { 76 | defer Flush() 77 | 78 | ConfigureCommand("", CommandConfig{ErrorPercentThreshold: 50}) 79 | 80 | cb, _, err := GetCircuit("") 81 | So(err, ShouldEqual, nil) 82 | So(cb.IsOpen(), ShouldBeFalse) 83 | openedTime := cb.openedOrLastTestedTime 84 | 85 | Convey("but the metrics are unhealthy", func() { 86 | cb.metrics = metricFailingPercent(100) 87 | So(cb.metrics.IsHealthy(time.Now()), ShouldBeFalse) 88 | 89 | Convey("and a success is reported", func() { 90 | err = cb.ReportEvent([]string{"success"}, time.Now(), 0) 91 | So(err, ShouldEqual, nil) 92 | 93 | Convey("the circuit does not open then close", func() { 94 | So(cb.openedOrLastTestedTime, ShouldEqual, openedTime) 95 | }) 96 | }) 97 | }) 98 | }) 99 | } 100 | 101 | func TestReportEventMultiThreaded(t *testing.T) { 102 | rand.Seed(time.Now().UnixNano()) 103 | run := func() bool { 104 | defer Flush() 105 | // Make the circuit easily open and close intermittently. 106 | ConfigureCommand("", CommandConfig{ 107 | MaxConcurrentRequests: 1, 108 | ErrorPercentThreshold: 1, 109 | RequestVolumeThreshold: 1, 110 | SleepWindow: 10, 111 | }) 112 | cb, _, _ := GetCircuit("") 113 | count := 5 114 | wg := &sync.WaitGroup{} 115 | wg.Add(count) 116 | c := make(chan bool, count) 117 | for i := 0; i < count; i++ { 118 | go func() { 119 | defer func() { 120 | if r := recover(); r != nil { 121 | t.Error(r) 122 | c <- false 123 | } else { 124 | wg.Done() 125 | } 126 | }() 127 | // randomized eventType to open/close circuit 128 | eventType := "rejected" 129 | if rand.Intn(3) == 1 { 130 | eventType = "success" 131 | } 132 | err := cb.ReportEvent([]string{eventType}, time.Now(), time.Second) 133 | if err != nil { 134 | t.Error(err) 135 | } 136 | time.Sleep(time.Millisecond) 137 | // cb.IsOpen() internally calls cb.setOpen() 138 | cb.IsOpen() 139 | }() 140 | } 141 | go func() { 142 | wg.Wait() 143 | c <- true 144 | }() 145 | return <-c 146 | } 147 | if err := quick.Check(run, nil); err != nil { 148 | t.Error(err) 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /hystrix/doc.go: -------------------------------------------------------------------------------- 1 | /* 2 | Package hystrix is a latency and fault tolerance library designed to isolate 3 | points of access to remote systems, services and 3rd party libraries, stop 4 | cascading failure and enable resilience in complex distributed systems where 5 | failure is inevitable. 6 | 7 | Based on the java project of the same name, by Netflix. https://github.com/Netflix/Hystrix 8 | 9 | Execute code as a Hystrix command 10 | 11 | Define your application logic which relies on external systems, passing your function to Go. When that system is healthy this will be the only thing which executes. 12 | 13 | hystrix.Go("my_command", func() error { 14 | // talk to other services 15 | return nil 16 | }, nil) 17 | 18 | Defining fallback behavior 19 | 20 | If you want code to execute during a service outage, pass in a second function to Go. Ideally, the logic here will allow your application to gracefully handle external services being unavailable. 21 | 22 | This triggers when your code returns an error, or whenever it is unable to complete based on a variety of health checks https://github.com/Netflix/Hystrix/wiki/How-it-Works. 23 | 24 | hystrix.Go("my_command", func() error { 25 | // talk to other services 26 | return nil 27 | }, func(err error) error { 28 | // do this when services are down 29 | return nil 30 | }) 31 | 32 | Waiting for output 33 | 34 | Calling Go is like launching a goroutine, except you receive a channel of errors you can choose to monitor. 35 | 36 | output := make(chan bool, 1) 37 | errors := hystrix.Go("my_command", func() error { 38 | // talk to other services 39 | output <- true 40 | return nil 41 | }, nil) 42 | 43 | select { 44 | case out := <-output: 45 | // success 46 | case err := <-errors: 47 | // failure 48 | } 49 | 50 | Synchronous API 51 | 52 | Since calling a command and immediately waiting for it to finish is a common pattern, a synchronous API is available with the Do function which returns a single error. 53 | 54 | err := hystrix.Do("my_command", func() error { 55 | // talk to other services 56 | return nil 57 | }, nil) 58 | 59 | Configure settings 60 | 61 | During application boot, you can call ConfigureCommand to tweak the settings for each command. 62 | 63 | hystrix.ConfigureCommand("my_command", hystrix.CommandConfig{ 64 | Timeout: 1000, 65 | MaxConcurrentRequests: 100, 66 | ErrorPercentThreshold: 25, 67 | }) 68 | 69 | You can also use Configure which accepts a map[string]CommandConfig. 70 | 71 | Enable dashboard metrics 72 | 73 | In your main.go, register the event stream HTTP handler on a port and launch it in a goroutine. Once you configure turbine for your Hystrix Dashboard https://github.com/Netflix/Hystrix/tree/master/hystrix-dashboard to start streaming events, your commands will automatically begin appearing. 74 | 75 | hystrixStreamHandler := hystrix.NewStreamHandler() 76 | hystrixStreamHandler.Start() 77 | go http.ListenAndServe(net.JoinHostPort("", "81"), hystrixStreamHandler) 78 | */ 79 | package hystrix 80 | -------------------------------------------------------------------------------- /hystrix/eventstream.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "net/http" 7 | "sync" 8 | "time" 9 | 10 | "github.com/afex/hystrix-go/hystrix/rolling" 11 | ) 12 | 13 | const ( 14 | streamEventBufferSize = 10 15 | ) 16 | 17 | // NewStreamHandler returns a server capable of exposing dashboard metrics via HTTP. 18 | func NewStreamHandler() *StreamHandler { 19 | return &StreamHandler{} 20 | } 21 | 22 | // StreamHandler publishes metrics for each command and each pool once a second to all connected HTTP client. 23 | type StreamHandler struct { 24 | requests map[*http.Request]chan []byte 25 | mu sync.RWMutex 26 | done chan struct{} 27 | } 28 | 29 | // Start begins watching the in-memory circuit breakers for metrics 30 | func (sh *StreamHandler) Start() { 31 | sh.requests = make(map[*http.Request]chan []byte) 32 | sh.done = make(chan struct{}) 33 | go sh.loop() 34 | } 35 | 36 | // Stop shuts down the metric collection routine 37 | func (sh *StreamHandler) Stop() { 38 | close(sh.done) 39 | } 40 | 41 | var _ http.Handler = (*StreamHandler)(nil) 42 | 43 | func (sh *StreamHandler) ServeHTTP(rw http.ResponseWriter, req *http.Request) { 44 | // Make sure that the writer supports flushing. 45 | f, ok := rw.(http.Flusher) 46 | if !ok { 47 | http.Error(rw, "Streaming unsupported!", http.StatusInternalServerError) 48 | return 49 | } 50 | events := sh.register(req) 51 | defer sh.unregister(req) 52 | 53 | notify := rw.(http.CloseNotifier).CloseNotify() 54 | 55 | rw.Header().Add("Content-Type", "text/event-stream") 56 | rw.Header().Set("Cache-Control", "no-cache") 57 | rw.Header().Set("Connection", "keep-alive") 58 | for { 59 | select { 60 | case <-notify: 61 | // client is gone 62 | return 63 | case event := <-events: 64 | _, err := rw.Write(event) 65 | if err != nil { 66 | return 67 | } 68 | f.Flush() 69 | } 70 | } 71 | } 72 | 73 | func (sh *StreamHandler) loop() { 74 | tick := time.Tick(1 * time.Second) 75 | for { 76 | select { 77 | case <-tick: 78 | circuitBreakersMutex.RLock() 79 | for _, cb := range circuitBreakers { 80 | sh.publishMetrics(cb) 81 | sh.publishThreadPools(cb.executorPool) 82 | } 83 | circuitBreakersMutex.RUnlock() 84 | case <-sh.done: 85 | return 86 | } 87 | } 88 | } 89 | 90 | func (sh *StreamHandler) publishMetrics(cb *CircuitBreaker) error { 91 | now := time.Now() 92 | reqCount := cb.metrics.Requests().Sum(now) 93 | errCount := cb.metrics.DefaultCollector().Errors().Sum(now) 94 | errPct := cb.metrics.ErrorPercent(now) 95 | 96 | eventBytes, err := json.Marshal(&streamCmdMetric{ 97 | Type: "HystrixCommand", 98 | Name: cb.Name, 99 | Group: cb.Name, 100 | Time: currentTime(), 101 | ReportingHosts: 1, 102 | 103 | RequestCount: uint32(reqCount), 104 | ErrorCount: uint32(errCount), 105 | ErrorPct: uint32(errPct), 106 | CircuitBreakerOpen: cb.IsOpen(), 107 | 108 | RollingCountSuccess: uint32(cb.metrics.DefaultCollector().Successes().Sum(now)), 109 | RollingCountFailure: uint32(cb.metrics.DefaultCollector().Failures().Sum(now)), 110 | RollingCountThreadPoolRejected: uint32(cb.metrics.DefaultCollector().Rejects().Sum(now)), 111 | RollingCountShortCircuited: uint32(cb.metrics.DefaultCollector().ShortCircuits().Sum(now)), 112 | RollingCountTimeout: uint32(cb.metrics.DefaultCollector().Timeouts().Sum(now)), 113 | RollingCountFallbackSuccess: uint32(cb.metrics.DefaultCollector().FallbackSuccesses().Sum(now)), 114 | RollingCountFallbackFailure: uint32(cb.metrics.DefaultCollector().FallbackFailures().Sum(now)), 115 | 116 | LatencyTotal: generateLatencyTimings(cb.metrics.DefaultCollector().TotalDuration()), 117 | LatencyTotalMean: cb.metrics.DefaultCollector().TotalDuration().Mean(), 118 | LatencyExecute: generateLatencyTimings(cb.metrics.DefaultCollector().RunDuration()), 119 | LatencyExecuteMean: cb.metrics.DefaultCollector().RunDuration().Mean(), 120 | 121 | // TODO: all hard-coded values should become configurable settings, per circuit 122 | 123 | RollingStatsWindow: 10000, 124 | ExecutionIsolationStrategy: "THREAD", 125 | 126 | CircuitBreakerEnabled: true, 127 | CircuitBreakerForceClosed: false, 128 | CircuitBreakerForceOpen: cb.forceOpen, 129 | CircuitBreakerErrorThresholdPercent: uint32(getSettings(cb.Name).ErrorPercentThreshold), 130 | CircuitBreakerSleepWindow: uint32(getSettings(cb.Name).SleepWindow.Seconds() * 1000), 131 | CircuitBreakerRequestVolumeThreshold: uint32(getSettings(cb.Name).RequestVolumeThreshold), 132 | }) 133 | if err != nil { 134 | return err 135 | } 136 | err = sh.writeToRequests(eventBytes) 137 | if err != nil { 138 | return err 139 | } 140 | 141 | return nil 142 | } 143 | 144 | func (sh *StreamHandler) publishThreadPools(pool *executorPool) error { 145 | now := time.Now() 146 | 147 | eventBytes, err := json.Marshal(&streamThreadPoolMetric{ 148 | Type: "HystrixThreadPool", 149 | Name: pool.Name, 150 | ReportingHosts: 1, 151 | 152 | CurrentActiveCount: uint32(pool.ActiveCount()), 153 | CurrentTaskCount: 0, 154 | CurrentCompletedTaskCount: 0, 155 | 156 | RollingCountThreadsExecuted: uint32(pool.Metrics.Executed.Sum(now)), 157 | RollingMaxActiveThreads: uint32(pool.Metrics.MaxActiveRequests.Max(now)), 158 | 159 | CurrentPoolSize: uint32(pool.Max), 160 | CurrentCorePoolSize: uint32(pool.Max), 161 | CurrentLargestPoolSize: uint32(pool.Max), 162 | CurrentMaximumPoolSize: uint32(pool.Max), 163 | 164 | RollingStatsWindow: 10000, 165 | QueueSizeRejectionThreshold: 0, 166 | CurrentQueueSize: 0, 167 | }) 168 | if err != nil { 169 | return err 170 | } 171 | err = sh.writeToRequests(eventBytes) 172 | 173 | return nil 174 | } 175 | 176 | func (sh *StreamHandler) writeToRequests(eventBytes []byte) error { 177 | var b bytes.Buffer 178 | _, err := b.Write([]byte("data:")) 179 | if err != nil { 180 | return err 181 | } 182 | 183 | _, err = b.Write(eventBytes) 184 | if err != nil { 185 | return err 186 | } 187 | _, err = b.Write([]byte("\n\n")) 188 | if err != nil { 189 | return err 190 | } 191 | dataBytes := b.Bytes() 192 | sh.mu.RLock() 193 | 194 | for _, requestEvents := range sh.requests { 195 | select { 196 | case requestEvents <- dataBytes: 197 | default: 198 | } 199 | } 200 | sh.mu.RUnlock() 201 | 202 | return nil 203 | } 204 | 205 | func (sh *StreamHandler) register(req *http.Request) <-chan []byte { 206 | sh.mu.RLock() 207 | events, ok := sh.requests[req] 208 | sh.mu.RUnlock() 209 | if ok { 210 | return events 211 | } 212 | 213 | events = make(chan []byte, streamEventBufferSize) 214 | sh.mu.Lock() 215 | sh.requests[req] = events 216 | sh.mu.Unlock() 217 | return events 218 | } 219 | 220 | func (sh *StreamHandler) unregister(req *http.Request) { 221 | sh.mu.Lock() 222 | delete(sh.requests, req) 223 | sh.mu.Unlock() 224 | } 225 | 226 | func generateLatencyTimings(r *rolling.Timing) streamCmdLatency { 227 | return streamCmdLatency{ 228 | Timing0: r.Percentile(0), 229 | Timing25: r.Percentile(25), 230 | Timing50: r.Percentile(50), 231 | Timing75: r.Percentile(75), 232 | Timing90: r.Percentile(90), 233 | Timing95: r.Percentile(95), 234 | Timing99: r.Percentile(99), 235 | Timing995: r.Percentile(99.5), 236 | Timing100: r.Percentile(100), 237 | } 238 | } 239 | 240 | type streamCmdMetric struct { 241 | Type string `json:"type"` 242 | Name string `json:"name"` 243 | Group string `json:"group"` 244 | Time int64 `json:"currentTime"` 245 | ReportingHosts uint32 `json:"reportingHosts"` 246 | 247 | // Health 248 | RequestCount uint32 `json:"requestCount"` 249 | ErrorCount uint32 `json:"errorCount"` 250 | ErrorPct uint32 `json:"errorPercentage"` 251 | CircuitBreakerOpen bool `json:"isCircuitBreakerOpen"` 252 | 253 | RollingCountCollapsedRequests uint32 `json:"rollingCountCollapsedRequests"` 254 | RollingCountExceptionsThrown uint32 `json:"rollingCountExceptionsThrown"` 255 | RollingCountFailure uint32 `json:"rollingCountFailure"` 256 | RollingCountFallbackFailure uint32 `json:"rollingCountFallbackFailure"` 257 | RollingCountFallbackRejection uint32 `json:"rollingCountFallbackRejection"` 258 | RollingCountFallbackSuccess uint32 `json:"rollingCountFallbackSuccess"` 259 | RollingCountResponsesFromCache uint32 `json:"rollingCountResponsesFromCache"` 260 | RollingCountSemaphoreRejected uint32 `json:"rollingCountSemaphoreRejected"` 261 | RollingCountShortCircuited uint32 `json:"rollingCountShortCircuited"` 262 | RollingCountSuccess uint32 `json:"rollingCountSuccess"` 263 | RollingCountThreadPoolRejected uint32 `json:"rollingCountThreadPoolRejected"` 264 | RollingCountTimeout uint32 `json:"rollingCountTimeout"` 265 | 266 | CurrentConcurrentExecutionCount uint32 `json:"currentConcurrentExecutionCount"` 267 | 268 | LatencyExecuteMean uint32 `json:"latencyExecute_mean"` 269 | LatencyExecute streamCmdLatency `json:"latencyExecute"` 270 | LatencyTotalMean uint32 `json:"latencyTotal_mean"` 271 | LatencyTotal streamCmdLatency `json:"latencyTotal"` 272 | 273 | // Properties 274 | CircuitBreakerRequestVolumeThreshold uint32 `json:"propertyValue_circuitBreakerRequestVolumeThreshold"` 275 | CircuitBreakerSleepWindow uint32 `json:"propertyValue_circuitBreakerSleepWindowInMilliseconds"` 276 | CircuitBreakerErrorThresholdPercent uint32 `json:"propertyValue_circuitBreakerErrorThresholdPercentage"` 277 | CircuitBreakerForceOpen bool `json:"propertyValue_circuitBreakerForceOpen"` 278 | CircuitBreakerForceClosed bool `json:"propertyValue_circuitBreakerForceClosed"` 279 | CircuitBreakerEnabled bool `json:"propertyValue_circuitBreakerEnabled"` 280 | ExecutionIsolationStrategy string `json:"propertyValue_executionIsolationStrategy"` 281 | ExecutionIsolationThreadTimeout uint32 `json:"propertyValue_executionIsolationThreadTimeoutInMilliseconds"` 282 | ExecutionIsolationThreadInterruptOnTimeout bool `json:"propertyValue_executionIsolationThreadInterruptOnTimeout"` 283 | ExecutionIsolationThreadPoolKeyOverride string `json:"propertyValue_executionIsolationThreadPoolKeyOverride"` 284 | ExecutionIsolationSemaphoreMaxConcurrentRequests uint32 `json:"propertyValue_executionIsolationSemaphoreMaxConcurrentRequests"` 285 | FallbackIsolationSemaphoreMaxConcurrentRequests uint32 `json:"propertyValue_fallbackIsolationSemaphoreMaxConcurrentRequests"` 286 | RollingStatsWindow uint32 `json:"propertyValue_metricsRollingStatisticalWindowInMilliseconds"` 287 | RequestCacheEnabled bool `json:"propertyValue_requestCacheEnabled"` 288 | RequestLogEnabled bool `json:"propertyValue_requestLogEnabled"` 289 | } 290 | 291 | type streamCmdLatency struct { 292 | Timing0 uint32 `json:"0"` 293 | Timing25 uint32 `json:"25"` 294 | Timing50 uint32 `json:"50"` 295 | Timing75 uint32 `json:"75"` 296 | Timing90 uint32 `json:"90"` 297 | Timing95 uint32 `json:"95"` 298 | Timing99 uint32 `json:"99"` 299 | Timing995 uint32 `json:"99.5"` 300 | Timing100 uint32 `json:"100"` 301 | } 302 | 303 | type streamThreadPoolMetric struct { 304 | Type string `json:"type"` 305 | Name string `json:"name"` 306 | ReportingHosts uint32 `json:"reportingHosts"` 307 | 308 | CurrentActiveCount uint32 `json:"currentActiveCount"` 309 | CurrentCompletedTaskCount uint32 `json:"currentCompletedTaskCount"` 310 | CurrentCorePoolSize uint32 `json:"currentCorePoolSize"` 311 | CurrentLargestPoolSize uint32 `json:"currentLargestPoolSize"` 312 | CurrentMaximumPoolSize uint32 `json:"currentMaximumPoolSize"` 313 | CurrentPoolSize uint32 `json:"currentPoolSize"` 314 | CurrentQueueSize uint32 `json:"currentQueueSize"` 315 | CurrentTaskCount uint32 `json:"currentTaskCount"` 316 | 317 | RollingMaxActiveThreads uint32 `json:"rollingMaxActiveThreads"` 318 | RollingCountThreadsExecuted uint32 `json:"rollingCountThreadsExecuted"` 319 | 320 | RollingStatsWindow uint32 `json:"propertyValue_metricsRollingStatisticalWindowInMilliseconds"` 321 | QueueSizeRejectionThreshold uint32 `json:"propertyValue_queueSizeRejectionThreshold"` 322 | } 323 | 324 | func currentTime() int64 { 325 | return time.Now().UnixNano() / int64(1000000) 326 | } 327 | -------------------------------------------------------------------------------- /hystrix/eventstream_test.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "net/http" 7 | "net/http/httptest" 8 | "strings" 9 | "sync" 10 | "testing" 11 | "time" 12 | 13 | . "github.com/smartystreets/goconvey/convey" 14 | ) 15 | 16 | type eventStreamTestServer struct { 17 | *httptest.Server 18 | *StreamHandler 19 | } 20 | 21 | func (s *eventStreamTestServer) stopTestServer() error { 22 | s.Close() 23 | s.Stop() 24 | Flush() 25 | 26 | return nil 27 | } 28 | 29 | func startTestServer() *eventStreamTestServer { 30 | hystrixStreamHandler := NewStreamHandler() 31 | hystrixStreamHandler.Start() 32 | return &eventStreamTestServer{ 33 | httptest.NewServer(hystrixStreamHandler), 34 | hystrixStreamHandler, 35 | } 36 | } 37 | 38 | func sleepingCommand(t *testing.T, name string, duration time.Duration) { 39 | done := make(chan bool) 40 | errChan := Go(name, func() error { 41 | time.Sleep(duration) 42 | done <- true 43 | return nil 44 | }, nil) 45 | 46 | select { 47 | case _ = <-done: 48 | // do nothing 49 | case err := <-errChan: 50 | t.Fatal(err) 51 | } 52 | } 53 | 54 | func failingCommand(t *testing.T, name string, duration time.Duration) { 55 | done := make(chan bool) 56 | errChan := Go(name, func() error { 57 | time.Sleep(duration) 58 | return fmt.Errorf("fail") 59 | }, nil) 60 | 61 | select { 62 | case _ = <-done: 63 | t.Fatal("should not have succeeded") 64 | case _ = <-errChan: 65 | // do nothing 66 | } 67 | } 68 | 69 | // grabFirstFromStream reads on the http request until we see the first 70 | // full result printed 71 | func grabFirstCommandFromStream(t *testing.T, url string) streamCmdMetric { 72 | var event streamCmdMetric 73 | 74 | metrics, done := streamMetrics(t, url) 75 | for m := range metrics { 76 | if strings.Contains(m, "HystrixCommand") { 77 | done <- true 78 | close(done) 79 | 80 | err := json.Unmarshal([]byte(m), &event) 81 | if err != nil { 82 | t.Fatal(err) 83 | } 84 | 85 | break 86 | } 87 | } 88 | 89 | return event 90 | } 91 | 92 | func grabFirstThreadPoolFromStream(t *testing.T, url string) streamThreadPoolMetric { 93 | var event streamThreadPoolMetric 94 | 95 | metrics, done := streamMetrics(t, url) 96 | for m := range metrics { 97 | if strings.Contains(m, "HystrixThreadPool") { 98 | done <- true 99 | close(done) 100 | 101 | err := json.Unmarshal([]byte(m), &event) 102 | if err != nil { 103 | t.Fatal(err) 104 | } 105 | break 106 | } 107 | } 108 | 109 | return event 110 | } 111 | 112 | func streamMetrics(t *testing.T, url string) (chan string, chan bool) { 113 | metrics := make(chan string, 1) 114 | done := make(chan bool, 1) 115 | 116 | go func() { 117 | res, err := http.Get(url) 118 | if err != nil { 119 | t.Fatal(err) 120 | } 121 | defer res.Body.Close() 122 | 123 | buf := []byte{0} 124 | data := "" 125 | for { 126 | _, err := res.Body.Read(buf) 127 | if err != nil { 128 | t.Fatal(err) 129 | } 130 | 131 | data += string(buf) 132 | if strings.Contains(data, "\n\n") { 133 | data = strings.Replace(data, "data:{", "{", 1) 134 | metrics <- data 135 | data = "" 136 | } 137 | 138 | select { 139 | case _ = <-done: 140 | close(metrics) 141 | return 142 | default: 143 | } 144 | } 145 | }() 146 | 147 | return metrics, done 148 | } 149 | 150 | func TestEventStream(t *testing.T) { 151 | Convey("given a running event stream", t, func() { 152 | server := startTestServer() 153 | defer server.stopTestServer() 154 | 155 | Convey("after 2 successful commands", func() { 156 | sleepingCommand(t, "eventstream", 1*time.Millisecond) 157 | sleepingCommand(t, "eventstream", 1*time.Millisecond) 158 | 159 | Convey("request count should be 2", func() { 160 | event := grabFirstCommandFromStream(t, server.URL) 161 | 162 | So(event.Name, ShouldEqual, "eventstream") 163 | So(int(event.RequestCount), ShouldEqual, 2) 164 | }) 165 | }) 166 | 167 | Convey("after 1 successful command and 2 unsuccessful commands", func() { 168 | sleepingCommand(t, "errorpercent", 1*time.Millisecond) 169 | failingCommand(t, "errorpercent", 1*time.Millisecond) 170 | failingCommand(t, "errorpercent", 1*time.Millisecond) 171 | 172 | Convey("the error precentage should be 67", func() { 173 | metric := grabFirstCommandFromStream(t, server.URL) 174 | 175 | So(metric.ErrorPct, ShouldEqual, 67) 176 | }) 177 | }) 178 | }) 179 | } 180 | 181 | func TestClientCancelEventStream(t *testing.T) { 182 | Convey("given a running event stream", t, func() { 183 | server := startTestServer() 184 | defer server.stopTestServer() 185 | 186 | sleepingCommand(t, "eventstream", 1*time.Millisecond) 187 | 188 | Convey("after a client connects", func() { 189 | req, err := http.NewRequest("GET", server.URL, nil) 190 | if err != nil { 191 | t.Fatal(err) 192 | } 193 | // use a transport so we can cancel the stream when we're done - in 1.5 this is much easier 194 | tr := &http.Transport{} 195 | client := &http.Client{Transport: tr} 196 | wait := make(chan struct{}) 197 | afterFirstRead := &sync.WaitGroup{} 198 | afterFirstRead.Add(1) 199 | 200 | go func() { 201 | afr := afterFirstRead 202 | buf := []byte{0} 203 | res, err := client.Do(req) 204 | if err != nil { 205 | t.Fatal(err) 206 | } 207 | defer res.Body.Close() 208 | 209 | for { 210 | select { 211 | case <-wait: 212 | //wait for master goroutine to break us out 213 | tr.CancelRequest(req) 214 | return 215 | default: 216 | //read something 217 | _, err = res.Body.Read(buf) 218 | if err != nil { 219 | t.Fatal(err) 220 | } 221 | if afr != nil { 222 | afr.Done() 223 | afr = nil 224 | } 225 | } 226 | } 227 | }() 228 | // need to make sure our request has round-tripped to the server 229 | afterFirstRead.Wait() 230 | 231 | Convey("it should be registered", func() { 232 | server.StreamHandler.mu.RLock() 233 | So(len(server.StreamHandler.requests), ShouldEqual, 1) 234 | server.StreamHandler.mu.RUnlock() 235 | Convey("after client disconnects", func() { 236 | // let the request be cancelled and the body closed 237 | close(wait) 238 | // wait for the server to clean up 239 | time.Sleep(2000 * time.Millisecond) 240 | Convey("it should be detected as disconnected and de-registered", func() { 241 | //confirm we have 0 clients 242 | server.StreamHandler.mu.RLock() 243 | So(len(server.StreamHandler.requests), ShouldEqual, 0) 244 | server.StreamHandler.mu.RUnlock() 245 | }) 246 | }) 247 | }) 248 | }) 249 | }) 250 | } 251 | 252 | func TestThreadPoolStream(t *testing.T) { 253 | Convey("given a running event stream", t, func() { 254 | server := startTestServer() 255 | defer server.stopTestServer() 256 | 257 | Convey("after a successful command", func() { 258 | sleepingCommand(t, "threadpool", 1*time.Millisecond) 259 | metric := grabFirstThreadPoolFromStream(t, server.URL) 260 | 261 | Convey("the rolling count of executions should increment", func() { 262 | So(metric.RollingCountThreadsExecuted, ShouldEqual, 1) 263 | }) 264 | 265 | Convey("the pool size should be 10", func() { 266 | So(metric.CurrentPoolSize, ShouldEqual, 10) 267 | }) 268 | }) 269 | }) 270 | } 271 | -------------------------------------------------------------------------------- /hystrix/hystrix.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | type runFunc func() error 11 | type fallbackFunc func(error) error 12 | type runFuncC func(context.Context) error 13 | type fallbackFuncC func(context.Context, error) error 14 | 15 | // A CircuitError is an error which models various failure states of execution, 16 | // such as the circuit being open or a timeout. 17 | type CircuitError struct { 18 | Message string 19 | } 20 | 21 | func (e CircuitError) Error() string { 22 | return "hystrix: " + e.Message 23 | } 24 | 25 | // command models the state used for a single execution on a circuit. "hystrix command" is commonly 26 | // used to describe the pairing of your run/fallback functions with a circuit. 27 | type command struct { 28 | sync.Mutex 29 | 30 | ticket *struct{} 31 | start time.Time 32 | errChan chan error 33 | finished chan bool 34 | circuit *CircuitBreaker 35 | run runFuncC 36 | fallback fallbackFuncC 37 | runDuration time.Duration 38 | events []string 39 | } 40 | 41 | var ( 42 | // ErrMaxConcurrency occurs when too many of the same named command are executed at the same time. 43 | ErrMaxConcurrency = CircuitError{Message: "max concurrency"} 44 | // ErrCircuitOpen returns when an execution attempt "short circuits". This happens due to the circuit being measured as unhealthy. 45 | ErrCircuitOpen = CircuitError{Message: "circuit open"} 46 | // ErrTimeout occurs when the provided function takes too long to execute. 47 | ErrTimeout = CircuitError{Message: "timeout"} 48 | ) 49 | 50 | // Go runs your function while tracking the health of previous calls to it. 51 | // If your function begins slowing down or failing repeatedly, we will block 52 | // new calls to it for you to give the dependent service time to repair. 53 | // 54 | // Define a fallback function if you want to define some code to execute during outages. 55 | func Go(name string, run runFunc, fallback fallbackFunc) chan error { 56 | runC := func(ctx context.Context) error { 57 | return run() 58 | } 59 | var fallbackC fallbackFuncC 60 | if fallback != nil { 61 | fallbackC = func(ctx context.Context, err error) error { 62 | return fallback(err) 63 | } 64 | } 65 | return GoC(context.Background(), name, runC, fallbackC) 66 | } 67 | 68 | // GoC runs your function while tracking the health of previous calls to it. 69 | // If your function begins slowing down or failing repeatedly, we will block 70 | // new calls to it for you to give the dependent service time to repair. 71 | // 72 | // Define a fallback function if you want to define some code to execute during outages. 73 | func GoC(ctx context.Context, name string, run runFuncC, fallback fallbackFuncC) chan error { 74 | cmd := &command{ 75 | run: run, 76 | fallback: fallback, 77 | start: time.Now(), 78 | errChan: make(chan error, 1), 79 | finished: make(chan bool, 1), 80 | } 81 | 82 | // dont have methods with explicit params and returns 83 | // let data come in and out naturally, like with any closure 84 | // explicit error return to give place for us to kill switch the operation (fallback) 85 | 86 | circuit, _, err := GetCircuit(name) 87 | if err != nil { 88 | cmd.errChan <- err 89 | return cmd.errChan 90 | } 91 | cmd.circuit = circuit 92 | ticketCond := sync.NewCond(cmd) 93 | ticketChecked := false 94 | // When the caller extracts error from returned errChan, it's assumed that 95 | // the ticket's been returned to executorPool. Therefore, returnTicket() can 96 | // not run after cmd.errorWithFallback(). 97 | returnTicket := func() { 98 | cmd.Lock() 99 | // Avoid releasing before a ticket is acquired. 100 | for !ticketChecked { 101 | ticketCond.Wait() 102 | } 103 | cmd.circuit.executorPool.Return(cmd.ticket) 104 | cmd.Unlock() 105 | } 106 | // Shared by the following two goroutines. It ensures only the faster 107 | // goroutine runs errWithFallback() and reportAllEvent(). 108 | returnOnce := &sync.Once{} 109 | reportAllEvent := func() { 110 | err := cmd.circuit.ReportEvent(cmd.events, cmd.start, cmd.runDuration) 111 | if err != nil { 112 | log.Printf(err.Error()) 113 | } 114 | } 115 | 116 | go func() { 117 | defer func() { cmd.finished <- true }() 118 | 119 | // Circuits get opened when recent executions have shown to have a high error rate. 120 | // Rejecting new executions allows backends to recover, and the circuit will allow 121 | // new traffic when it feels a healthly state has returned. 122 | if !cmd.circuit.AllowRequest() { 123 | cmd.Lock() 124 | // It's safe for another goroutine to go ahead releasing a nil ticket. 125 | ticketChecked = true 126 | ticketCond.Signal() 127 | cmd.Unlock() 128 | returnOnce.Do(func() { 129 | returnTicket() 130 | cmd.errorWithFallback(ctx, ErrCircuitOpen) 131 | reportAllEvent() 132 | }) 133 | return 134 | } 135 | 136 | // As backends falter, requests take longer but don't always fail. 137 | // 138 | // When requests slow down but the incoming rate of requests stays the same, you have to 139 | // run more at a time to keep up. By controlling concurrency during these situations, you can 140 | // shed load which accumulates due to the increasing ratio of active commands to incoming requests. 141 | cmd.Lock() 142 | select { 143 | case cmd.ticket = <-circuit.executorPool.Tickets: 144 | ticketChecked = true 145 | ticketCond.Signal() 146 | cmd.Unlock() 147 | default: 148 | ticketChecked = true 149 | ticketCond.Signal() 150 | cmd.Unlock() 151 | returnOnce.Do(func() { 152 | returnTicket() 153 | cmd.errorWithFallback(ctx, ErrMaxConcurrency) 154 | reportAllEvent() 155 | }) 156 | return 157 | } 158 | 159 | runStart := time.Now() 160 | runErr := run(ctx) 161 | returnOnce.Do(func() { 162 | defer reportAllEvent() 163 | cmd.runDuration = time.Since(runStart) 164 | returnTicket() 165 | if runErr != nil { 166 | cmd.errorWithFallback(ctx, runErr) 167 | return 168 | } 169 | cmd.reportEvent("success") 170 | }) 171 | }() 172 | 173 | go func() { 174 | timer := time.NewTimer(getSettings(name).Timeout) 175 | defer timer.Stop() 176 | 177 | select { 178 | case <-cmd.finished: 179 | // returnOnce has been executed in another goroutine 180 | case <-ctx.Done(): 181 | returnOnce.Do(func() { 182 | returnTicket() 183 | cmd.errorWithFallback(ctx, ctx.Err()) 184 | reportAllEvent() 185 | }) 186 | return 187 | case <-timer.C: 188 | returnOnce.Do(func() { 189 | returnTicket() 190 | cmd.errorWithFallback(ctx, ErrTimeout) 191 | reportAllEvent() 192 | }) 193 | return 194 | } 195 | }() 196 | 197 | return cmd.errChan 198 | } 199 | 200 | // Do runs your function in a synchronous manner, blocking until either your function succeeds 201 | // or an error is returned, including hystrix circuit errors 202 | func Do(name string, run runFunc, fallback fallbackFunc) error { 203 | runC := func(ctx context.Context) error { 204 | return run() 205 | } 206 | var fallbackC fallbackFuncC 207 | if fallback != nil { 208 | fallbackC = func(ctx context.Context, err error) error { 209 | return fallback(err) 210 | } 211 | } 212 | return DoC(context.Background(), name, runC, fallbackC) 213 | } 214 | 215 | // DoC runs your function in a synchronous manner, blocking until either your function succeeds 216 | // or an error is returned, including hystrix circuit errors 217 | func DoC(ctx context.Context, name string, run runFuncC, fallback fallbackFuncC) error { 218 | done := make(chan struct{}, 1) 219 | 220 | r := func(ctx context.Context) error { 221 | err := run(ctx) 222 | if err != nil { 223 | return err 224 | } 225 | 226 | done <- struct{}{} 227 | return nil 228 | } 229 | 230 | f := func(ctx context.Context, e error) error { 231 | err := fallback(ctx, e) 232 | if err != nil { 233 | return err 234 | } 235 | 236 | done <- struct{}{} 237 | return nil 238 | } 239 | 240 | var errChan chan error 241 | if fallback == nil { 242 | errChan = GoC(ctx, name, r, nil) 243 | } else { 244 | errChan = GoC(ctx, name, r, f) 245 | } 246 | 247 | select { 248 | case <-done: 249 | return nil 250 | case err := <-errChan: 251 | return err 252 | } 253 | } 254 | 255 | func (c *command) reportEvent(eventType string) { 256 | c.Lock() 257 | defer c.Unlock() 258 | 259 | c.events = append(c.events, eventType) 260 | } 261 | 262 | // errorWithFallback triggers the fallback while reporting the appropriate metric events. 263 | func (c *command) errorWithFallback(ctx context.Context, err error) { 264 | eventType := "failure" 265 | if err == ErrCircuitOpen { 266 | eventType = "short-circuit" 267 | } else if err == ErrMaxConcurrency { 268 | eventType = "rejected" 269 | } else if err == ErrTimeout { 270 | eventType = "timeout" 271 | } else if err == context.Canceled { 272 | eventType = "context_canceled" 273 | } else if err == context.DeadlineExceeded { 274 | eventType = "context_deadline_exceeded" 275 | } 276 | 277 | c.reportEvent(eventType) 278 | fallbackErr := c.tryFallback(ctx, err) 279 | if fallbackErr != nil { 280 | c.errChan <- fallbackErr 281 | } 282 | } 283 | 284 | func (c *command) tryFallback(ctx context.Context, err error) error { 285 | if c.fallback == nil { 286 | // If we don't have a fallback return the original error. 287 | return err 288 | } 289 | 290 | fallbackErr := c.fallback(ctx, err) 291 | if fallbackErr != nil { 292 | c.reportEvent("fallback-failure") 293 | return fmt.Errorf("fallback failed with '%v'. run error was '%v'", fallbackErr, err) 294 | } 295 | 296 | c.reportEvent("fallback-success") 297 | 298 | return nil 299 | } 300 | -------------------------------------------------------------------------------- /hystrix/hystrix_test.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "testing" 7 | "time" 8 | 9 | "testing/quick" 10 | 11 | . "github.com/smartystreets/goconvey/convey" 12 | ) 13 | 14 | func TestSuccess(t *testing.T) { 15 | Convey("with a command which sends to a channel", t, func() { 16 | defer Flush() 17 | 18 | resultChan := make(chan int) 19 | errChan := GoC(context.Background(), "", func(ctx context.Context) error { 20 | resultChan <- 1 21 | return nil 22 | }, nil) 23 | 24 | Convey("reading from that channel should provide the expected value", func() { 25 | So(<-resultChan, ShouldEqual, 1) 26 | 27 | Convey("no errors should be returned", func() { 28 | So(len(errChan), ShouldEqual, 0) 29 | }) 30 | Convey("metrics are recorded", func() { 31 | time.Sleep(10 * time.Millisecond) 32 | cb, _, _ := GetCircuit("") 33 | So(cb.metrics.DefaultCollector().Successes().Sum(time.Now()), ShouldEqual, 1) 34 | }) 35 | }) 36 | }) 37 | } 38 | 39 | func TestFallback(t *testing.T) { 40 | Convey("with a command which fails, and whose fallback sends to a channel", t, func() { 41 | defer Flush() 42 | 43 | resultChan := make(chan int) 44 | errChan := GoC(context.Background(), "", func(ctx context.Context) error { 45 | return fmt.Errorf("error") 46 | }, func(ctx context.Context, err error) error { 47 | if err.Error() == "error" { 48 | resultChan <- 1 49 | } 50 | return nil 51 | }) 52 | 53 | Convey("reading from that channel should provide the expected value", func() { 54 | So(<-resultChan, ShouldEqual, 1) 55 | 56 | Convey("no errors should be returned", func() { 57 | So(len(errChan), ShouldEqual, 0) 58 | }) 59 | Convey("metrics are recorded", func() { 60 | time.Sleep(10 * time.Millisecond) 61 | cb, _, _ := GetCircuit("") 62 | So(cb.metrics.DefaultCollector().Successes().Sum(time.Now()), ShouldEqual, 0) 63 | So(cb.metrics.DefaultCollector().Failures().Sum(time.Now()), ShouldEqual, 1) 64 | So(cb.metrics.DefaultCollector().FallbackSuccesses().Sum(time.Now()), ShouldEqual, 1) 65 | }) 66 | }) 67 | }) 68 | } 69 | 70 | func TestTimeout(t *testing.T) { 71 | Convey("with a command which times out, and whose fallback sends to a channel", t, func() { 72 | defer Flush() 73 | ConfigureCommand("", CommandConfig{Timeout: 100}) 74 | 75 | resultChan := make(chan int) 76 | errChan := GoC(context.Background(), "", func(ctx context.Context) error { 77 | time.Sleep(1 * time.Second) 78 | resultChan <- 1 79 | return nil 80 | }, func(ctx context.Context, err error) error { 81 | if err == ErrTimeout { 82 | resultChan <- 2 83 | } 84 | return nil 85 | }) 86 | 87 | Convey("reading from that channel should provide the expected value", func() { 88 | So(<-resultChan, ShouldEqual, 2) 89 | }) 90 | Convey("no errors should be returned", func() { 91 | So(len(errChan), ShouldEqual, 0) 92 | }) 93 | }) 94 | } 95 | 96 | func TestTimeoutEmptyFallback(t *testing.T) { 97 | Convey("with a command which times out, and has no fallback", t, func() { 98 | defer Flush() 99 | ConfigureCommand("", CommandConfig{Timeout: 100}) 100 | 101 | resultChan := make(chan int) 102 | errChan := GoC(context.Background(), "", func(ctx context.Context) error { 103 | time.Sleep(1 * time.Second) 104 | resultChan <- 1 105 | return nil 106 | }, nil) 107 | 108 | Convey("a timeout error should be returned", func() { 109 | So(<-errChan, ShouldResemble, ErrTimeout) 110 | 111 | Convey("metrics are recorded", func() { 112 | time.Sleep(10 * time.Millisecond) 113 | cb, _, _ := GetCircuit("") 114 | So(cb.metrics.DefaultCollector().Successes().Sum(time.Now()), ShouldEqual, 0) 115 | So(cb.metrics.DefaultCollector().Timeouts().Sum(time.Now()), ShouldEqual, 1) 116 | So(cb.metrics.DefaultCollector().FallbackSuccesses().Sum(time.Now()), ShouldEqual, 0) 117 | So(cb.metrics.DefaultCollector().FallbackFailures().Sum(time.Now()), ShouldEqual, 0) 118 | }) 119 | }) 120 | }) 121 | } 122 | 123 | func TestMaxConcurrent(t *testing.T) { 124 | Convey("if a command has max concurrency set to 2", t, func() { 125 | defer Flush() 126 | ConfigureCommand("", CommandConfig{MaxConcurrentRequests: 2}) 127 | resultChan := make(chan int) 128 | 129 | run := func(ctx context.Context) error { 130 | time.Sleep(1 * time.Second) 131 | resultChan <- 1 132 | return nil 133 | } 134 | 135 | Convey("and 3 of those commands try to execute at the same time", func() { 136 | var good, bad int 137 | 138 | for i := 0; i < 3; i++ { 139 | errChan := GoC(context.Background(), "", run, nil) 140 | time.Sleep(10 * time.Millisecond) 141 | 142 | select { 143 | case err := <-errChan: 144 | if err == ErrMaxConcurrency { 145 | bad++ 146 | } 147 | default: 148 | good++ 149 | } 150 | } 151 | 152 | Convey("one will return a 'max concurrency' error", func() { 153 | So(bad, ShouldEqual, 1) 154 | So(good, ShouldEqual, 2) 155 | }) 156 | }) 157 | }) 158 | } 159 | 160 | func TestForceOpenCircuit(t *testing.T) { 161 | Convey("when a command with a forced open circuit is run", t, func() { 162 | defer Flush() 163 | 164 | cb, _, err := GetCircuit("") 165 | So(err, ShouldEqual, nil) 166 | 167 | cb.toggleForceOpen(true) 168 | 169 | errChan := GoC(context.Background(), "", func(ctx context.Context) error { 170 | return nil 171 | }, nil) 172 | 173 | Convey("a 'circuit open' error is returned", func() { 174 | So(<-errChan, ShouldResemble, ErrCircuitOpen) 175 | 176 | Convey("metrics are recorded", func() { 177 | time.Sleep(10 * time.Millisecond) 178 | cb, _, _ := GetCircuit("") 179 | So(cb.metrics.DefaultCollector().Successes().Sum(time.Now()), ShouldEqual, 0) 180 | So(cb.metrics.DefaultCollector().ShortCircuits().Sum(time.Now()), ShouldEqual, 1) 181 | }) 182 | }) 183 | }) 184 | } 185 | 186 | func TestNilFallbackRunError(t *testing.T) { 187 | Convey("when your run function returns an error and you have no fallback", t, func() { 188 | defer Flush() 189 | errChan := GoC(context.Background(), "", func(ctx context.Context) error { 190 | return fmt.Errorf("run_error") 191 | }, nil) 192 | 193 | Convey("the returned error should be the run error", func() { 194 | err := <-errChan 195 | 196 | So(err.Error(), ShouldEqual, "run_error") 197 | }) 198 | }) 199 | } 200 | 201 | func TestFailedFallback(t *testing.T) { 202 | Convey("when your run and fallback functions return an error", t, func() { 203 | defer Flush() 204 | errChan := GoC(context.Background(), "", func(ctx context.Context) error { 205 | return fmt.Errorf("run_error") 206 | }, func(ctx context.Context, err error) error { 207 | return fmt.Errorf("fallback_error") 208 | }) 209 | 210 | Convey("the returned error should contain both", func() { 211 | err := <-errChan 212 | 213 | So(err.Error(), ShouldEqual, "fallback failed with 'fallback_error'. run error was 'run_error'") 214 | }) 215 | }) 216 | } 217 | 218 | func TestCloseCircuitAfterSuccess(t *testing.T) { 219 | Convey("when a circuit is open", t, func() { 220 | defer Flush() 221 | cb, _, err := GetCircuit("") 222 | So(err, ShouldEqual, nil) 223 | 224 | cb.setOpen() 225 | 226 | Convey("commands immediately following should short-circuit", func() { 227 | errChan := GoC(context.Background(), "", func(ctx context.Context) error { 228 | return nil 229 | }, nil) 230 | 231 | So(<-errChan, ShouldResemble, ErrCircuitOpen) 232 | }) 233 | 234 | Convey("and a successful command is run after the sleep window", func() { 235 | time.Sleep(6 * time.Second) 236 | 237 | done := make(chan bool, 1) 238 | GoC(context.Background(), "", func(ctx context.Context) error { 239 | done <- true 240 | return nil 241 | }, nil) 242 | 243 | Convey("the circuit should be closed", func() { 244 | So(<-done, ShouldEqual, true) 245 | time.Sleep(10 * time.Millisecond) 246 | So(cb.IsOpen(), ShouldEqual, false) 247 | }) 248 | }) 249 | }) 250 | } 251 | 252 | func TestFailAfterTimeout(t *testing.T) { 253 | Convey("when a slow command fails after the timeout fires", t, func() { 254 | defer Flush() 255 | ConfigureCommand("", CommandConfig{Timeout: 10}) 256 | 257 | out := make(chan struct{}, 2) 258 | errChan := GoC(context.Background(), "", func(ctx context.Context) error { 259 | time.Sleep(50 * time.Millisecond) 260 | return fmt.Errorf("foo") 261 | }, func(ctx context.Context, err error) error { 262 | out <- struct{}{} 263 | return err 264 | }) 265 | 266 | Convey("we do not panic", func() { 267 | So((<-errChan).Error(), ShouldContainSubstring, "timeout") 268 | // wait for command to fail, should not panic 269 | time.Sleep(100 * time.Millisecond) 270 | }) 271 | 272 | Convey("we do not call the fallback twice", func() { 273 | time.Sleep(100 * time.Millisecond) 274 | So(len(out), ShouldEqual, 1) 275 | }) 276 | }) 277 | } 278 | 279 | func TestSlowFallbackOpenCircuit(t *testing.T) { 280 | Convey("with an open circuit and a slow fallback", t, func() { 281 | defer Flush() 282 | 283 | ConfigureCommand("", CommandConfig{Timeout: 10}) 284 | 285 | cb, _, err := GetCircuit("") 286 | So(err, ShouldEqual, nil) 287 | cb.setOpen() 288 | 289 | out := make(chan struct{}, 2) 290 | 291 | Convey("when the command short circuits", func() { 292 | GoC(context.Background(), "", func(ctx context.Context) error { 293 | return nil 294 | }, func(ctx context.Context, err error) error { 295 | time.Sleep(100 * time.Millisecond) 296 | out <- struct{}{} 297 | return nil 298 | }) 299 | 300 | Convey("the fallback only fires for the short-circuit, not both", func() { 301 | time.Sleep(250 * time.Millisecond) 302 | So(len(out), ShouldEqual, 1) 303 | 304 | Convey("and a timeout event is not recorded", func() { 305 | So(cb.metrics.DefaultCollector().ShortCircuits().Sum(time.Now()), ShouldEqual, 1) 306 | So(cb.metrics.DefaultCollector().Timeouts().Sum(time.Now()), ShouldEqual, 0) 307 | }) 308 | }) 309 | }) 310 | }) 311 | } 312 | 313 | func TestFallbackAfterRejected(t *testing.T) { 314 | Convey("with a circuit whose pool is full", t, func() { 315 | defer Flush() 316 | ConfigureCommand("", CommandConfig{MaxConcurrentRequests: 1}) 317 | cb, _, err := GetCircuit("") 318 | if err != nil { 319 | t.Fatal(err) 320 | } 321 | <-cb.executorPool.Tickets 322 | 323 | Convey("executing a successful fallback function due to rejection", func() { 324 | runChan := make(chan bool, 1) 325 | fallbackChan := make(chan bool, 1) 326 | GoC(context.Background(), "", func(ctx context.Context) error { 327 | // if run executes after fallback, this will panic due to sending to a closed channel 328 | runChan <- true 329 | close(fallbackChan) 330 | return nil 331 | }, func(ctx context.Context, err error) error { 332 | fallbackChan <- true 333 | close(runChan) 334 | return nil 335 | }) 336 | 337 | Convey("should not execute the run function", func() { 338 | So(<-fallbackChan, ShouldBeTrue) 339 | So(<-runChan, ShouldBeFalse) 340 | }) 341 | }) 342 | }) 343 | } 344 | 345 | func TestReturnTicket_QuickCheck(t *testing.T) { 346 | compareTicket := func() bool { 347 | defer Flush() 348 | ConfigureCommand("", CommandConfig{Timeout: 2}) 349 | errChan := GoC(context.Background(), "", func(ctx context.Context) error { 350 | c := make(chan struct{}) 351 | <-c // should block 352 | return nil 353 | }, nil) 354 | err := <-errChan 355 | So(err, ShouldResemble, ErrTimeout) 356 | cb, _, err := GetCircuit("") 357 | So(err, ShouldBeNil) 358 | return cb.executorPool.ActiveCount() == 0 359 | } 360 | 361 | Convey("with a run command that doesn't return", t, func() { 362 | Convey("checking many times that after GoC(context.Background(), ), the ticket returns to the pool after the timeout", func() { 363 | err := quick.Check(compareTicket, nil) 364 | So(err, ShouldBeNil) 365 | }) 366 | }) 367 | } 368 | 369 | func TestReturnTicket(t *testing.T) { 370 | Convey("with a run command that doesn't return", t, func() { 371 | defer Flush() 372 | 373 | ConfigureCommand("", CommandConfig{Timeout: 10}) 374 | 375 | errChan := GoC(context.Background(), "", func(ctx context.Context) error { 376 | c := make(chan struct{}) 377 | <-c // should block 378 | return nil 379 | }, nil) 380 | 381 | Convey("after GoC(context.Background(), ), the ticket returns to the pool after the timeout", func() { 382 | err := <-errChan 383 | So(err, ShouldResemble, ErrTimeout) 384 | 385 | cb, _, err := GetCircuit("") 386 | So(err, ShouldBeNil) 387 | So(cb.executorPool.ActiveCount(), ShouldEqual, 0) 388 | }) 389 | }) 390 | } 391 | 392 | func TestContextHandling(t *testing.T) { 393 | Convey("with a run command which times out", t, func() { 394 | defer Flush() 395 | 396 | ConfigureCommand("", CommandConfig{Timeout: 15}) 397 | cb, _, err := GetCircuit("") 398 | if err != nil { 399 | t.Fatal(err) 400 | } 401 | 402 | out := make(chan int, 1) 403 | run := func(ctx context.Context) error { 404 | time.Sleep(20 * time.Millisecond) 405 | out <- 1 406 | return nil 407 | } 408 | 409 | fallback := func(ctx context.Context, e error) error { 410 | return nil 411 | } 412 | 413 | Convey("with a valid context", func() { 414 | errChan := GoC(context.Background(), "", run, nil) 415 | time.Sleep(25 * time.Millisecond) 416 | So((<-errChan).Error(), ShouldEqual, ErrTimeout.Error()) 417 | So(cb.metrics.DefaultCollector().NumRequests().Sum(time.Now()), ShouldEqual, 1) 418 | So(cb.metrics.DefaultCollector().Failures().Sum(time.Now()), ShouldEqual, 0) 419 | So(cb.metrics.DefaultCollector().Timeouts().Sum(time.Now()), ShouldEqual, 1) 420 | So(cb.metrics.DefaultCollector().ContextCanceled().Sum(time.Now()), ShouldEqual, 0) 421 | So(cb.metrics.DefaultCollector().ContextDeadlineExceeded().Sum(time.Now()), ShouldEqual, 0) 422 | }) 423 | 424 | Convey("with a valid context and a fallback", func() { 425 | errChan := GoC(context.Background(), "", run, fallback) 426 | time.Sleep(25 * time.Millisecond) 427 | So(len(errChan), ShouldEqual, 0) 428 | So(cb.metrics.DefaultCollector().NumRequests().Sum(time.Now()), ShouldEqual, 1) 429 | So(cb.metrics.DefaultCollector().Failures().Sum(time.Now()), ShouldEqual, 0) 430 | So(cb.metrics.DefaultCollector().Timeouts().Sum(time.Now()), ShouldEqual, 1) 431 | So(cb.metrics.DefaultCollector().ContextCanceled().Sum(time.Now()), ShouldEqual, 0) 432 | So(cb.metrics.DefaultCollector().ContextDeadlineExceeded().Sum(time.Now()), ShouldEqual, 0) 433 | So(cb.metrics.DefaultCollector().FallbackSuccesses().Sum(time.Now()), ShouldEqual, 1) 434 | }) 435 | 436 | Convey("with a context timeout", func() { 437 | testCtx, cancel := context.WithTimeout(context.Background(), 5*time.Millisecond) 438 | errChan := GoC(testCtx, "", run, nil) 439 | time.Sleep(25 * time.Millisecond) 440 | So((<-errChan).Error(), ShouldEqual, context.DeadlineExceeded.Error()) 441 | So(cb.metrics.DefaultCollector().NumRequests().Sum(time.Now()), ShouldEqual, 1) 442 | So(cb.metrics.DefaultCollector().Failures().Sum(time.Now()), ShouldEqual, 0) 443 | So(cb.metrics.DefaultCollector().Timeouts().Sum(time.Now()), ShouldEqual, 0) 444 | So(cb.metrics.DefaultCollector().ContextCanceled().Sum(time.Now()), ShouldEqual, 0) 445 | So(cb.metrics.DefaultCollector().ContextDeadlineExceeded().Sum(time.Now()), ShouldEqual, 1) 446 | cancel() 447 | }) 448 | 449 | Convey("with a context timeout and a fallback", func() { 450 | testCtx, cancel := context.WithTimeout(context.Background(), 5*time.Millisecond) 451 | errChan := GoC(testCtx, "", run, fallback) 452 | time.Sleep(25 * time.Millisecond) 453 | So(len(errChan), ShouldEqual, 0) 454 | So(cb.metrics.DefaultCollector().NumRequests().Sum(time.Now()), ShouldEqual, 1) 455 | So(cb.metrics.DefaultCollector().Failures().Sum(time.Now()), ShouldEqual, 0) 456 | So(cb.metrics.DefaultCollector().Timeouts().Sum(time.Now()), ShouldEqual, 0) 457 | So(cb.metrics.DefaultCollector().ContextCanceled().Sum(time.Now()), ShouldEqual, 0) 458 | So(cb.metrics.DefaultCollector().ContextDeadlineExceeded().Sum(time.Now()), ShouldEqual, 1) 459 | So(cb.metrics.DefaultCollector().FallbackSuccesses().Sum(time.Now()), ShouldEqual, 1) 460 | cancel() 461 | }) 462 | 463 | Convey("with a canceled context", func() { 464 | testCtx, cancel := context.WithCancel(context.Background()) 465 | errChan := GoC(testCtx, "", run, nil) 466 | time.Sleep(5 * time.Millisecond) 467 | cancel() 468 | time.Sleep(20 * time.Millisecond) 469 | So((<-errChan).Error(), ShouldEqual, context.Canceled.Error()) 470 | So(cb.metrics.DefaultCollector().NumRequests().Sum(time.Now()), ShouldEqual, 1) 471 | So(cb.metrics.DefaultCollector().Failures().Sum(time.Now()), ShouldEqual, 0) 472 | So(cb.metrics.DefaultCollector().Timeouts().Sum(time.Now()), ShouldEqual, 0) 473 | So(cb.metrics.DefaultCollector().ContextCanceled().Sum(time.Now()), ShouldEqual, 1) 474 | So(cb.metrics.DefaultCollector().ContextDeadlineExceeded().Sum(time.Now()), ShouldEqual, 0) 475 | }) 476 | 477 | Convey("with a canceled context and a fallback", func() { 478 | testCtx, cancel := context.WithCancel(context.Background()) 479 | errChan := GoC(testCtx, "", run, fallback) 480 | time.Sleep(5 * time.Millisecond) 481 | cancel() 482 | time.Sleep(20 * time.Millisecond) 483 | So(len(errChan), ShouldEqual, 0) 484 | So(cb.metrics.DefaultCollector().NumRequests().Sum(time.Now()), ShouldEqual, 1) 485 | So(cb.metrics.DefaultCollector().Failures().Sum(time.Now()), ShouldEqual, 0) 486 | So(cb.metrics.DefaultCollector().Timeouts().Sum(time.Now()), ShouldEqual, 0) 487 | So(cb.metrics.DefaultCollector().ContextCanceled().Sum(time.Now()), ShouldEqual, 1) 488 | So(cb.metrics.DefaultCollector().ContextDeadlineExceeded().Sum(time.Now()), ShouldEqual, 0) 489 | So(cb.metrics.DefaultCollector().FallbackSuccesses().Sum(time.Now()), ShouldEqual, 1) 490 | }) 491 | 492 | }) 493 | } 494 | 495 | func TestDoC(t *testing.T) { 496 | Convey("with a command which succeeds", t, func() { 497 | defer Flush() 498 | 499 | out := make(chan bool, 1) 500 | run := func(ctx context.Context) error { 501 | out <- true 502 | return nil 503 | } 504 | 505 | Convey("the run function is executed", func() { 506 | err := DoC(context.Background(), "", run, nil) 507 | So(err, ShouldBeNil) 508 | So(<-out, ShouldEqual, true) 509 | }) 510 | }) 511 | 512 | Convey("with a command which fails", t, func() { 513 | defer Flush() 514 | 515 | run := func(ctx context.Context) error { 516 | return fmt.Errorf("i failed") 517 | } 518 | 519 | Convey("with no fallback", func() { 520 | err := DoC(context.Background(), "", run, nil) 521 | Convey("the error is returned", func() { 522 | So(err.Error(), ShouldEqual, "i failed") 523 | }) 524 | }) 525 | 526 | Convey("with a succeeding fallback", func() { 527 | out := make(chan bool, 1) 528 | fallback := func(ctx context.Context, err error) error { 529 | out <- true 530 | return nil 531 | } 532 | 533 | err := DoC(context.Background(), "", run, fallback) 534 | 535 | Convey("the fallback is executed", func() { 536 | So(err, ShouldBeNil) 537 | So(<-out, ShouldEqual, true) 538 | }) 539 | }) 540 | 541 | Convey("with a failing fallback", func() { 542 | fallback := func(ctx context.Context, err error) error { 543 | return fmt.Errorf("fallback failed") 544 | } 545 | 546 | err := DoC(context.Background(), "", run, fallback) 547 | 548 | Convey("both errors are returned", func() { 549 | So(err.Error(), ShouldEqual, "fallback failed with 'fallback failed'. run error was 'i failed'") 550 | }) 551 | }) 552 | }) 553 | 554 | Convey("with a command which times out", t, func() { 555 | defer Flush() 556 | 557 | ConfigureCommand("", CommandConfig{Timeout: 10}) 558 | 559 | err := DoC(context.Background(), "", func(ctx context.Context) error { 560 | time.Sleep(100 * time.Millisecond) 561 | return nil 562 | }, nil) 563 | 564 | Convey("the timeout error is returned", func() { 565 | So(err.Error(), ShouldEqual, "hystrix: timeout") 566 | }) 567 | }) 568 | } 569 | -------------------------------------------------------------------------------- /hystrix/logger.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | type logger interface { 4 | Printf(format string, items ...interface{}) 5 | } 6 | 7 | // NoopLogger does not log anything. 8 | type NoopLogger struct{} 9 | 10 | // Printf does nothing. 11 | func (l NoopLogger) Printf(format string, items ...interface{}) {} 12 | -------------------------------------------------------------------------------- /hystrix/metric_collector/default_metric_collector.go: -------------------------------------------------------------------------------- 1 | package metricCollector 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/afex/hystrix-go/hystrix/rolling" 7 | ) 8 | 9 | // DefaultMetricCollector holds information about the circuit state. 10 | // This implementation of MetricCollector is the canonical source of information about the circuit. 11 | // It is used for for all internal hystrix operations 12 | // including circuit health checks and metrics sent to the hystrix dashboard. 13 | // 14 | // Metric Collectors do not need Mutexes as they are updated by circuits within a locked context. 15 | type DefaultMetricCollector struct { 16 | mutex *sync.RWMutex 17 | 18 | numRequests *rolling.Number 19 | errors *rolling.Number 20 | 21 | successes *rolling.Number 22 | failures *rolling.Number 23 | rejects *rolling.Number 24 | shortCircuits *rolling.Number 25 | timeouts *rolling.Number 26 | contextCanceled *rolling.Number 27 | contextDeadlineExceeded *rolling.Number 28 | 29 | fallbackSuccesses *rolling.Number 30 | fallbackFailures *rolling.Number 31 | totalDuration *rolling.Timing 32 | runDuration *rolling.Timing 33 | } 34 | 35 | func newDefaultMetricCollector(name string) MetricCollector { 36 | m := &DefaultMetricCollector{} 37 | m.mutex = &sync.RWMutex{} 38 | m.Reset() 39 | return m 40 | } 41 | 42 | // NumRequests returns the rolling number of requests 43 | func (d *DefaultMetricCollector) NumRequests() *rolling.Number { 44 | d.mutex.RLock() 45 | defer d.mutex.RUnlock() 46 | return d.numRequests 47 | } 48 | 49 | // Errors returns the rolling number of errors 50 | func (d *DefaultMetricCollector) Errors() *rolling.Number { 51 | d.mutex.RLock() 52 | defer d.mutex.RUnlock() 53 | return d.errors 54 | } 55 | 56 | // Successes returns the rolling number of successes 57 | func (d *DefaultMetricCollector) Successes() *rolling.Number { 58 | d.mutex.RLock() 59 | defer d.mutex.RUnlock() 60 | return d.successes 61 | } 62 | 63 | // Failures returns the rolling number of failures 64 | func (d *DefaultMetricCollector) Failures() *rolling.Number { 65 | d.mutex.RLock() 66 | defer d.mutex.RUnlock() 67 | return d.failures 68 | } 69 | 70 | // Rejects returns the rolling number of rejects 71 | func (d *DefaultMetricCollector) Rejects() *rolling.Number { 72 | d.mutex.RLock() 73 | defer d.mutex.RUnlock() 74 | return d.rejects 75 | } 76 | 77 | // ShortCircuits returns the rolling number of short circuits 78 | func (d *DefaultMetricCollector) ShortCircuits() *rolling.Number { 79 | d.mutex.RLock() 80 | defer d.mutex.RUnlock() 81 | return d.shortCircuits 82 | } 83 | 84 | // Timeouts returns the rolling number of timeouts 85 | func (d *DefaultMetricCollector) Timeouts() *rolling.Number { 86 | d.mutex.RLock() 87 | defer d.mutex.RUnlock() 88 | return d.timeouts 89 | } 90 | 91 | // FallbackSuccesses returns the rolling number of fallback successes 92 | func (d *DefaultMetricCollector) FallbackSuccesses() *rolling.Number { 93 | d.mutex.RLock() 94 | defer d.mutex.RUnlock() 95 | return d.fallbackSuccesses 96 | } 97 | 98 | func (d *DefaultMetricCollector) ContextCanceled() *rolling.Number { 99 | d.mutex.RLock() 100 | defer d.mutex.RUnlock() 101 | return d.contextCanceled 102 | } 103 | 104 | func (d *DefaultMetricCollector) ContextDeadlineExceeded() *rolling.Number { 105 | d.mutex.RLock() 106 | defer d.mutex.RUnlock() 107 | return d.contextDeadlineExceeded 108 | } 109 | 110 | // FallbackFailures returns the rolling number of fallback failures 111 | func (d *DefaultMetricCollector) FallbackFailures() *rolling.Number { 112 | d.mutex.RLock() 113 | defer d.mutex.RUnlock() 114 | return d.fallbackFailures 115 | } 116 | 117 | // TotalDuration returns the rolling total duration 118 | func (d *DefaultMetricCollector) TotalDuration() *rolling.Timing { 119 | d.mutex.RLock() 120 | defer d.mutex.RUnlock() 121 | return d.totalDuration 122 | } 123 | 124 | // RunDuration returns the rolling run duration 125 | func (d *DefaultMetricCollector) RunDuration() *rolling.Timing { 126 | d.mutex.RLock() 127 | defer d.mutex.RUnlock() 128 | return d.runDuration 129 | } 130 | 131 | func (d *DefaultMetricCollector) Update(r MetricResult) { 132 | d.mutex.RLock() 133 | defer d.mutex.RUnlock() 134 | 135 | d.numRequests.Increment(r.Attempts) 136 | d.errors.Increment(r.Errors) 137 | d.successes.Increment(r.Successes) 138 | d.failures.Increment(r.Failures) 139 | d.rejects.Increment(r.Rejects) 140 | d.shortCircuits.Increment(r.ShortCircuits) 141 | d.timeouts.Increment(r.Timeouts) 142 | d.fallbackSuccesses.Increment(r.FallbackSuccesses) 143 | d.fallbackFailures.Increment(r.FallbackFailures) 144 | d.contextCanceled.Increment(r.ContextCanceled) 145 | d.contextDeadlineExceeded.Increment(r.ContextDeadlineExceeded) 146 | 147 | d.totalDuration.Add(r.TotalDuration) 148 | d.runDuration.Add(r.RunDuration) 149 | } 150 | 151 | // Reset resets all metrics in this collector to 0. 152 | func (d *DefaultMetricCollector) Reset() { 153 | d.mutex.Lock() 154 | defer d.mutex.Unlock() 155 | 156 | d.numRequests = rolling.NewNumber() 157 | d.errors = rolling.NewNumber() 158 | d.successes = rolling.NewNumber() 159 | d.rejects = rolling.NewNumber() 160 | d.shortCircuits = rolling.NewNumber() 161 | d.failures = rolling.NewNumber() 162 | d.timeouts = rolling.NewNumber() 163 | d.fallbackSuccesses = rolling.NewNumber() 164 | d.fallbackFailures = rolling.NewNumber() 165 | d.contextCanceled = rolling.NewNumber() 166 | d.contextDeadlineExceeded = rolling.NewNumber() 167 | d.totalDuration = rolling.NewTiming() 168 | d.runDuration = rolling.NewTiming() 169 | } 170 | -------------------------------------------------------------------------------- /hystrix/metric_collector/metric_collector.go: -------------------------------------------------------------------------------- 1 | package metricCollector 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | ) 7 | 8 | // Registry is the default metricCollectorRegistry that circuits will use to 9 | // collect statistics about the health of the circuit. 10 | var Registry = metricCollectorRegistry{ 11 | lock: &sync.RWMutex{}, 12 | registry: []func(name string) MetricCollector{ 13 | newDefaultMetricCollector, 14 | }, 15 | } 16 | 17 | type metricCollectorRegistry struct { 18 | lock *sync.RWMutex 19 | registry []func(name string) MetricCollector 20 | } 21 | 22 | // InitializeMetricCollectors runs the registried MetricCollector Initializers to create an array of MetricCollectors. 23 | func (m *metricCollectorRegistry) InitializeMetricCollectors(name string) []MetricCollector { 24 | m.lock.RLock() 25 | defer m.lock.RUnlock() 26 | 27 | metrics := make([]MetricCollector, len(m.registry)) 28 | for i, metricCollectorInitializer := range m.registry { 29 | metrics[i] = metricCollectorInitializer(name) 30 | } 31 | return metrics 32 | } 33 | 34 | // Register places a MetricCollector Initializer in the registry maintained by this metricCollectorRegistry. 35 | func (m *metricCollectorRegistry) Register(initMetricCollector func(string) MetricCollector) { 36 | m.lock.Lock() 37 | defer m.lock.Unlock() 38 | 39 | m.registry = append(m.registry, initMetricCollector) 40 | } 41 | 42 | type MetricResult struct { 43 | Attempts float64 44 | Errors float64 45 | Successes float64 46 | Failures float64 47 | Rejects float64 48 | ShortCircuits float64 49 | Timeouts float64 50 | FallbackSuccesses float64 51 | FallbackFailures float64 52 | ContextCanceled float64 53 | ContextDeadlineExceeded float64 54 | TotalDuration time.Duration 55 | RunDuration time.Duration 56 | ConcurrencyInUse float64 57 | } 58 | 59 | // MetricCollector represents the contract that all collectors must fulfill to gather circuit statistics. 60 | // Implementations of this interface do not have to maintain locking around thier data stores so long as 61 | // they are not modified outside of the hystrix context. 62 | type MetricCollector interface { 63 | // Update accepts a set of metrics from a command execution for remote instrumentation 64 | Update(MetricResult) 65 | // Reset resets the internal counters and timers. 66 | Reset() 67 | } 68 | -------------------------------------------------------------------------------- /hystrix/metrics.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | 7 | "github.com/afex/hystrix-go/hystrix/metric_collector" 8 | "github.com/afex/hystrix-go/hystrix/rolling" 9 | ) 10 | 11 | type commandExecution struct { 12 | Types []string `json:"types"` 13 | Start time.Time `json:"start_time"` 14 | RunDuration time.Duration `json:"run_duration"` 15 | ConcurrencyInUse float64 `json:"concurrency_inuse"` 16 | } 17 | 18 | type metricExchange struct { 19 | Name string 20 | Updates chan *commandExecution 21 | Mutex *sync.RWMutex 22 | 23 | metricCollectors []metricCollector.MetricCollector 24 | } 25 | 26 | func newMetricExchange(name string) *metricExchange { 27 | m := &metricExchange{} 28 | m.Name = name 29 | 30 | m.Updates = make(chan *commandExecution, 2000) 31 | m.Mutex = &sync.RWMutex{} 32 | m.metricCollectors = metricCollector.Registry.InitializeMetricCollectors(name) 33 | m.Reset() 34 | 35 | go m.Monitor() 36 | 37 | return m 38 | } 39 | 40 | // The Default Collector function will panic if collectors are not setup to specification. 41 | func (m *metricExchange) DefaultCollector() *metricCollector.DefaultMetricCollector { 42 | if len(m.metricCollectors) < 1 { 43 | panic("No Metric Collectors Registered.") 44 | } 45 | collection, ok := m.metricCollectors[0].(*metricCollector.DefaultMetricCollector) 46 | if !ok { 47 | panic("Default metric collector is not registered correctly. The default metric collector must be registered first.") 48 | } 49 | return collection 50 | } 51 | 52 | func (m *metricExchange) Monitor() { 53 | for update := range m.Updates { 54 | // we only grab a read lock to make sure Reset() isn't changing the numbers. 55 | m.Mutex.RLock() 56 | 57 | totalDuration := time.Since(update.Start) 58 | wg := &sync.WaitGroup{} 59 | for _, collector := range m.metricCollectors { 60 | wg.Add(1) 61 | go m.IncrementMetrics(wg, collector, update, totalDuration) 62 | } 63 | wg.Wait() 64 | 65 | m.Mutex.RUnlock() 66 | } 67 | } 68 | 69 | func (m *metricExchange) IncrementMetrics(wg *sync.WaitGroup, collector metricCollector.MetricCollector, update *commandExecution, totalDuration time.Duration) { 70 | // granular metrics 71 | r := metricCollector.MetricResult{ 72 | Attempts: 1, 73 | TotalDuration: totalDuration, 74 | RunDuration: update.RunDuration, 75 | ConcurrencyInUse: update.ConcurrencyInUse, 76 | } 77 | 78 | switch update.Types[0] { 79 | case "success": 80 | r.Successes = 1 81 | case "failure": 82 | r.Failures = 1 83 | r.Errors = 1 84 | case "rejected": 85 | r.Rejects = 1 86 | r.Errors = 1 87 | case "short-circuit": 88 | r.ShortCircuits = 1 89 | r.Errors = 1 90 | case "timeout": 91 | r.Timeouts = 1 92 | r.Errors = 1 93 | case "context_canceled": 94 | r.ContextCanceled = 1 95 | case "context_deadline_exceeded": 96 | r.ContextDeadlineExceeded = 1 97 | } 98 | 99 | if len(update.Types) > 1 { 100 | // fallback metrics 101 | if update.Types[1] == "fallback-success" { 102 | r.FallbackSuccesses = 1 103 | } 104 | if update.Types[1] == "fallback-failure" { 105 | r.FallbackFailures = 1 106 | } 107 | } 108 | 109 | collector.Update(r) 110 | 111 | wg.Done() 112 | } 113 | 114 | func (m *metricExchange) Reset() { 115 | m.Mutex.Lock() 116 | defer m.Mutex.Unlock() 117 | 118 | for _, collector := range m.metricCollectors { 119 | collector.Reset() 120 | } 121 | } 122 | 123 | func (m *metricExchange) Requests() *rolling.Number { 124 | m.Mutex.RLock() 125 | defer m.Mutex.RUnlock() 126 | return m.requestsLocked() 127 | } 128 | 129 | func (m *metricExchange) requestsLocked() *rolling.Number { 130 | return m.DefaultCollector().NumRequests() 131 | } 132 | 133 | func (m *metricExchange) ErrorPercent(now time.Time) int { 134 | m.Mutex.RLock() 135 | defer m.Mutex.RUnlock() 136 | 137 | var errPct float64 138 | reqs := m.requestsLocked().Sum(now) 139 | errs := m.DefaultCollector().Errors().Sum(now) 140 | 141 | if reqs > 0 { 142 | errPct = (float64(errs) / float64(reqs)) * 100 143 | } 144 | 145 | return int(errPct + 0.5) 146 | } 147 | 148 | func (m *metricExchange) IsHealthy(now time.Time) bool { 149 | return m.ErrorPercent(now) < getSettings(m.Name).ErrorPercentThreshold 150 | } 151 | -------------------------------------------------------------------------------- /hystrix/metrics_test.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | . "github.com/smartystreets/goconvey/convey" 8 | ) 9 | 10 | func metricFailingPercent(p int) *metricExchange { 11 | m := newMetricExchange("") 12 | for i := 0; i < 100; i++ { 13 | t := "success" 14 | if i < p { 15 | t = "failure" 16 | } 17 | m.Updates <- &commandExecution{Types: []string{t}} 18 | } 19 | 20 | // Updates needs to be flushed 21 | time.Sleep(100 * time.Millisecond) 22 | 23 | return m 24 | } 25 | 26 | func TestErrorPercent(t *testing.T) { 27 | Convey("with a metric failing 40 percent of the time", t, func() { 28 | m := metricFailingPercent(40) 29 | now := time.Now() 30 | 31 | Convey("ErrorPercent() should return 40", func() { 32 | p := m.ErrorPercent(now) 33 | So(p, ShouldEqual, 40) 34 | }) 35 | 36 | Convey("and a error threshold set to 39", func() { 37 | ConfigureCommand("", CommandConfig{ErrorPercentThreshold: 39}) 38 | 39 | Convey("the metrics should be unhealthy", func() { 40 | So(m.IsHealthy(now), ShouldBeFalse) 41 | }) 42 | 43 | }) 44 | }) 45 | } 46 | -------------------------------------------------------------------------------- /hystrix/pool.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | type executorPool struct { 4 | Name string 5 | Metrics *poolMetrics 6 | Max int 7 | Tickets chan *struct{} 8 | } 9 | 10 | func newExecutorPool(name string) *executorPool { 11 | p := &executorPool{} 12 | p.Name = name 13 | p.Metrics = newPoolMetrics(name) 14 | p.Max = getSettings(name).MaxConcurrentRequests 15 | 16 | p.Tickets = make(chan *struct{}, p.Max) 17 | for i := 0; i < p.Max; i++ { 18 | p.Tickets <- &struct{}{} 19 | } 20 | 21 | return p 22 | } 23 | 24 | func (p *executorPool) Return(ticket *struct{}) { 25 | if ticket == nil { 26 | return 27 | } 28 | 29 | p.Metrics.Updates <- poolMetricsUpdate{ 30 | activeCount: p.ActiveCount(), 31 | } 32 | p.Tickets <- ticket 33 | } 34 | 35 | func (p *executorPool) ActiveCount() int { 36 | return p.Max - len(p.Tickets) 37 | } 38 | -------------------------------------------------------------------------------- /hystrix/pool_metrics.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/afex/hystrix-go/hystrix/rolling" 7 | ) 8 | 9 | type poolMetrics struct { 10 | Mutex *sync.RWMutex 11 | Updates chan poolMetricsUpdate 12 | 13 | Name string 14 | MaxActiveRequests *rolling.Number 15 | Executed *rolling.Number 16 | } 17 | 18 | type poolMetricsUpdate struct { 19 | activeCount int 20 | } 21 | 22 | func newPoolMetrics(name string) *poolMetrics { 23 | m := &poolMetrics{} 24 | m.Name = name 25 | m.Updates = make(chan poolMetricsUpdate) 26 | m.Mutex = &sync.RWMutex{} 27 | 28 | m.Reset() 29 | 30 | go m.Monitor() 31 | 32 | return m 33 | } 34 | 35 | func (m *poolMetrics) Reset() { 36 | m.Mutex.Lock() 37 | defer m.Mutex.Unlock() 38 | 39 | m.MaxActiveRequests = rolling.NewNumber() 40 | m.Executed = rolling.NewNumber() 41 | } 42 | 43 | func (m *poolMetrics) Monitor() { 44 | for u := range m.Updates { 45 | m.Mutex.RLock() 46 | 47 | m.Executed.Increment(1) 48 | m.MaxActiveRequests.UpdateMax(float64(u.activeCount)) 49 | 50 | m.Mutex.RUnlock() 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /hystrix/pool_test.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | . "github.com/smartystreets/goconvey/convey" 8 | ) 9 | 10 | func TestReturn(t *testing.T) { 11 | defer Flush() 12 | 13 | Convey("when returning a ticket to the pool", t, func() { 14 | pool := newExecutorPool("pool") 15 | ticket := <-pool.Tickets 16 | pool.Return(ticket) 17 | time.Sleep(1 * time.Millisecond) 18 | Convey("total executed requests should increment", func() { 19 | So(pool.Metrics.Executed.Sum(time.Now()), ShouldEqual, 1) 20 | }) 21 | }) 22 | } 23 | 24 | func TestActiveCount(t *testing.T) { 25 | defer Flush() 26 | 27 | Convey("when 3 tickets are pulled", t, func() { 28 | pool := newExecutorPool("pool") 29 | <-pool.Tickets 30 | <-pool.Tickets 31 | ticket := <-pool.Tickets 32 | 33 | Convey("ActiveCount() should be 3", func() { 34 | So(pool.ActiveCount(), ShouldEqual, 3) 35 | }) 36 | 37 | Convey("and one is returned", func() { 38 | pool.Return(ticket) 39 | 40 | Convey("max active requests should be 3", func() { 41 | time.Sleep(1 * time.Millisecond) // allow poolMetrics to process channel 42 | So(pool.Metrics.MaxActiveRequests.Max(time.Now()), ShouldEqual, 3) 43 | }) 44 | }) 45 | }) 46 | } 47 | -------------------------------------------------------------------------------- /hystrix/rolling/rolling.go: -------------------------------------------------------------------------------- 1 | package rolling 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | ) 7 | 8 | // Number tracks a numberBucket over a bounded number of 9 | // time buckets. Currently the buckets are one second long and only the last 10 seconds are kept. 10 | type Number struct { 11 | Buckets map[int64]*numberBucket 12 | Mutex *sync.RWMutex 13 | } 14 | 15 | type numberBucket struct { 16 | Value float64 17 | } 18 | 19 | // NewNumber initializes a RollingNumber struct. 20 | func NewNumber() *Number { 21 | r := &Number{ 22 | Buckets: make(map[int64]*numberBucket), 23 | Mutex: &sync.RWMutex{}, 24 | } 25 | return r 26 | } 27 | 28 | func (r *Number) getCurrentBucket() *numberBucket { 29 | now := time.Now().Unix() 30 | var bucket *numberBucket 31 | var ok bool 32 | 33 | if bucket, ok = r.Buckets[now]; !ok { 34 | bucket = &numberBucket{} 35 | r.Buckets[now] = bucket 36 | } 37 | 38 | return bucket 39 | } 40 | 41 | func (r *Number) removeOldBuckets() { 42 | now := time.Now().Unix() - 10 43 | 44 | for timestamp := range r.Buckets { 45 | // TODO: configurable rolling window 46 | if timestamp <= now { 47 | delete(r.Buckets, timestamp) 48 | } 49 | } 50 | } 51 | 52 | // Increment increments the number in current timeBucket. 53 | func (r *Number) Increment(i float64) { 54 | if i == 0 { 55 | return 56 | } 57 | 58 | r.Mutex.Lock() 59 | defer r.Mutex.Unlock() 60 | 61 | b := r.getCurrentBucket() 62 | b.Value += i 63 | r.removeOldBuckets() 64 | } 65 | 66 | // UpdateMax updates the maximum value in the current bucket. 67 | func (r *Number) UpdateMax(n float64) { 68 | r.Mutex.Lock() 69 | defer r.Mutex.Unlock() 70 | 71 | b := r.getCurrentBucket() 72 | if n > b.Value { 73 | b.Value = n 74 | } 75 | r.removeOldBuckets() 76 | } 77 | 78 | // Sum sums the values over the buckets in the last 10 seconds. 79 | func (r *Number) Sum(now time.Time) float64 { 80 | sum := float64(0) 81 | 82 | r.Mutex.RLock() 83 | defer r.Mutex.RUnlock() 84 | 85 | for timestamp, bucket := range r.Buckets { 86 | // TODO: configurable rolling window 87 | if timestamp >= now.Unix()-10 { 88 | sum += bucket.Value 89 | } 90 | } 91 | 92 | return sum 93 | } 94 | 95 | // Max returns the maximum value seen in the last 10 seconds. 96 | func (r *Number) Max(now time.Time) float64 { 97 | var max float64 98 | 99 | r.Mutex.RLock() 100 | defer r.Mutex.RUnlock() 101 | 102 | for timestamp, bucket := range r.Buckets { 103 | // TODO: configurable rolling window 104 | if timestamp >= now.Unix()-10 { 105 | if bucket.Value > max { 106 | max = bucket.Value 107 | } 108 | } 109 | } 110 | 111 | return max 112 | } 113 | 114 | func (r *Number) Avg(now time.Time) float64 { 115 | return r.Sum(now) / 10 116 | } 117 | -------------------------------------------------------------------------------- /hystrix/rolling/rolling_test.go: -------------------------------------------------------------------------------- 1 | package rolling 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | . "github.com/smartystreets/goconvey/convey" 8 | ) 9 | 10 | func TestMax(t *testing.T) { 11 | 12 | Convey("when adding values to a rolling number", t, func() { 13 | n := NewNumber() 14 | for _, x := range []float64{10, 11, 9} { 15 | n.UpdateMax(x) 16 | time.Sleep(1 * time.Second) 17 | } 18 | 19 | Convey("it should know the maximum", func() { 20 | So(n.Max(time.Now()), ShouldEqual, 11) 21 | }) 22 | }) 23 | } 24 | 25 | func TestAvg(t *testing.T) { 26 | Convey("when adding values to a rolling number", t, func() { 27 | n := NewNumber() 28 | for _, x := range []float64{0.5, 1.5, 2.5, 3.5, 4.5} { 29 | n.Increment(x) 30 | time.Sleep(1 * time.Second) 31 | } 32 | 33 | Convey("it should calculate the average over the number of configured buckets", func() { 34 | So(n.Avg(time.Now()), ShouldEqual, 1.25) 35 | }) 36 | }) 37 | } 38 | 39 | func BenchmarkRollingNumberIncrement(b *testing.B) { 40 | n := NewNumber() 41 | 42 | b.ResetTimer() 43 | 44 | for i := 0; i < b.N; i++ { 45 | n.Increment(1) 46 | } 47 | } 48 | 49 | func BenchmarkRollingNumberUpdateMax(b *testing.B) { 50 | n := NewNumber() 51 | 52 | b.ResetTimer() 53 | 54 | for i := 0; i < b.N; i++ { 55 | n.UpdateMax(float64(i)) 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /hystrix/rolling/rolling_timing.go: -------------------------------------------------------------------------------- 1 | package rolling 2 | 3 | import ( 4 | "math" 5 | "sort" 6 | "sync" 7 | "time" 8 | ) 9 | 10 | // Timing maintains time Durations for each time bucket. 11 | // The Durations are kept in an array to allow for a variety of 12 | // statistics to be calculated from the source data. 13 | type Timing struct { 14 | Buckets map[int64]*timingBucket 15 | Mutex *sync.RWMutex 16 | 17 | CachedSortedDurations []time.Duration 18 | LastCachedTime int64 19 | } 20 | 21 | type timingBucket struct { 22 | Durations []time.Duration 23 | } 24 | 25 | // NewTiming creates a RollingTiming struct. 26 | func NewTiming() *Timing { 27 | r := &Timing{ 28 | Buckets: make(map[int64]*timingBucket), 29 | Mutex: &sync.RWMutex{}, 30 | } 31 | return r 32 | } 33 | 34 | type byDuration []time.Duration 35 | 36 | func (c byDuration) Len() int { return len(c) } 37 | func (c byDuration) Swap(i, j int) { c[i], c[j] = c[j], c[i] } 38 | func (c byDuration) Less(i, j int) bool { return c[i] < c[j] } 39 | 40 | // SortedDurations returns an array of time.Duration sorted from shortest 41 | // to longest that have occurred in the last 60 seconds. 42 | func (r *Timing) SortedDurations() []time.Duration { 43 | r.Mutex.RLock() 44 | t := r.LastCachedTime 45 | r.Mutex.RUnlock() 46 | 47 | if t+time.Duration(1*time.Second).Nanoseconds() > time.Now().UnixNano() { 48 | // don't recalculate if current cache is still fresh 49 | return r.CachedSortedDurations 50 | } 51 | 52 | var durations byDuration 53 | now := time.Now() 54 | 55 | r.Mutex.Lock() 56 | defer r.Mutex.Unlock() 57 | 58 | for timestamp, b := range r.Buckets { 59 | // TODO: configurable rolling window 60 | if timestamp >= now.Unix()-60 { 61 | for _, d := range b.Durations { 62 | durations = append(durations, d) 63 | } 64 | } 65 | } 66 | 67 | sort.Sort(durations) 68 | 69 | r.CachedSortedDurations = durations 70 | r.LastCachedTime = time.Now().UnixNano() 71 | 72 | return r.CachedSortedDurations 73 | } 74 | 75 | func (r *Timing) getCurrentBucket() *timingBucket { 76 | r.Mutex.RLock() 77 | now := time.Now() 78 | bucket, exists := r.Buckets[now.Unix()] 79 | r.Mutex.RUnlock() 80 | 81 | if !exists { 82 | r.Mutex.Lock() 83 | defer r.Mutex.Unlock() 84 | 85 | r.Buckets[now.Unix()] = &timingBucket{} 86 | bucket = r.Buckets[now.Unix()] 87 | } 88 | 89 | return bucket 90 | } 91 | 92 | func (r *Timing) removeOldBuckets() { 93 | now := time.Now() 94 | 95 | for timestamp := range r.Buckets { 96 | // TODO: configurable rolling window 97 | if timestamp <= now.Unix()-60 { 98 | delete(r.Buckets, timestamp) 99 | } 100 | } 101 | } 102 | 103 | // Add appends the time.Duration given to the current time bucket. 104 | func (r *Timing) Add(duration time.Duration) { 105 | b := r.getCurrentBucket() 106 | 107 | r.Mutex.Lock() 108 | defer r.Mutex.Unlock() 109 | 110 | b.Durations = append(b.Durations, duration) 111 | r.removeOldBuckets() 112 | } 113 | 114 | // Percentile computes the percentile given with a linear interpolation. 115 | func (r *Timing) Percentile(p float64) uint32 { 116 | sortedDurations := r.SortedDurations() 117 | length := len(sortedDurations) 118 | if length <= 0 { 119 | return 0 120 | } 121 | 122 | pos := r.ordinal(len(sortedDurations), p) - 1 123 | return uint32(sortedDurations[pos].Nanoseconds() / 1000000) 124 | } 125 | 126 | func (r *Timing) ordinal(length int, percentile float64) int64 { 127 | if percentile == 0 && length > 0 { 128 | return 1 129 | } 130 | 131 | return int64(math.Ceil((percentile / float64(100)) * float64(length))) 132 | } 133 | 134 | // Mean computes the average timing in the last 60 seconds. 135 | func (r *Timing) Mean() uint32 { 136 | sortedDurations := r.SortedDurations() 137 | var sum time.Duration 138 | for _, d := range sortedDurations { 139 | sum += d 140 | } 141 | 142 | length := int64(len(sortedDurations)) 143 | if length == 0 { 144 | return 0 145 | } 146 | 147 | return uint32(sum.Nanoseconds()/length) / 1000000 148 | } 149 | -------------------------------------------------------------------------------- /hystrix/rolling/rolling_timing_test.go: -------------------------------------------------------------------------------- 1 | package rolling 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | . "github.com/smartystreets/goconvey/convey" 8 | ) 9 | 10 | func TestOrdinal(t *testing.T) { 11 | Convey("given a new rolling timing", t, func() { 12 | 13 | r := NewTiming() 14 | 15 | Convey("Mean() should be 0", func() { 16 | So(r.Mean(), ShouldEqual, 0) 17 | }) 18 | 19 | Convey("and given a set of lengths and percentiles", func() { 20 | var ordinalTests = []struct { 21 | length int 22 | perc float64 23 | expected int64 24 | }{ 25 | {1, 0, 1}, 26 | {2, 0, 1}, 27 | {2, 50, 1}, 28 | {2, 51, 2}, 29 | {5, 30, 2}, 30 | {5, 40, 2}, 31 | {5, 50, 3}, 32 | {11, 25, 3}, 33 | {11, 50, 6}, 34 | {11, 75, 9}, 35 | {11, 100, 11}, 36 | } 37 | 38 | Convey("each should generate the expected ordinal", func() { 39 | 40 | for _, s := range ordinalTests { 41 | So(r.ordinal(s.length, s.perc), ShouldEqual, s.expected) 42 | } 43 | }) 44 | }) 45 | 46 | Convey("after adding 2 timings", func() { 47 | r.Add(100 * time.Millisecond) 48 | time.Sleep(2 * time.Second) 49 | r.Add(200 * time.Millisecond) 50 | 51 | Convey("the mean should be the average of the timings", func() { 52 | So(r.Mean(), ShouldEqual, 150) 53 | }) 54 | }) 55 | 56 | Convey("after adding many timings", func() { 57 | durations := []int{1, 1004, 1004, 1004, 1004, 1004, 1004, 1004, 1004, 1004, 1005, 1005, 1005, 1005, 1005, 1005, 1005, 1005, 1005, 1005, 1005, 1005, 1005, 1005, 1006, 1006, 1006, 1006, 1007, 1007, 1007, 1008, 1015} 58 | for _, d := range durations { 59 | r.Add(time.Duration(d) * time.Millisecond) 60 | } 61 | 62 | Convey("calculates correct percentiles", func() { 63 | So(r.Percentile(0), ShouldEqual, 1) 64 | So(r.Percentile(75), ShouldEqual, 1006) 65 | So(r.Percentile(99), ShouldEqual, 1015) 66 | So(r.Percentile(100), ShouldEqual, 1015) 67 | }) 68 | }) 69 | }) 70 | } 71 | -------------------------------------------------------------------------------- /hystrix/settings.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | import ( 4 | "sync" 5 | "time" 6 | ) 7 | 8 | var ( 9 | // DefaultTimeout is how long to wait for command to complete, in milliseconds 10 | DefaultTimeout = 1000 11 | // DefaultMaxConcurrent is how many commands of the same type can run at the same time 12 | DefaultMaxConcurrent = 10 13 | // DefaultVolumeThreshold is the minimum number of requests needed before a circuit can be tripped due to health 14 | DefaultVolumeThreshold = 20 15 | // DefaultSleepWindow is how long, in milliseconds, to wait after a circuit opens before testing for recovery 16 | DefaultSleepWindow = 5000 17 | // DefaultErrorPercentThreshold causes circuits to open once the rolling measure of errors exceeds this percent of requests 18 | DefaultErrorPercentThreshold = 50 19 | // DefaultLogger is the default logger that will be used in the Hystrix package. By default prints nothing. 20 | DefaultLogger = NoopLogger{} 21 | ) 22 | 23 | type Settings struct { 24 | Timeout time.Duration 25 | MaxConcurrentRequests int 26 | RequestVolumeThreshold uint64 27 | SleepWindow time.Duration 28 | ErrorPercentThreshold int 29 | } 30 | 31 | // CommandConfig is used to tune circuit settings at runtime 32 | type CommandConfig struct { 33 | Timeout int `json:"timeout"` 34 | MaxConcurrentRequests int `json:"max_concurrent_requests"` 35 | RequestVolumeThreshold int `json:"request_volume_threshold"` 36 | SleepWindow int `json:"sleep_window"` 37 | ErrorPercentThreshold int `json:"error_percent_threshold"` 38 | } 39 | 40 | var circuitSettings map[string]*Settings 41 | var settingsMutex *sync.RWMutex 42 | var log logger 43 | 44 | func init() { 45 | circuitSettings = make(map[string]*Settings) 46 | settingsMutex = &sync.RWMutex{} 47 | log = DefaultLogger 48 | } 49 | 50 | // Configure applies settings for a set of circuits 51 | func Configure(cmds map[string]CommandConfig) { 52 | for k, v := range cmds { 53 | ConfigureCommand(k, v) 54 | } 55 | } 56 | 57 | // ConfigureCommand applies settings for a circuit 58 | func ConfigureCommand(name string, config CommandConfig) { 59 | settingsMutex.Lock() 60 | defer settingsMutex.Unlock() 61 | 62 | timeout := DefaultTimeout 63 | if config.Timeout != 0 { 64 | timeout = config.Timeout 65 | } 66 | 67 | max := DefaultMaxConcurrent 68 | if config.MaxConcurrentRequests != 0 { 69 | max = config.MaxConcurrentRequests 70 | } 71 | 72 | volume := DefaultVolumeThreshold 73 | if config.RequestVolumeThreshold != 0 { 74 | volume = config.RequestVolumeThreshold 75 | } 76 | 77 | sleep := DefaultSleepWindow 78 | if config.SleepWindow != 0 { 79 | sleep = config.SleepWindow 80 | } 81 | 82 | errorPercent := DefaultErrorPercentThreshold 83 | if config.ErrorPercentThreshold != 0 { 84 | errorPercent = config.ErrorPercentThreshold 85 | } 86 | 87 | circuitSettings[name] = &Settings{ 88 | Timeout: time.Duration(timeout) * time.Millisecond, 89 | MaxConcurrentRequests: max, 90 | RequestVolumeThreshold: uint64(volume), 91 | SleepWindow: time.Duration(sleep) * time.Millisecond, 92 | ErrorPercentThreshold: errorPercent, 93 | } 94 | } 95 | 96 | func getSettings(name string) *Settings { 97 | settingsMutex.RLock() 98 | s, exists := circuitSettings[name] 99 | settingsMutex.RUnlock() 100 | 101 | if !exists { 102 | ConfigureCommand(name, CommandConfig{}) 103 | s = getSettings(name) 104 | } 105 | 106 | return s 107 | } 108 | 109 | func GetCircuitSettings() map[string]*Settings { 110 | copy := make(map[string]*Settings) 111 | 112 | settingsMutex.RLock() 113 | for key, val := range circuitSettings { 114 | copy[key] = val 115 | } 116 | settingsMutex.RUnlock() 117 | 118 | return copy 119 | } 120 | 121 | // SetLogger configures the logger that will be used. This only applies to the hystrix package. 122 | func SetLogger(l logger) { 123 | log = l 124 | } 125 | -------------------------------------------------------------------------------- /hystrix/settings_test.go: -------------------------------------------------------------------------------- 1 | package hystrix 2 | 3 | import ( 4 | "testing" 5 | "time" 6 | 7 | . "github.com/smartystreets/goconvey/convey" 8 | ) 9 | 10 | func TestConfigureConcurrency(t *testing.T) { 11 | Convey("given a command configured for 100 concurrent requests", t, func() { 12 | ConfigureCommand("", CommandConfig{MaxConcurrentRequests: 100}) 13 | 14 | Convey("reading the concurrency should be the same", func() { 15 | So(getSettings("").MaxConcurrentRequests, ShouldEqual, 100) 16 | }) 17 | }) 18 | } 19 | 20 | func TestConfigureTimeout(t *testing.T) { 21 | Convey("given a command configured for a 10000 milliseconds", t, func() { 22 | ConfigureCommand("", CommandConfig{Timeout: 10000}) 23 | 24 | Convey("reading the timeout should be the same", func() { 25 | So(getSettings("").Timeout, ShouldEqual, time.Duration(10*time.Second)) 26 | }) 27 | }) 28 | } 29 | 30 | func TestConfigureRVT(t *testing.T) { 31 | Convey("given a command configured to need 30 requests before tripping the circuit", t, func() { 32 | ConfigureCommand("", CommandConfig{RequestVolumeThreshold: 30}) 33 | 34 | Convey("reading the threshold should be the same", func() { 35 | So(getSettings("").RequestVolumeThreshold, ShouldEqual, uint64(30)) 36 | }) 37 | }) 38 | } 39 | 40 | func TestSleepWindowDefault(t *testing.T) { 41 | Convey("given default settings", t, func() { 42 | ConfigureCommand("", CommandConfig{}) 43 | 44 | Convey("the sleep window should be 5 seconds", func() { 45 | So(getSettings("").SleepWindow, ShouldEqual, time.Duration(5*time.Second)) 46 | }) 47 | }) 48 | } 49 | 50 | func TestGetCircuitSettings(t *testing.T) { 51 | Convey("when calling GetCircuitSettings", t, func() { 52 | ConfigureCommand("test", CommandConfig{Timeout: 30000}) 53 | 54 | Convey("should read the same setting just added", func() { 55 | So(GetCircuitSettings()["test"], ShouldEqual, getSettings("test")) 56 | So(GetCircuitSettings()["test"].Timeout, ShouldEqual, time.Duration(30*time.Second)) 57 | }) 58 | }) 59 | } 60 | -------------------------------------------------------------------------------- /loadtest/README.md: -------------------------------------------------------------------------------- 1 | integration app to measure behavior of circuits under load. 2 | 3 | `go run service/main.go -statsd mystatsdhost:8125` 4 | 5 | `ab -n 10000000 -c 10 http://localhost:8888/` -------------------------------------------------------------------------------- /loadtest/service/main.go: -------------------------------------------------------------------------------- 1 | // Package main implements an http server which executes a hystrix command each request and 2 | // sends metrics to a statsd instance to aid performance testing. 3 | package main 4 | 5 | import ( 6 | "flag" 7 | "log" 8 | "math/rand" 9 | "net/http" 10 | _ "net/http/pprof" 11 | "runtime" 12 | "time" 13 | 14 | "github.com/afex/hystrix-go/hystrix" 15 | "github.com/afex/hystrix-go/hystrix/metric_collector" 16 | "github.com/afex/hystrix-go/plugins" 17 | "github.com/cactus/go-statsd-client/statsd" 18 | ) 19 | 20 | const ( 21 | deltaWindow = 10 22 | minDelay = 35 23 | maxDelay = 55 24 | ) 25 | 26 | var ( 27 | delay int 28 | ) 29 | 30 | const ( 31 | up = iota 32 | down 33 | ) 34 | 35 | func init() { 36 | delay = minDelay 37 | } 38 | 39 | func main() { 40 | runtime.GOMAXPROCS(runtime.NumCPU()) 41 | statsdHost := flag.String("statsd", "", "Statsd host to record load test metrics") 42 | flag.Parse() 43 | 44 | stats, err := statsd.NewClient(*statsdHost, "hystrix.loadtest.service") 45 | if err != nil { 46 | log.Fatalf("could not initialize statsd client: %v", err) 47 | } 48 | 49 | c, err := plugins.InitializeStatsdCollector(&plugins.StatsdCollectorConfig{ 50 | StatsdAddr: *statsdHost, 51 | Prefix: "hystrix.loadtest.circuits", 52 | }) 53 | if err != nil { 54 | log.Fatalf("could not initialize statsd client: %v", err) 55 | } 56 | metricCollector.Registry.Register(c.NewStatsdCollector) 57 | 58 | hystrix.ConfigureCommand("test", hystrix.CommandConfig{ 59 | Timeout: 50, 60 | }) 61 | 62 | go rotateDelay() 63 | 64 | http.HandleFunc("/", timedHandler(handle, stats)) 65 | log.Print("starting server") 66 | log.Fatal(http.ListenAndServe(":8888", nil)) 67 | } 68 | 69 | func timedHandler(fn func(w http.ResponseWriter, r *http.Request), stats statsd.Statter) func(w http.ResponseWriter, r *http.Request) { 70 | return func(w http.ResponseWriter, r *http.Request) { 71 | start := time.Now() 72 | fn(w, r) 73 | stats.TimingDuration("request", time.Since(start), 1) 74 | } 75 | } 76 | 77 | func handle(w http.ResponseWriter, r *http.Request) { 78 | done := make(chan struct{}, 1) 79 | errChan := hystrix.Go("test", func() error { 80 | delta := rand.Intn(deltaWindow) 81 | time.Sleep(time.Duration(delay+delta) * time.Millisecond) 82 | done <- struct{}{} 83 | return nil 84 | }, func(err error) error { 85 | done <- struct{}{} 86 | return nil 87 | }) 88 | 89 | select { 90 | case err := <-errChan: 91 | http.Error(w, err.Error(), 500) 92 | case <-done: 93 | w.Write([]byte("OK")) 94 | } 95 | } 96 | 97 | func rotateDelay() { 98 | direction := up 99 | for { 100 | if direction == up && delay == maxDelay { 101 | direction = down 102 | } 103 | if direction == down && delay == minDelay { 104 | direction = up 105 | } 106 | 107 | if direction == up { 108 | delay += 1 109 | } else { 110 | delay -= 1 111 | } 112 | 113 | time.Sleep(5 * time.Second) 114 | log.Printf("setting delay to %v", delay) 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /plugins/datadog_collector.go: -------------------------------------------------------------------------------- 1 | package plugins 2 | 3 | import ( 4 | 5 | // Developed on https://github.com/DataDog/datadog-go/tree/a27810dd518c69be741a7fd5d0e39f674f615be8 6 | "github.com/DataDog/datadog-go/statsd" 7 | "github.com/afex/hystrix-go/hystrix/metric_collector" 8 | ) 9 | 10 | // These metrics are constants because we're leveraging the Datadog tagging 11 | // extension to statsd. 12 | // 13 | // They only apply to the DatadogCollector and are only useful if providing your 14 | // own implemenation of DatadogClient 15 | const ( 16 | // DM = Datadog Metric 17 | DM_CircuitOpen = "hystrix.circuitOpen" 18 | DM_Attempts = "hystrix.attempts" 19 | DM_Errors = "hystrix.errors" 20 | DM_Successes = "hystrix.successes" 21 | DM_Failures = "hystrix.failures" 22 | DM_Rejects = "hystrix.rejects" 23 | DM_ShortCircuits = "hystrix.shortCircuits" 24 | DM_Timeouts = "hystrix.timeouts" 25 | DM_FallbackSuccesses = "hystrix.fallbackSuccesses" 26 | DM_FallbackFailures = "hystrix.fallbackFailures" 27 | DM_TotalDuration = "hystrix.totalDuration" 28 | DM_RunDuration = "hystrix.runDuration" 29 | ) 30 | 31 | type ( 32 | // DatadogClient is the minimum interface needed by 33 | // NewDatadogCollectorWithClient 34 | DatadogClient interface { 35 | Count(name string, value int64, tags []string, rate float64) error 36 | Gauge(name string, value float64, tags []string, rate float64) error 37 | TimeInMilliseconds(name string, value float64, tags []string, rate float64) error 38 | } 39 | 40 | // DatadogCollector fulfills the metricCollector interface allowing users to 41 | // ship circuit stats to Datadog. 42 | // 43 | // This Collector, by default, uses github.com/DataDog/datadog-go/statsd for 44 | // transport. The main advantage of this over statsd is building graphs and 45 | // multi-alert monitors around single metrics (constantized above) and 46 | // adding tag dimensions. You can set up a single monitor to rule them all 47 | // across services and geographies. Graphs become much simpler to setup by 48 | // allowing you to create queries like the following 49 | // 50 | // { 51 | // "viz": "timeseries", 52 | // "requests": [ 53 | // { 54 | // "q": "max:hystrix.runDuration.95percentile{$region} by {hystrixcircuit}", 55 | // "type": "line" 56 | // } 57 | // ] 58 | // } 59 | // 60 | // As new circuits come online you get graphing and monitoring "for free". 61 | DatadogCollector struct { 62 | client DatadogClient 63 | tags []string 64 | } 65 | ) 66 | 67 | // NewDatadogCollector creates a collector for a specific circuit with a 68 | // "github.com/DataDog/datadog-go/statsd".(*Client). 69 | // 70 | // addr is in the format ":" (e.g. "localhost:8125") 71 | // 72 | // prefix may be an empty string 73 | // 74 | // Example use 75 | // package main 76 | // 77 | // import ( 78 | // "github.com/afex/hystrix-go/plugins" 79 | // "github.com/afex/hystrix-go/hystrix/metric_collector" 80 | // ) 81 | // 82 | // func main() { 83 | // collector, err := plugins.NewDatadogCollector("localhost:8125", "") 84 | // if err != nil { 85 | // panic(err) 86 | // } 87 | // metricCollector.Registry.Register(collector) 88 | // } 89 | func NewDatadogCollector(addr, prefix string) (func(string) metricCollector.MetricCollector, error) { 90 | 91 | c, err := statsd.NewBuffered(addr, 100) 92 | if err != nil { 93 | return nil, err 94 | } 95 | 96 | // Prefix every metric with the app name 97 | c.Namespace = prefix 98 | 99 | return NewDatadogCollectorWithClient(c), nil 100 | } 101 | 102 | // NewDatadogCollectorWithClient accepts an interface which allows you to 103 | // provide your own implementation of a statsd client, alter configuration on 104 | // "github.com/DataDog/datadog-go/statsd".(*Client), provide additional tags per 105 | // circuit-metric tuple, and add logging if you need it. 106 | func NewDatadogCollectorWithClient(client DatadogClient) func(string) metricCollector.MetricCollector { 107 | 108 | return func(name string) metricCollector.MetricCollector { 109 | 110 | return &DatadogCollector{ 111 | client: client, 112 | tags: []string{"hystrixcircuit:" + name}, 113 | } 114 | } 115 | } 116 | 117 | func (dc *DatadogCollector) Update(r metricCollector.MetricResult) { 118 | if r.Attempts > 0 { 119 | dc.client.Count(DM_Attempts, int64(r.Attempts), dc.tags, 1.0) 120 | } 121 | if r.Errors > 0 { 122 | dc.client.Count(DM_Errors, int64(r.Errors), dc.tags, 1.0) 123 | } 124 | if r.Successes > 0 { 125 | dc.client.Gauge(DM_CircuitOpen, 0, dc.tags, 1.0) 126 | dc.client.Count(DM_Successes, int64(r.Successes), dc.tags, 1.0) 127 | } 128 | if r.Failures > 0 { 129 | dc.client.Count(DM_Failures, int64(r.Failures), dc.tags, 1.0) 130 | } 131 | if r.Rejects > 0 { 132 | dc.client.Count(DM_Rejects, int64(r.Rejects), dc.tags, 1.0) 133 | } 134 | if r.ShortCircuits > 0 { 135 | dc.client.Gauge(DM_CircuitOpen, 1, dc.tags, 1.0) 136 | dc.client.Count(DM_ShortCircuits, int64(r.ShortCircuits), dc.tags, 1.0) 137 | } 138 | if r.Timeouts > 0 { 139 | dc.client.Count(DM_Timeouts, int64(r.Timeouts), dc.tags, 1.0) 140 | } 141 | if r.FallbackSuccesses > 0 { 142 | dc.client.Count(DM_FallbackSuccesses, int64(r.FallbackSuccesses), dc.tags, 1.0) 143 | } 144 | if r.FallbackFailures > 0 { 145 | dc.client.Count(DM_FallbackFailures, int64(r.FallbackFailures), dc.tags, 1.0) 146 | } 147 | 148 | ms := float64(r.TotalDuration.Nanoseconds() / 1000000) 149 | dc.client.TimeInMilliseconds(DM_TotalDuration, ms, dc.tags, 1.0) 150 | 151 | ms = float64(r.RunDuration.Nanoseconds() / 1000000) 152 | dc.client.TimeInMilliseconds(DM_RunDuration, ms, dc.tags, 1.0) 153 | } 154 | 155 | // Reset is a noop operation in this collector. 156 | func (dc *DatadogCollector) Reset() {} 157 | -------------------------------------------------------------------------------- /plugins/graphite_aggregator.go: -------------------------------------------------------------------------------- 1 | // Plugins allows users to operate on statistics recorded for each circuit operation. 2 | // Plugins should be careful to be lightweight as they will be called frequently. 3 | package plugins 4 | 5 | import ( 6 | "net" 7 | "strings" 8 | "time" 9 | 10 | "github.com/afex/hystrix-go/hystrix/metric_collector" 11 | "github.com/rcrowley/go-metrics" 12 | ) 13 | 14 | var makeTimerFunc = func() interface{} { return metrics.NewTimer() } 15 | var makeCounterFunc = func() interface{} { return metrics.NewCounter() } 16 | 17 | // GraphiteCollector fulfills the metricCollector interface allowing users to ship circuit 18 | // stats to a graphite backend. To use users must call InitializeGraphiteCollector before 19 | // circuits are started. Then register NewGraphiteCollector with metricCollector.Registry.Register(NewGraphiteCollector). 20 | // 21 | // This Collector uses github.com/rcrowley/go-metrics for aggregation. See that repo for more details 22 | // on how metrics are aggregated and expressed in graphite. 23 | type GraphiteCollector struct { 24 | attemptsPrefix string 25 | errorsPrefix string 26 | successesPrefix string 27 | failuresPrefix string 28 | rejectsPrefix string 29 | shortCircuitsPrefix string 30 | timeoutsPrefix string 31 | fallbackSuccessesPrefix string 32 | fallbackFailuresPrefix string 33 | totalDurationPrefix string 34 | runDurationPrefix string 35 | } 36 | 37 | // GraphiteCollectorConfig provides configuration that the graphite client will need. 38 | type GraphiteCollectorConfig struct { 39 | // GraphiteAddr is the tcp address of the graphite server 40 | GraphiteAddr *net.TCPAddr 41 | // Prefix is the prefix that will be prepended to all metrics sent from this collector. 42 | Prefix string 43 | // TickInterval spcifies the period that this collector will send metrics to the server. 44 | TickInterval time.Duration 45 | } 46 | 47 | // InitializeGraphiteCollector creates the connection to the graphite server 48 | // and should be called before any metrics are recorded. 49 | func InitializeGraphiteCollector(config *GraphiteCollectorConfig) { 50 | go metrics.Graphite(metrics.DefaultRegistry, config.TickInterval, config.Prefix, config.GraphiteAddr) 51 | } 52 | 53 | // NewGraphiteCollector creates a collector for a specific circuit. The 54 | // prefix given to this circuit will be {config.Prefix}.{circuit_name}.{metric}. 55 | // Circuits with "/" in their names will have them replaced with ".". 56 | func NewGraphiteCollector(name string) metricCollector.MetricCollector { 57 | name = strings.Replace(name, "/", "-", -1) 58 | name = strings.Replace(name, ":", "-", -1) 59 | name = strings.Replace(name, ".", "-", -1) 60 | return &GraphiteCollector{ 61 | attemptsPrefix: name + ".attempts", 62 | errorsPrefix: name + ".errors", 63 | successesPrefix: name + ".successes", 64 | failuresPrefix: name + ".failures", 65 | rejectsPrefix: name + ".rejects", 66 | shortCircuitsPrefix: name + ".shortCircuits", 67 | timeoutsPrefix: name + ".timeouts", 68 | fallbackSuccessesPrefix: name + ".fallbackSuccesses", 69 | fallbackFailuresPrefix: name + ".fallbackFailures", 70 | totalDurationPrefix: name + ".totalDuration", 71 | runDurationPrefix: name + ".runDuration", 72 | } 73 | } 74 | 75 | func (g *GraphiteCollector) incrementCounterMetric(prefix string, i float64) { 76 | if i == 0 { 77 | return 78 | } 79 | c, ok := metrics.GetOrRegister(prefix, makeCounterFunc).(metrics.Counter) 80 | if !ok { 81 | return 82 | } 83 | c.Inc(int64(i)) 84 | } 85 | 86 | func (g *GraphiteCollector) updateTimerMetric(prefix string, dur time.Duration) { 87 | c, ok := metrics.GetOrRegister(prefix, makeTimerFunc).(metrics.Timer) 88 | if !ok { 89 | return 90 | } 91 | c.Update(dur) 92 | } 93 | 94 | func (g *GraphiteCollector) Update(r metricCollector.MetricResult) { 95 | g.incrementCounterMetric(g.attemptsPrefix, r.Attempts) 96 | g.incrementCounterMetric(g.errorsPrefix, r.Errors) 97 | g.incrementCounterMetric(g.successesPrefix, r.Successes) 98 | g.incrementCounterMetric(g.failuresPrefix, r.Failures) 99 | g.incrementCounterMetric(g.rejectsPrefix, r.Rejects) 100 | g.incrementCounterMetric(g.shortCircuitsPrefix, r.ShortCircuits) 101 | g.incrementCounterMetric(g.timeoutsPrefix, r.Timeouts) 102 | g.incrementCounterMetric(g.fallbackSuccessesPrefix, r.FallbackSuccesses) 103 | g.incrementCounterMetric(g.fallbackFailuresPrefix, r.FallbackFailures) 104 | g.updateTimerMetric(g.totalDurationPrefix, r.TotalDuration) 105 | g.updateTimerMetric(g.runDurationPrefix, r.RunDuration) 106 | } 107 | 108 | // Reset is a noop operation in this collector. 109 | func (g *GraphiteCollector) Reset() {} 110 | -------------------------------------------------------------------------------- /plugins/statsd_collector.go: -------------------------------------------------------------------------------- 1 | package plugins 2 | 3 | import ( 4 | "log" 5 | "strings" 6 | "time" 7 | 8 | "github.com/afex/hystrix-go/hystrix/metric_collector" 9 | "github.com/cactus/go-statsd-client/statsd" 10 | ) 11 | 12 | // StatsdCollector fulfills the metricCollector interface allowing users to ship circuit 13 | // stats to a Statsd backend. To use users must call InitializeStatsdCollector before 14 | // circuits are started. Then register NewStatsdCollector with metricCollector.Registry.Register(NewStatsdCollector). 15 | // 16 | // This Collector uses https://github.com/cactus/go-statsd-client/ for transport. 17 | type StatsdCollector struct { 18 | client statsd.Statter 19 | circuitOpenPrefix string 20 | attemptsPrefix string 21 | errorsPrefix string 22 | successesPrefix string 23 | failuresPrefix string 24 | rejectsPrefix string 25 | shortCircuitsPrefix string 26 | timeoutsPrefix string 27 | fallbackSuccessesPrefix string 28 | fallbackFailuresPrefix string 29 | canceledPrefix string 30 | deadlinePrefix string 31 | totalDurationPrefix string 32 | runDurationPrefix string 33 | concurrencyInUsePrefix string 34 | sampleRate float32 35 | } 36 | 37 | type StatsdCollectorClient struct { 38 | client statsd.Statter 39 | sampleRate float32 40 | } 41 | 42 | // https://github.com/etsy/statsd/blob/master/docs/metric_types.md#multi-metric-packets 43 | const ( 44 | WANStatsdFlushBytes = 512 45 | LANStatsdFlushBytes = 1432 46 | GigabitStatsdFlushBytes = 8932 47 | ) 48 | 49 | // StatsdCollectorConfig provides configuration that the Statsd client will need. 50 | type StatsdCollectorConfig struct { 51 | // StatsdAddr is the tcp address of the Statsd server 52 | StatsdAddr string 53 | // Prefix is the prefix that will be prepended to all metrics sent from this collector. 54 | Prefix string 55 | // StatsdSampleRate sets statsd sampling. If 0, defaults to 1.0. (no sampling) 56 | SampleRate float32 57 | // FlushBytes sets message size for statsd packets. If 0, defaults to LANFlushSize. 58 | FlushBytes int 59 | } 60 | 61 | // InitializeStatsdCollector creates the connection to the Statsd server 62 | // and should be called before any metrics are recorded. 63 | // 64 | // Users should ensure to call Close() on the client. 65 | func InitializeStatsdCollector(config *StatsdCollectorConfig) (*StatsdCollectorClient, error) { 66 | flushBytes := config.FlushBytes 67 | if flushBytes == 0 { 68 | flushBytes = LANStatsdFlushBytes 69 | } 70 | 71 | sampleRate := config.SampleRate 72 | if sampleRate == 0 { 73 | sampleRate = 1 74 | } 75 | 76 | c, err := statsd.NewBufferedClient(config.StatsdAddr, config.Prefix, 1*time.Second, flushBytes) 77 | if err != nil { 78 | log.Printf("Could not initiale buffered client: %s. Falling back to a Noop Statsd client", err) 79 | c, _ = statsd.NewNoopClient() 80 | } 81 | return &StatsdCollectorClient{ 82 | client: c, 83 | sampleRate: sampleRate, 84 | }, err 85 | } 86 | 87 | // NewStatsdCollector creates a collector for a specific circuit. The 88 | // prefix given to this circuit will be {config.Prefix}.{circuit_name}.{metric}. 89 | // Circuits with "/" in their names will have them replaced with ".". 90 | func (s *StatsdCollectorClient) NewStatsdCollector(name string) metricCollector.MetricCollector { 91 | if s.client == nil { 92 | log.Fatalf("Statsd client must be initialized before circuits are created.") 93 | } 94 | name = strings.Replace(name, "/", "-", -1) 95 | name = strings.Replace(name, ":", "-", -1) 96 | name = strings.Replace(name, ".", "-", -1) 97 | return &StatsdCollector{ 98 | client: s.client, 99 | circuitOpenPrefix: name + ".circuitOpen", 100 | attemptsPrefix: name + ".attempts", 101 | errorsPrefix: name + ".errors", 102 | successesPrefix: name + ".successes", 103 | failuresPrefix: name + ".failures", 104 | rejectsPrefix: name + ".rejects", 105 | shortCircuitsPrefix: name + ".shortCircuits", 106 | timeoutsPrefix: name + ".timeouts", 107 | fallbackSuccessesPrefix: name + ".fallbackSuccesses", 108 | fallbackFailuresPrefix: name + ".fallbackFailures", 109 | canceledPrefix: name + ".contextCanceled", 110 | deadlinePrefix: name + ".contextDeadlineExceeded", 111 | totalDurationPrefix: name + ".totalDuration", 112 | runDurationPrefix: name + ".runDuration", 113 | concurrencyInUsePrefix: name + ".concurrencyInUse", 114 | sampleRate: s.sampleRate, 115 | } 116 | } 117 | 118 | func (g *StatsdCollector) setGauge(prefix string, value int64) { 119 | err := g.client.Gauge(prefix, value, g.sampleRate) 120 | if err != nil { 121 | log.Printf("Error sending statsd metrics %s", prefix) 122 | } 123 | } 124 | 125 | func (g *StatsdCollector) incrementCounterMetric(prefix string, i float64) { 126 | if i == 0 { 127 | return 128 | } 129 | err := g.client.Inc(prefix, int64(i), g.sampleRate) 130 | if err != nil { 131 | log.Printf("Error sending statsd metrics %s", prefix) 132 | } 133 | } 134 | 135 | func (g *StatsdCollector) updateTimerMetric(prefix string, dur time.Duration) { 136 | err := g.client.TimingDuration(prefix, dur, g.sampleRate) 137 | if err != nil { 138 | log.Printf("Error sending statsd metrics %s", prefix) 139 | } 140 | } 141 | 142 | func (g *StatsdCollector) updateTimingMetric(prefix string, i int64) { 143 | err := g.client.Timing(prefix, i, g.sampleRate) 144 | if err != nil { 145 | log.Printf("Error sending statsd metrics %s", prefix) 146 | } 147 | } 148 | 149 | func (g *StatsdCollector) Update(r metricCollector.MetricResult) { 150 | if r.Successes > 0 { 151 | g.setGauge(g.circuitOpenPrefix, 0) 152 | } else if r.ShortCircuits > 0 { 153 | g.setGauge(g.circuitOpenPrefix, 1) 154 | } 155 | 156 | g.incrementCounterMetric(g.attemptsPrefix, r.Attempts) 157 | g.incrementCounterMetric(g.errorsPrefix, r.Errors) 158 | g.incrementCounterMetric(g.successesPrefix, r.Successes) 159 | g.incrementCounterMetric(g.failuresPrefix, r.Failures) 160 | g.incrementCounterMetric(g.rejectsPrefix, r.Rejects) 161 | g.incrementCounterMetric(g.shortCircuitsPrefix, r.ShortCircuits) 162 | g.incrementCounterMetric(g.timeoutsPrefix, r.Timeouts) 163 | g.incrementCounterMetric(g.fallbackSuccessesPrefix, r.FallbackSuccesses) 164 | g.incrementCounterMetric(g.fallbackFailuresPrefix, r.FallbackFailures) 165 | g.incrementCounterMetric(g.canceledPrefix, r.ContextCanceled) 166 | g.incrementCounterMetric(g.deadlinePrefix, r.ContextDeadlineExceeded) 167 | g.updateTimerMetric(g.totalDurationPrefix, r.TotalDuration) 168 | g.updateTimerMetric(g.runDurationPrefix, r.RunDuration) 169 | g.updateTimingMetric(g.concurrencyInUsePrefix, int64(100*r.ConcurrencyInUse)) 170 | } 171 | 172 | // Reset is a noop operation in this collector. 173 | func (g *StatsdCollector) Reset() {} 174 | -------------------------------------------------------------------------------- /plugins/statsd_collector_test.go: -------------------------------------------------------------------------------- 1 | package plugins 2 | 3 | import ( 4 | "testing" 5 | 6 | . "github.com/smartystreets/goconvey/convey" 7 | ) 8 | 9 | func TestSampleRate(t *testing.T) { 10 | Convey("when initializing the collector", t, func() { 11 | Convey("with no sample rate", func() { 12 | client, err := InitializeStatsdCollector(&StatsdCollectorConfig{ 13 | StatsdAddr: "localhost:8125", 14 | Prefix: "test", 15 | }) 16 | So(err, ShouldBeNil) 17 | 18 | collector := client.NewStatsdCollector("foo").(*StatsdCollector) 19 | Convey("it defaults to no sampling", func() { 20 | So(collector.sampleRate, ShouldEqual, 1.0) 21 | }) 22 | }) 23 | Convey("with a sample rate", func() { 24 | client, err := InitializeStatsdCollector(&StatsdCollectorConfig{ 25 | StatsdAddr: "localhost:8125", 26 | Prefix: "test", 27 | SampleRate: 0.5, 28 | }) 29 | So(err, ShouldBeNil) 30 | 31 | collector := client.NewStatsdCollector("foo").(*StatsdCollector) 32 | Convey("the rate is set", func() { 33 | So(collector.sampleRate, ShouldEqual, 0.5) 34 | }) 35 | }) 36 | }) 37 | } 38 | -------------------------------------------------------------------------------- /scripts/vagrant.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | wget -q https://storage.googleapis.com/golang/go1.9.4.linux-amd64.tar.gz 5 | tar -C /usr/local -xzf go1.9.4.linux-amd64.tar.gz 6 | 7 | apt-get update 8 | apt-get -y install git mercurial apache2-utils 9 | 10 | echo 'export PATH=$PATH:/usr/local/go/bin:/go/bin 11 | export GOPATH=/go' >> /home/vagrant/.profile 12 | 13 | source /home/vagrant/.profile 14 | 15 | go get golang.org/x/tools/cmd/goimports 16 | go get github.com/golang/lint/golint 17 | go get github.com/smartystreets/goconvey/convey 18 | go get github.com/cactus/go-statsd-client/statsd 19 | go get github.com/rcrowley/go-metrics 20 | go get github.com/DataDog/datadog-go/statsd 21 | 22 | chown -R vagrant:vagrant /go 23 | 24 | echo "cd /go/src/github.com/afex/hystrix-go" >> /home/vagrant/.bashrc 25 | --------------------------------------------------------------------------------