├── main.trace ├── trace.png ├── go.mod ├── go.sum ├── README.md └── main.go /main.trace: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/felixge/go-cpu-utilization/HEAD/main.trace -------------------------------------------------------------------------------- /trace.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/felixge/go-cpu-utilization/HEAD/trace.png -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/felixge/go-cpu-utilization 2 | 3 | go 1.19 4 | 5 | require ( 6 | github.com/davecgh/go-spew v1.1.1 // indirect 7 | github.com/pmezard/go-difflib v1.0.0 // indirect 8 | github.com/stretchr/objx v0.5.0 // indirect 9 | github.com/stretchr/testify v1.8.1 // indirect 10 | gopkg.in/yaml.v3 v3.0.1 // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 3 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 5 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 6 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 7 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 8 | github.com/stretchr/objx v0.5.0 h1:1zr/of2m5FGMsad5YfcqgdqdWrIhu+EBEJRhR1U7z/c= 9 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 10 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 11 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 12 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 13 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 14 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 15 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 16 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 17 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 18 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-cpu-utilization 2 | 3 | This repository contains a naive attempt at measuring the CPU utilization of a 4 | Go program and running a background workload when the utilization falls below a 5 | certain threshold. 6 | 7 | This is done by using [getrusage(2)](https://man7.org/linux/man-pages/man2/getrusage.2.html) 8 | to measure the CPU time of the process. 9 | 10 | ## Results 11 | 12 | The example program runs a foreground workload that has 2 goroutines running a 13 | loop that burns CPU time for a second, followed by a one second sleep. 14 | 15 | Additionally there is a background workload that kicks in whenever the CPU 16 | utilization of the previous 100ms period falls below 1.5 CPU cores and stops 17 | when it exceeds this threshold again. 18 | 19 | Example output: 20 | 21 | ``` 22 | $ go run . 23 | 2022/12/17 22:28:24 starting background work: cores=0.09 < 1.50 24 | 2022/12/17 22:28:25 stopping background work: cores: 3.05 > 1.50 25 | 2022/12/17 22:28:26 starting background work: cores=0.03 < 1.50 26 | 2022/12/17 22:28:27 stopping background work: cores: 2.98 > 1.50 27 | 2022/12/17 22:28:28 starting background work: cores=0.08 < 1.50 28 | ``` 29 | 30 | Example trace: 31 | 32 | ![](./trace.png) 33 | 34 | ## Discussion 35 | 36 | The result seems promising, but it's also showing the challenges of building 37 | such a control loop against a lagging indicator. The background workload does a 38 | good job of not kicking in while the foreground workload is running, but takes 39 | up to 100ms to stop when the foreground workload comes back. 40 | 41 | But unless the Go runtime introduces the concept of priorities into the 42 | scheduler, users may have to resort to this or [similar](https://www.cockroachlabs.com/blog/rubbing-control-theory/) workarounds. 43 | 44 | Future study is needed to determine practical limits for the cpu utilization 45 | measurement period. Theoretically the cost of a syscall (a few usec?) should be 46 | the absolute lower bound. Practically speaking the jiffy interval (usually 4ms) 47 | of the kernel may result in a higher floor. 48 | 49 | Disclaimer: The above may be partially or entirely wrong. My knowledge of the 50 | involved components and underlaying scheduling theory is very limited and I've 51 | never used the presented mechanism in a production system. 52 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "runtime/trace" 7 | "syscall" 8 | "time" 9 | ) 10 | 11 | func main() { 12 | // take a runtime trace to evaluate the behavior 13 | file, err := os.Create("main.trace") 14 | if err != nil { 15 | panic(err) 16 | } else if err := trace.Start(file); err != nil { 17 | panic(err) 18 | } 19 | defer trace.Stop() 20 | 21 | for i := 0; i < 2; i++ { 22 | go foregroundWork(time.Second) 23 | } 24 | go backgroundWork() 25 | time.Sleep(6 * time.Second) 26 | } 27 | 28 | // foregroundWork runs a loop that does CPU heavy work for one period followed 29 | // by one period of sleep. 30 | func foregroundWork(period time.Duration) { 31 | timer := time.After(period) 32 | for { 33 | select { 34 | case <-timer: 35 | time.Sleep(period) 36 | timer = time.After(period) 37 | default: 38 | _ = "burn cpu cycles" 39 | } 40 | } 41 | } 42 | 43 | func backgroundWork() { 44 | period := 100 * time.Millisecond 45 | u := NewCPUUtilization(period) 46 | threshold := float64(1.5) 47 | 48 | for { 49 | cores := <-u.C 50 | if cores > threshold { 51 | continue 52 | } 53 | log.Printf("starting background work: cores=%.2f < %.2f", cores, threshold) 54 | 55 | workLoop: 56 | for { 57 | select { 58 | case cores := <-u.C: 59 | if cores > threshold { 60 | log.Printf("stopping background work: cores: %.2f > %.2f", cores, threshold) 61 | break workLoop 62 | } 63 | default: 64 | _ = "do a small amount of CPU work here" 65 | } 66 | } 67 | } 68 | } 69 | 70 | func NewCPUUtilization(period time.Duration) *CPUUtilization { 71 | c := &CPUUtilization{ 72 | C: make(chan float64, 1), 73 | stop: make(chan struct{}), 74 | } 75 | go c.measure(period) 76 | return c 77 | } 78 | 79 | type CPUUtilization struct { 80 | C chan float64 81 | stop chan struct{} 82 | } 83 | 84 | func (c *CPUUtilization) measure(period time.Duration) { 85 | var before syscall.Rusage 86 | var after syscall.Rusage 87 | t := time.NewTicker(period) 88 | defer t.Stop() 89 | for { 90 | start := time.Now() 91 | beforeErr := syscall.Getrusage(syscall.RUSAGE_SELF, &before) 92 | select { 93 | case <-t.C: 94 | case <-c.stop: 95 | return 96 | } 97 | afterErr := syscall.Getrusage(syscall.RUSAGE_SELF, &after) 98 | 99 | var cores float64 100 | if beforeErr != nil || afterErr != nil { 101 | cores = -1 // should be impossible according to getrusage(2) docs, but let's handle it 102 | } else { 103 | cpuNano := after.Utime.Nano() + after.Stime.Nano() - before.Utime.Nano() - before.Stime.Nano() 104 | cores = float64(cpuNano) / float64(time.Since(start)) 105 | } 106 | 107 | select { 108 | case c.C <- cores: 109 | case <-c.stop: 110 | return 111 | default: 112 | continue 113 | } 114 | } 115 | } 116 | 117 | // Stop stops the cpu utilization measurement. 118 | func (c *CPUUtilization) Stop() { 119 | select { 120 | case <-c.stop: 121 | return 122 | default: 123 | close(c.stop) 124 | } 125 | } 126 | --------------------------------------------------------------------------------