├── .circleci └── config.yml ├── .dockerignore ├── Dockerfile.dlv ├── Dockerfile.test ├── LICENSE-APACHE ├── LICENSE-MIT ├── Makefile ├── README.md ├── adaptive.go ├── adaptive_test.go ├── doc.go ├── go.mod ├── go.sum ├── log.go ├── notification.go ├── watchdog.go ├── watchdog_linux.go ├── watchdog_linux_test.go ├── watchdog_other.go ├── watchdog_other_test.go ├── watchdog_test.go ├── watermarks.go └── watermarks_test.go /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | # Golang CircleCI 2.0 configuration file 2 | version: 2.1 3 | 4 | parameters: 5 | go-version: 6 | type: string 7 | default: "1.15.5" 8 | workspace-dir: 9 | type: string 10 | default: "/home/circleci" 11 | 12 | commands: 13 | setup: 14 | description: "install go, checkout and restore cache" 15 | steps: 16 | - run: 17 | name: "install go" 18 | command: | 19 | curl --create-dirs -o $GOPATH/go.tar.gz https://dl.google.com/go/go${GOVERSION}.linux-amd64.tar.gz 20 | tar --strip-components=1 -C $GOPATH -xzf $GOPATH/go.tar.gz 21 | rm -rf $GOPATH/go.tar.gz 22 | - checkout 23 | - restore_cache: 24 | keys: 25 | - 'v2-pkg-cache-{{ checksum "go.sum" }}-{{ .Environment.GOVERSION }}' 26 | - 'bin-cache-{{ .Branch }}' 27 | setup-macos: 28 | description: "install go, checkout and restore cache" 29 | steps: 30 | - run: 31 | name: "install go on macOS" 32 | command: | 33 | brew --version 34 | [ ! -d /usr/local/opt/go@1.14 ] && brew update && brew install go@1.14 && echo "done installing go" 35 | echo 'export GOPATH="$HOME/go"' >> $BASH_ENV 36 | echo 'export PATH="/usr/local/opt/go@1.14/bin:$GOPATH/bin:$PATH"' >> $BASH_ENV 37 | source $BASH_ENV 38 | go version 39 | - checkout 40 | 41 | executors: 42 | linux: 43 | machine: 44 | image: ubuntu-1604:201903-01 45 | working_directory: << pipeline.parameters.workspace-dir >>/project 46 | environment: 47 | GOPATH: << pipeline.parameters.workspace-dir >>/go/<< pipeline.parameters.go-version >> 48 | PATH: << pipeline.parameters.workspace-dir >>/go/<< pipeline.parameters.go-version >>/bin:<< pipeline.parameters.workspace-dir >>/bin:/usr/local/bin:/usr/bin:/bin 49 | GOVERSION: << pipeline.parameters.go-version >> 50 | 51 | jobs: 52 | build: 53 | executor: linux 54 | steps: 55 | - checkout 56 | - setup 57 | - run: make 58 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | Makefile -------------------------------------------------------------------------------- /Dockerfile.dlv: -------------------------------------------------------------------------------- 1 | ## This Dockerfile compiles the watchdog with delve support. It enables the tests 2 | ## to be debugged inside a container. 3 | ## 4 | ## Run with: 5 | ## docker run --memory=64MiB --memory-swap=64MiB -p 2345:2345 <image> \ 6 | ## --listen=:2345 --headless=true --log=true \ 7 | ## --log-output=debugger,debuglineerr,gdbwire,lldbout,rpc \ 8 | ## --accept-multiclient --api-version=2 exec /root/watchdog.test 9 | ## 10 | FROM golang:1.15.5 11 | WORKDIR /watchdog 12 | COPY . . 13 | RUN CGO_ENABLED=0 go get -ldflags "-s -w -extldflags '-static'" github.com/go-delve/delve/cmd/dlv 14 | RUN CGO_ENABLED=0 go test -gcflags "all=-N -l" -c -o ./watchdog.test 15 | 16 | FROM alpine:latest 17 | RUN apk --no-cache add ca-certificates 18 | WORKDIR /root/ 19 | COPY --from=0 /go/bin/dlv /dlv 20 | COPY --from=0 /watchdog/watchdog.test . 
21 | ENTRYPOINT [ "/dlv" ] 22 | EXPOSE 2345 -------------------------------------------------------------------------------- /Dockerfile.test: -------------------------------------------------------------------------------- 1 | FROM golang:1.15.5 2 | WORKDIR /watchdog 3 | COPY . . 4 | RUN CGO_ENABLED=0 GOOS=linux go test -c -o watchdog.test 5 | 6 | FROM alpine:latest 7 | RUN apk --no-cache add ca-certificates 8 | WORKDIR /root/ 9 | COPY --from=0 /watchdog/watchdog.test . 10 | CMD ["/root/watchdog.test", "-test.v"] 11 | -------------------------------------------------------------------------------- /LICENSE-APACHE: -------------------------------------------------------------------------------- 1 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 2 | 3 | http://www.apache.org/licenses/LICENSE-2.0 4 | 5 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 6 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SHELL = /bin/bash 2 | 3 | .PHONY: test 4 | 5 | # these tests run in isolation by calling go test -run=... or the equivalent. 6 | ISOLATED_TESTS += 7 | ifdef CI 8 | ISOLATED_TESTS = TestControl_Isolated \ 9 | TestSystemDriven_Isolated \ 10 | TestHeapDriven_Isolated 11 | else 12 | ISOLATED_TESTS = TestControl_Isolated \ 13 | TestSystemDriven_Isolated \ 14 | TestHeapDriven_Isolated \ 15 | TestCgroupsDriven_Create_Isolated \ 16 | TestCgroupsDriven_Docker_Isolated 17 | endif 18 | 19 | test: test-binary test-docker 20 | 21 | test-binary: 22 | go test -v ./... # run all the non-isolated tests. 23 | # foreach does not actually execute each iteration; it expands the text, and it's executed all at once 24 | # that's why we use && true, to short-circuit if a test fails. 
25 | $(foreach name,$(ISOLATED_TESTS),TEST_ISOLATED=1 go test -v -test.run=$(name) ./... && ) true 26 | 27 | test-docker: docker 28 | docker run --memory=32MiB --memory-swap=32MiB -e TEST_DOCKER_MEMLIMIT=33554432 raulk/watchdog:latest 29 | $(foreach name,$(ISOLATED_TESTS),docker run \ 30 | --memory=32MiB --memory-swap=32MiB \ 31 | -e TEST_ISOLATED=1 \ 32 | -e TEST_DOCKER_MEMLIMIT=33554432 \ 33 | raulk/watchdog:latest /root/watchdog.test -test.v -test.run=$(name) ./... && ) true 34 | 35 | docker: 36 | docker build -f ./Dockerfile.test -t raulk/watchdog:latest . 37 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Go memory watchdog 2 | 3 | > 🐺 A library to curb OOMs by running Go GC according to a user-defined policy. 4 | 5 | [![godocs](https://img.shields.io/badge/godoc-reference-5272B4.svg?style=flat-square)](https://godoc.org/github.com/raulk/go-watchdog) 6 | [![build status](https://circleci.com/gh/raulk/go-watchdog.svg?style=svg)](https://circleci.com/gh/raulk/go-watchdog) 7 | 8 | Package watchdog runs a singleton memory watchdog in the process, which 9 | watches memory utilization and forces Go GC in accordance with a 10 | user-defined policy. 11 | 12 | There are three kinds of watchdogs: 13 | 14 | 1. heap-driven (`watchdog.HeapDriven()`): applies a heap limit, adjusting GOGC 15 | dynamically in accordance with the policy. 16 | 2. system-driven (`watchdog.SystemDriven()`): applies a limit to the total 17 | system memory used, obtaining the current usage through elastic/go-sigar. 18 | 3. cgroups-driven (`watchdog.CgroupDriven()`): discovers the memory limit from 19 | the cgroup of the process (derived from /proc/self/cgroup), or from the 20 | root cgroup path if the PID == 1 (which indicates that the process is 21 | running in a container). It uses the cgroup stats to obtain the 22 | current usage. 23 | 24 | The watchdog's behaviour is controlled by the policy, a pluggable function 25 | that determines when to trigger GC based on the current utilization. This 26 | library ships with two policies: 27 | 28 | 1. watermarks policy (`watchdog.NewWatermarkPolicy()`): runs GC at configured 29 | watermarks of memory utilisation. 30 | 2. adaptive policy (`watchdog.NewAdaptivePolicy()`): runs GC when the current 31 | usage surpasses a dynamically-set threshold. 32 | 33 | You can easily write a custom policy tailored to the allocation patterns of 34 | your program. 35 | 36 | ## Recommended way to set up the watchdog 37 | 38 | The recommended way to set up the watchdog is as follows, in descending order 39 | of precedence. This logic assumes that the library supports setting a heap 40 | limit through an environment variable (e.g. MYAPP_HEAP_MAX) or config key. 41 | 42 | 1. If heap limit is set and legal, initialize a heap-driven watchdog. 43 | 2. Otherwise, try to use the cgroup-driven watchdog. If it succeeds, return. 44 | 3. Otherwise, try to initialize a system-driven watchdog. If it succeeds, return. 45 | 4. Watchdog initialization failed. Log a warning to inform the user that 46 | they're flying solo. 47 | 48 | ## Running the tests 49 | 50 | Given the low-level nature of this component, some tests need to run in 51 | isolation, so that they don't carry over Go runtime metrics. For completeness, 52 | this module uses a Docker image for testing, so we can simulate cgroup memory 53 | limits. 
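Isolation is signalled to the test binary through the `TEST_ISOLATED` environment variable, which the Makefile targets and the `docker run` invocations above set. Each isolated test guards itself with a small helper; the sketch below mirrors `skipIfNotIsolated` in `watchdog_test.go`:

```go
package watchdog

import (
	"os"
	"testing"
)

// skipIfNotIsolated skips the test unless it is running as a dedicated
// process, i.e. it was launched with TEST_ISOLATED=1 by the Makefile or
// by the docker run commands in the test-docker target.
func skipIfNotIsolated(t *testing.T) {
	if os.Getenv("TEST_ISOLATED") != "1" {
		t.Skipf("skipping test in non-isolated mode")
	}
}
```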
54 | 55 | The test execution and docker builds have been conveniently packaged in a 56 | Makefile. Run with: 57 | 58 | ```shell 59 | $ make 60 | ``` 61 | 62 | ## Why is this even needed? 63 | 64 | The garbage collector that ships with the go runtime is pretty good in some 65 | regards (low-latency, negligible stop-the-world pauses), but it's unsatisfactory in 66 | a number of situations that yield ill-fated outcomes: 67 | 68 | 1. it is incapable of dealing with bursty/spiky allocations efficiently; 69 | depending on the workload, the program may OOM as a consequence of not 70 | scheduling GC in a timely manner. 71 | 2. part of the above is due to the fact that go doesn't concern itself with any 72 | limits. To date, it is not possible to set a maximum heap size. 73 | 3. its default policy of scheduling GC when the heap doubles, coupled with its 74 | ignorance of system or process limits, can easily cause it to OOM. 75 | 76 | For more information, check out these GitHub issues: 77 | 78 | * https://github.com/golang/go/issues/42805 79 | * https://github.com/golang/go/issues/42430 80 | * https://github.com/golang/go/issues/14735 81 | * https://github.com/golang/go/issues/16843 82 | * https://github.com/golang/go/issues/10064 83 | * https://github.com/golang/go/issues/9849 84 | 85 | ## License 86 | 87 | Dual-licensed: [MIT](./LICENSE-MIT), [Apache Software License v2](./LICENSE-APACHE), by way of the 88 | [Permissive License Stack](https://protocol.ai/blog/announcing-the-permissive-license-stack/). 89 | -------------------------------------------------------------------------------- /adaptive.go: -------------------------------------------------------------------------------- 1 | package watchdog 2 | 3 | // NewAdaptivePolicy creates a policy that forces GC when the usage surpasses a 4 | // user-configured percentage (factor) of the available memory. 5 | // 6 | // This policy recalculates the next target as usage+(limit-usage)*factor. 7 | func NewAdaptivePolicy(factor float64) PolicyCtor { 8 | return func(limit uint64) (Policy, error) { 9 | return &adaptivePolicy{ 10 | factor: factor, 11 | limit: limit, 12 | }, nil 13 | } 14 | } 15 | 16 | type adaptivePolicy struct { 17 | factor float64 18 | limit uint64 19 | } 20 | 21 | var _ Policy = (*adaptivePolicy)(nil) 22 | 23 | func (p *adaptivePolicy) Evaluate(_ UtilizationType, used uint64) (next uint64) { 24 | if used >= p.limit { 25 | return used 26 | } 27 | 28 | available := float64(p.limit) - float64(used) 29 | next = used + uint64(available*p.factor) 30 | return next 31 | } 32 | -------------------------------------------------------------------------------- /adaptive_test.go: -------------------------------------------------------------------------------- 1 | package watchdog 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/benbjohnson/clock" 7 | "github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestAdaptivePolicy(t *testing.T) { 11 | clk := clock.NewMock() 12 | Clock = clk 13 | 14 | p, err := NewAdaptivePolicy(0.5)(limit64MiB) 15 | require.NoError(t, err) 16 | 17 | // at zero; next = 50%. 18 | next := p.Evaluate(UtilizationSystem, 0) 19 | require.EqualValues(t, limit64MiB/2, next) 20 | 21 | // at half; next = 75%. 22 | next = p.Evaluate(UtilizationSystem, limit64MiB/2) 23 | require.EqualValues(t, 3*(limit64MiB/4), next) 24 | 25 | // at limit. 
26 | next = p.Evaluate(UtilizationSystem, limit64MiB) 27 | require.EqualValues(t, limit64MiB, next) 28 | } 29 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | // Package watchdog runs a singleton memory watchdog in the process, which 2 | // watches memory utilization and forces Go GC in accordance with a 3 | // user-defined policy. 4 | // 5 | // There are three kinds of watchdogs: 6 | // 7 | // 1. heap-driven (watchdog.HeapDriven()): applies a heap limit, adjusting GOGC 8 | // dynamically in accordance with the policy. 9 | // 2. system-driven (watchdog.SystemDriven()): applies a limit to the total 10 | // system memory used, obtaining the current usage through elastic/go-sigar. 11 | // 3. cgroups-driven (watchdog.CgroupDriven()): discovers the memory limit from 12 | // the cgroup of the process (derived from /proc/self/cgroup), or from the 13 | // root cgroup path if the PID == 1 (which indicates that the process is 14 | // running in a container). It uses the cgroup stats to obtain the 15 | // current usage. 16 | // 17 | // The watchdog's behaviour is controlled by the policy, a pluggable function 18 | // that determines when to trigger GC based on the current utilization. This 19 | // library ships with two policies: 20 | // 21 | // 1. watermarks policy (watchdog.NewWatermarkPolicy()): runs GC at configured 22 | // watermarks of memory utilisation. 23 | // 2. adaptive policy (watchdog.NewAdaptivePolicy()): runs GC when the current 24 | // usage surpasses a dynamically-set threshold. 25 | // 26 | // You can easily write a custom policy tailored to the allocation patterns of 27 | // your program. 28 | // 29 | // Recommended way to set up the watchdog 30 | // 31 | // The recommended way to set up the watchdog is as follows, in descending order 32 | // of precedence. This logic assumes that the library supports setting a heap 33 | // limit through an environment variable (e.g. MYAPP_HEAP_MAX) or config key. 34 | // 35 | // 1. If heap limit is set and legal, initialize a heap-driven watchdog. 36 | // 2. Otherwise, try to use the cgroup-driven watchdog. If it succeeds, return. 37 | // 3. Otherwise, try to initialize a system-driven watchdog. If it succeeds, return. 38 | // 4. Watchdog initialization failed. Log a warning to inform the user that 39 | // they're flying solo. 
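A minimal sketch of this fallback order follows. It assumes the adaptive policy and a 5-second polling interval; the MYAPP_HEAP_MAX variable and the minGOGC value of 25 are illustrative placeholders, not part of this library:

```go
package main

import (
	"log"
	"os"
	"strconv"
	"time"

	watchdog "github.com/raulk/go-watchdog"
)

func main() {
	stop := initWatchdog()
	defer stop()
	// ... application code ...
}

// initWatchdog applies the precedence above: heap-driven if a heap limit
// was configured, then cgroup-driven, then system-driven.
func initWatchdog() (stop func()) {
	policy := watchdog.NewAdaptivePolicy(0.5)

	// 1. Heap-driven, if the user configured a legal heap limit.
	if limit, _ := strconv.ParseUint(os.Getenv("MYAPP_HEAP_MAX"), 10, 64); limit > 0 {
		if err, stopFn := watchdog.HeapDriven(limit, 25, policy); err == nil {
			return stopFn
		}
	}
	// 2. Cgroup-driven; fails fast on non-Linux systems or missing cgroups.
	if err, stopFn := watchdog.CgroupDriven(5*time.Second, policy); err == nil {
		return stopFn
	}
	// 3. System-driven; a zero limit means "use total system memory".
	if err, stopFn := watchdog.SystemDriven(0, 5*time.Second, policy); err == nil {
		return stopFn
	}
	// 4. Initialization failed; warn the user that they're flying solo.
	log.Println("watchdog initialization failed; no watchdog is running")
	return func() {}
}
```

Note the unconventional return order of this library's constructors (error first, stop function second); the sketch follows the signatures declared in watchdog.go.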
40 | package watchdog 41 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/raulk/go-watchdog 2 | 3 | go 1.15 4 | 5 | require ( 6 | github.com/benbjohnson/clock v1.3.0 7 | github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327 8 | github.com/elastic/gosigar v0.12.0 9 | github.com/kr/pretty v0.1.0 // indirect 10 | github.com/opencontainers/runtime-spec v1.0.2 11 | github.com/stretchr/testify v1.4.0 12 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 // indirect 13 | ) 14 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 2 | github.com/benbjohnson/clock v1.3.0 h1:ip6w0uFQkncKQ979AypyG0ER7mqUSBdKLOgAle/AT8A= 3 | github.com/benbjohnson/clock v1.3.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= 4 | github.com/cilium/ebpf v0.2.0/go.mod h1:To2CFviqOWL/M0gIMsvSMlqe7em/l1ALkX1PyjrX2Qs= 5 | github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327 h1:7grrpcfCtbZLsjtB0DgMuzs1umsJmpzaHMZ6cO6iAWw= 6 | github.com/containerd/cgroups v0.0.0-20201119153540-4cbc285b3327/go.mod h1:ZJeTFisyysqgcCdecO57Dj79RfL0LNeGiFUqLYQRYLE= 7 | github.com/coreos/go-systemd/v22 v22.1.0 h1:kq/SbG2BCKLkDKkjQf5OWwKWUKj1lgs3lFI4PxnR5lg= 8 | github.com/coreos/go-systemd/v22 v22.1.0/go.mod h1:xO0FLkIi5MaZafQlIrOotqXZ90ih+1atmu1JpKERPPk= 9 | github.com/cpuguy83/go-md2man/v2 v2.0.0-20190314233015-f79a8a8ca69d/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= 10 | github.com/cpuguy83/go-md2man/v2 v2.0.0/go.mod h1:maD7wRr/U5Z6m/iR4s+kqSMx2CaBsrgA7czyZG/E6dU= 11 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 12 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 13 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 14 | github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw= 15 | github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= 16 | github.com/elastic/gosigar v0.12.0 h1:AsdhYCJlTudhfOYQyFNgx+fIVTfrDO0V1ST0vHgiapU= 17 | github.com/elastic/gosigar v0.12.0/go.mod h1:iXRIGg2tLnu7LBdpqzyQfGDEidKCfWcCMS0WKyPWoMs= 18 | github.com/godbus/dbus/v5 v5.0.3 h1:ZqHaoEF7TBzh4jzPmqVhE/5A1z9of6orkAe5uHoAeME= 19 | github.com/godbus/dbus/v5 v5.0.3/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= 20 | github.com/gogo/protobuf v1.3.1 h1:DqDEcV5aeaTmdFBePNpYsp3FlcVH/2ISVVM9Qf8PSls= 21 | github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXPKa29o= 22 | github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 23 | github.com/kisielk/errcheck v1.2.0/go.mod h1:/BMXB+zMLi60iA8Vv6Ksmxu/1UDYcXs4uQLJ+jE2L00= 24 | github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= 25 | github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= 26 | github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= 27 | github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= 28 | github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= 29 | github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= 30 | 
github.com/opencontainers/runtime-spec v1.0.2 h1:UfAcuLBJB9Coz72x1hgl8O5RVzTdNiaglX6v2DM6FI0= 31 | github.com/opencontainers/runtime-spec v1.0.2/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= 32 | github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= 33 | github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= 34 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 35 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 36 | github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= 37 | github.com/shurcooL/sanitized_anchor_name v1.0.0/go.mod h1:1NzhyTcUVG4SuEtjjoZeVRXNmyL/1OwPU0+IJeTBvfc= 38 | github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= 39 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 40 | github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= 41 | github.com/stretchr/testify v1.4.0 h1:2E4SXV/wtOkTonXsotYi4li6zVWxYlZuYNCXe9XRJyk= 42 | github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= 43 | github.com/urfave/cli v1.22.2/go.mod h1:Gos4lmkARVdJ6EkW0WaNv/tZAAMe9V7XWyB60NtXRu0= 44 | golang.org/x/sys v0.0.0-20180810173357-98c5dad5d1a0/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 45 | golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 46 | golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9 h1:1/DFK4b7JH8DmkqhUk48onnSfrPzImPoVxuomtbT2nk= 47 | golang.org/x/sys v0.0.0-20200124204421-9fbb57f87de9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 48 | golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 49 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 50 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 51 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= 52 | gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 53 | gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= 54 | gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= 55 | -------------------------------------------------------------------------------- /log.go: -------------------------------------------------------------------------------- 1 | package watchdog 2 | 3 | import "log" 4 | 5 | // logger is an interface to be implemented by custom loggers. 6 | type logger interface { 7 | Debugf(template string, args ...interface{}) 8 | Infof(template string, args ...interface{}) 9 | Warnf(template string, args ...interface{}) 10 | Errorf(template string, args ...interface{}) 11 | } 12 | 13 | var _ logger = (*stdlog)(nil) 14 | 15 | // stdlog is a logger that proxies to a standard log.logger. 16 | type stdlog struct { 17 | log *log.Logger 18 | debug bool 19 | } 20 | 21 | func (s *stdlog) Debugf(template string, args ...interface{}) { 22 | if !s.debug { 23 | return 24 | } 25 | s.log.Printf(template, args...) 26 | } 27 | 28 | func (s *stdlog) Infof(template string, args ...interface{}) { 29 | s.log.Printf(template, args...) 
30 | } 31 | 32 | func (s *stdlog) Warnf(template string, args ...interface{}) { 33 | s.log.Printf(template, args...) 34 | } 35 | 36 | func (s *stdlog) Errorf(template string, args ...interface{}) { 37 | s.log.Printf(template, args...) 38 | } 39 | -------------------------------------------------------------------------------- /notification.go: -------------------------------------------------------------------------------- 1 | package watchdog 2 | 3 | import "sync" 4 | 5 | var ( 6 | gcNotifeeMutex sync.Mutex 7 | gcNotifees []notifeeEntry 8 | 9 | forcedGCNotifeeMutex sync.Mutex 10 | forcedGCNotifees []notifeeEntry 11 | ) 12 | 13 | // RegisterPostGCNotifee registers a function that is called every time a GC has happened, 14 | // both GC runs triggered by the Go runtime and by watchdog. 15 | // The unregister function returned can be used to unregister this notifee. 16 | func RegisterPostGCNotifee(f func()) (unregister func()) { 17 | gcNotifeeMutex.Lock() 18 | defer gcNotifeeMutex.Unlock() 19 | 20 | var id int 21 | if len(gcNotifees) > 0 { 22 | id = gcNotifees[len(gcNotifees)-1].id + 1 23 | } 24 | gcNotifees = append(gcNotifees, notifeeEntry{id: id, f: f}) 25 | 26 | return func() { 27 | gcNotifeeMutex.Lock() 28 | defer gcNotifeeMutex.Unlock() 29 | 30 | for i, entry := range gcNotifees { 31 | if entry.id == id { 32 | gcNotifees = append(gcNotifees[:i], gcNotifees[i+1:]...) 33 | } 34 | } 35 | } 36 | } 37 | 38 | func notifyGC() { 39 | if NotifyGC != nil { 40 | NotifyGC() 41 | } 42 | gcNotifeeMutex.Lock() 43 | defer gcNotifeeMutex.Unlock() 44 | for _, entry := range gcNotifees { 45 | entry.f() 46 | } 47 | } 48 | 49 | // RegisterPreGCNotifee registers a function that is called before watchdog triggers a GC run. 50 | // It is ONLY called when watchdog triggers a GC run, not when the Go runtime triggers it. 51 | // The unregister function returned can be used to unregister this notifee. 52 | func RegisterPreGCNotifee(f func()) (unregister func()) { 53 | forcedGCNotifeeMutex.Lock() 54 | defer forcedGCNotifeeMutex.Unlock() 55 | 56 | var id int 57 | if len(forcedGCNotifees) > 0 { 58 | id = forcedGCNotifees[len(forcedGCNotifees)-1].id + 1 59 | } 60 | forcedGCNotifees = append(forcedGCNotifees, notifeeEntry{id: id, f: f}) 61 | 62 | return func() { 63 | forcedGCNotifeeMutex.Lock() 64 | defer forcedGCNotifeeMutex.Unlock() 65 | 66 | for i, entry := range forcedGCNotifees { 67 | if entry.id == id { 68 | forcedGCNotifees = append(forcedGCNotifees[:i], forcedGCNotifees[i+1:]...) 69 | } 70 | } 71 | } 72 | } 73 | 74 | func notifyForcedGC() { 75 | forcedGCNotifeeMutex.Lock() 76 | defer forcedGCNotifeeMutex.Unlock() 77 | for _, entry := range forcedGCNotifees { 78 | entry.f() 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /watchdog.go: -------------------------------------------------------------------------------- 1 | package watchdog 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "log" 7 | "math" 8 | "os" 9 | "path/filepath" 10 | "runtime" 11 | "runtime/debug" 12 | "runtime/pprof" 13 | "sync" 14 | "time" 15 | 16 | "github.com/elastic/gosigar" 17 | "github.com/benbjohnson/clock" 18 | ) 19 | 20 | // ErrNotSupported is returned when the watchdog does not support the requested 21 | // run mode in the current OS/arch. 22 | var ErrNotSupported = errors.New("watchdog run mode not supported") 23 | 24 | // PolicyTempDisabled is a marker value for policies to signal that the policy 25 | // is temporarily disabled. 
Use it when all hope is lost to turn around from 26 | // significant memory pressure (such as when above an "extreme" watermark). 27 | const PolicyTempDisabled uint64 = math.MaxUint64 28 | 29 | // The watchdog is designed to be used as a singleton; global vars are OK for 30 | // that reason. 31 | var ( 32 | // Logger is the logger to use. If nil, it will default to a logger that 33 | // proxies to a standard logger using the "[watchdog]" prefix. 34 | Logger logger = &stdlog{log: log.New(log.Writer(), "[watchdog] ", log.LstdFlags|log.Lmsgprefix)} 35 | 36 | // Clock can be used to inject a mock clock for testing. 37 | Clock = clock.New() 38 | 39 | // ForcedGCFunc specifies the function to call when forced GC is necessary. 40 | // Its default value is runtime.GC, but it can be set to debug.FreeOSMemory 41 | // to force the release of memory to the OS. 42 | ForcedGCFunc = runtime.GC 43 | 44 | // NotifyGC, if non-nil, will be called when a GC has happened. 45 | // Deprecated: use RegisterPostGCNotifee instead. 46 | NotifyGC func() = func() {} 47 | 48 | // HeapProfileThreshold sets the utilization threshold that will trigger a 49 | // heap profile to be taken automatically. A zero value disables this feature. 50 | // By default, it is disabled. 51 | HeapProfileThreshold float64 52 | 53 | // HeapProfileMaxCaptures sets the maximum number of heap profiles a process will generate. 54 | // This limits the number of episodes that will be captured, in case the 55 | // utilization climbs repeatedly over the threshold. By default, it is 10. 56 | HeapProfileMaxCaptures = uint(10) 57 | 58 | // HeapProfileDir is the directory where the watchdog will write the heap profile. 59 | // It will be created if it doesn't exist upon initialization. An error when 60 | // creating the dir will not prevent watchdog initialization; it will just 61 | // disable the heap profile capture feature. If zero-valued, the feature is 62 | // disabled. 63 | // 64 | // Heap profiles will be written to path <HeapProfileDir>/<timestamp>.heap. 65 | HeapProfileDir string 66 | ) 67 | 68 | var ( 69 | // ReadMemStats stops the world. But as of go1.9, it should only 70 | // take ~25µs to complete. 71 | // 72 | // Before go1.15, calls to ReadMemStats during an ongoing GC would 73 | // block due to the worldsema lock. As of go1.15, this was optimized 74 | // and the runtime holds on to worldsema less during GC (only during 75 | // sweep termination and mark termination). 76 | // 77 | // For users using go1.14 and earlier, if this call happens during 78 | // GC, it will just block for longer until serviced, but it will not 79 | // take longer in itself. No harm done. 80 | // 81 | // Actual benchmarks 82 | // ----------------- 83 | // 84 | // In Go 1.15.5, ReadMemStats with no ongoing GC takes ~27µs in a MBP 16 85 | // i9 busy with another million things. During GC, it takes an 86 | // average of less than 175µs per op. 
87 | // 88 | // goos: darwin 89 | // goarch: amd64 90 | // pkg: github.com/filecoin-project/lotus/api 91 | // BenchmarkReadMemStats-16 44530 27523 ns/op 92 | // BenchmarkReadMemStats-16 43743 26879 ns/op 93 | // BenchmarkReadMemStats-16 45627 26791 ns/op 94 | // BenchmarkReadMemStats-16 44538 26219 ns/op 95 | // BenchmarkReadMemStats-16 44958 26757 ns/op 96 | // BenchmarkReadMemStatsWithGCContention-16 10 183733 p50-ns 211859 p90-ns 211859 p99-ns 97 | // BenchmarkReadMemStatsWithGCContention-16 7 198765 p50-ns 314873 p90-ns 314873 p99-ns 98 | // BenchmarkReadMemStatsWithGCContention-16 10 195151 p50-ns 311408 p90-ns 311408 p99-ns 99 | // BenchmarkReadMemStatsWithGCContention-16 10 217279 p50-ns 295308 p90-ns 295308 p99-ns 100 | // BenchmarkReadMemStatsWithGCContention-16 10 167054 p50-ns 327072 p90-ns 327072 p99-ns 101 | // PASS 102 | // 103 | // See: https://github.com/golang/go/issues/19812 104 | // See: https://github.com/prometheus/client_golang/issues/403 105 | memstatsFn = runtime.ReadMemStats 106 | sysmemFn = (*gosigar.Mem).Get 107 | ) 108 | 109 | type notifeeEntry struct { 110 | id int 111 | f func() 112 | } 113 | 114 | var ( 115 | // ErrAlreadyStarted is returned when the user tries to start the watchdog more than once. 116 | ErrAlreadyStarted = fmt.Errorf("singleton memory watchdog was already started") 117 | ) 118 | 119 | const ( 120 | // stateUnstarted represents an unstarted state. 121 | stateUnstarted int32 = iota 122 | // stateRunning represents an operational state. 123 | stateRunning 124 | ) 125 | 126 | // _watchdog is a global singleton watchdog. 127 | var _watchdog struct { 128 | lk sync.Mutex 129 | state int32 130 | 131 | scope UtilizationType 132 | 133 | hpleft uint // tracks the amount of heap profiles left. 134 | hpcurr bool // tracks whether a heap profile has already been taken for this episode. 135 | 136 | closing chan struct{} 137 | wg sync.WaitGroup 138 | } 139 | 140 | // UtilizationType is the utilization metric in use. 141 | type UtilizationType int 142 | 143 | const ( 144 | // UtilizationSystem specifies that the policy compares against actual used 145 | // system memory. 146 | UtilizationSystem UtilizationType = iota 147 | // UtilizationProcess specifies that the watchdog is using process limits. 148 | UtilizationProcess 149 | // UtilizationHeap specifies that the policy compares against heap used. 150 | UtilizationHeap 151 | ) 152 | 153 | // PolicyCtor is a policy constructor. 154 | type PolicyCtor func(limit uint64) (Policy, error) 155 | 156 | // Policy is polled by the watchdog to determine the next utilisation at which 157 | // a GC should be forced. 158 | type Policy interface { 159 | // Evaluate determines when the next GC should take place. It receives the 160 | // current usage, and it returns the next usage at which to trigger GC. 161 | Evaluate(scope UtilizationType, used uint64) (next uint64) 162 | } 163 | 164 | // HeapDriven starts a singleton heap-driven watchdog, which adjusts GOGC 165 | // dynamically after every GC, to honour the policy requirements. 166 | // 167 | // Providing a zero-valued limit will error. A minimum GOGC value is required, 168 | // so as to avoid overscheduling GC, and overfitting to a specific target. 
169 | func HeapDriven(limit uint64, minGOGC int, policyCtor PolicyCtor) (err error, stopFn func()) { 170 | if limit == 0 { 171 | return fmt.Errorf("cannot use zero limit for heap-driven watchdog"), nil 172 | } 173 | 174 | policy, err := policyCtor(limit) 175 | if err != nil { 176 | return fmt.Errorf("failed to construct policy with limit %d: %w", limit, err), nil 177 | } 178 | 179 | if err := start(UtilizationHeap); err != nil { 180 | return err, nil 181 | } 182 | 183 | gcTriggered := make(chan struct{}, 16) 184 | setupGCSentinel(gcTriggered) 185 | 186 | _watchdog.wg.Add(1) 187 | go func() { 188 | defer _watchdog.wg.Done() 189 | defer wdrecover() // recover from panics. 190 | 191 | // get the initial effective GOGC; guess it's 100 (default), and restore 192 | // it to whatever it actually was. This works because SetGCPercent 193 | // returns the previous value. 194 | originalGOGC := debug.SetGCPercent(100) 195 | debug.SetGCPercent(originalGOGC) 196 | currGOGC := originalGOGC 197 | 198 | var memstats runtime.MemStats 199 | for { 200 | select { 201 | case <-gcTriggered: 202 | notifyGC() 203 | 204 | case <-_watchdog.closing: 205 | return 206 | } 207 | 208 | // recompute the next trigger. 209 | memstatsFn(&memstats) 210 | 211 | maybeCaptureHeapProfile(memstats.HeapAlloc, limit) 212 | 213 | // heapMarked is the amount of heap that was marked as live by GC. 214 | // it is inferred from our current GOGC and the new target picked. 215 | // 216 | // this accurately represents the heap that was live after the last GC, since NextGC = heapMarked * (1 + GOGC/100). 217 | heapMarked := uint64(float64(memstats.NextGC) / (1 + float64(currGOGC)/100)) 218 | if heapMarked == 0 { 219 | // this shouldn't happen, but just in case; avoiding a div by 0. 220 | Logger.Warnf("heap-driven watchdog: inferred zero heap marked; skipping evaluation") 221 | continue 222 | } 223 | 224 | // evaluate the policy. 225 | next := policy.Evaluate(UtilizationHeap, memstats.HeapAlloc) 226 | 227 | // calculate how much to set GOGC to honour the next trigger point. 228 | // next=PolicyTempDisabled value would make currGOGC extremely high, 229 | // greater than originalGOGC, and therefore we'd restore originalGOGC. 230 | currGOGC = int(((float64(next) / float64(heapMarked)) - float64(1)) * 100) 231 | if currGOGC >= originalGOGC { 232 | Logger.Debugf("heap watchdog: requested GOGC percent higher than default; capping at default; requested: %d; default: %d", currGOGC, originalGOGC) 233 | currGOGC = originalGOGC 234 | } else { 235 | if currGOGC < minGOGC { 236 | currGOGC = minGOGC // enforce the minimum GOGC to avoid overscheduling GC. 237 | } 238 | Logger.Debugf("heap watchdog: setting GOGC percent: %d", currGOGC) 239 | } 240 | 241 | debug.SetGCPercent(currGOGC) 242 | 243 | memstatsFn(&memstats) 244 | Logger.Infof("gc finished; heap watchdog stats: heap_alloc: %d, heap_marked: %d, next_gc: %d, policy_next_gc: %d, gogc: %d", 245 | memstats.HeapAlloc, heapMarked, memstats.NextGC, next, currGOGC) 246 | } 247 | }() 248 | 249 | return nil, stop 250 | } 251 | 252 | // SystemDriven starts a singleton system-driven watchdog. 253 | // 254 | // The system-driven watchdog keeps a threshold, above which GC will be forced. 255 | // The watchdog polls the system utilization at the specified frequency. When 256 | // the actual utilization exceeds the threshold, a GC is forced. 257 | // 258 | // This threshold is calculated by querying the policy every time that GC runs, 259 | // either triggered by the runtime, or forced by us. 
260 | func SystemDriven(limit uint64, frequency time.Duration, policyCtor PolicyCtor) (err error, stopFn func()) { 261 | if limit == 0 { 262 | var sysmem gosigar.Mem 263 | if err := sysmemFn(&sysmem); err != nil { 264 | return fmt.Errorf("failed to get system memory stats: %w", err), nil 265 | } 266 | limit = sysmem.Total 267 | } 268 | 269 | policy, err := policyCtor(limit) 270 | if err != nil { 271 | return fmt.Errorf("failed to construct policy with limit %d: %w", limit, err), nil 272 | } 273 | 274 | if err := start(UtilizationSystem); err != nil { 275 | return err, nil 276 | } 277 | 278 | _watchdog.wg.Add(1) 279 | var sysmem gosigar.Mem 280 | go pollingWatchdog(policy, frequency, limit, func() (uint64, error) { 281 | if err := sysmemFn(&sysmem); err != nil { 282 | return 0, err 283 | } 284 | return sysmem.ActualUsed, nil 285 | }) 286 | 287 | return nil, stop 288 | } 289 | 290 | // pollingWatchdog starts a polling watchdog with the provided policy, using 291 | // the supplied polling frequency. On every tick, it calls usageFn and, if the 292 | // usage is greater than or equal to the threshold at the time, it forces GC. 293 | // usageFn is guaranteed to be called serially, so no locking should be 294 | // necessary. 295 | func pollingWatchdog(policy Policy, frequency time.Duration, limit uint64, usageFn func() (uint64, error)) { 296 | defer _watchdog.wg.Done() 297 | defer wdrecover() // recover from panics. 298 | 299 | gcTriggered := make(chan struct{}, 16) 300 | setupGCSentinel(gcTriggered) 301 | 302 | var ( 303 | memstats runtime.MemStats 304 | threshold uint64 305 | ) 306 | 307 | renewThreshold := func() { 308 | // get the current usage. 309 | usage, err := usageFn() 310 | if err != nil { 311 | Logger.Warnf("failed to obtain memory utilization stats; err: %s", err) 312 | return 313 | } 314 | // calculate the threshold. 315 | threshold = policy.Evaluate(_watchdog.scope, usage) 316 | } 317 | 318 | // initialize the threshold. 319 | renewThreshold() 320 | 321 | // initialize an empty timer. 322 | timer := Clock.Timer(0) 323 | stopTimer := func() { 324 | if !timer.Stop() { 325 | <-timer.C 326 | } 327 | } 328 | 329 | for { 330 | timer.Reset(frequency) 331 | 332 | select { 333 | case <-timer.C: 334 | // get the current usage. 335 | usage, err := usageFn() 336 | if err != nil { 337 | Logger.Warnf("failed to obtain memory utilization stats; err: %s", err) 338 | continue 339 | } 340 | 341 | // evaluate if a heap profile needs to be captured. 342 | maybeCaptureHeapProfile(usage, limit) 343 | 344 | if usage < threshold { 345 | // nothing to do. 346 | continue 347 | } 348 | // trigger GC; this will emit a gcTriggered event which we'll 349 | // consume next to readjust the threshold. 350 | Logger.Warnf("system-driven watchdog triggering GC; %d/%d bytes (used/threshold)", usage, threshold) 351 | forceGC(&memstats) 352 | 353 | case <-gcTriggered: 354 | notifyGC() 355 | 356 | renewThreshold() 357 | 358 | stopTimer() 359 | 360 | case <-_watchdog.closing: 361 | stopTimer() 362 | return 363 | } 364 | } 365 | } 366 | 367 | // forceGC forces a manual GC. 368 | func forceGC(memstats *runtime.MemStats) { 369 | Logger.Infof("watchdog is forcing GC") 370 | 371 | startNotify := time.Now() 372 | notifyForcedGC() 373 | // it's safe to assume that the finalizer will attempt to run before 374 | // runtime.GC() returns because runtime.GC() waits for the sweep phase to 375 | // finish before returning. 376 | // finalizers are run in the sweep phase. 
377 | start := time.Now() 378 | notificationsTook := start.Sub(startNotify) 379 | ForcedGCFunc() 380 | took := time.Since(start) 381 | 382 | memstatsFn(memstats) 383 | Logger.Infof("watchdog-triggered GC finished; notifications took: %s, took: %s; current heap allocated: %d bytes", notificationsTook, took, memstats.HeapAlloc) 384 | } 385 | 386 | func setupGCSentinel(gcTriggered chan struct{}) { 387 | logger := Logger 388 | 389 | // this non-zero sized struct is used as a sentinel to detect when a GC 390 | // run has finished, by setting and resetting a finalizer on it. 391 | // it essentially creates a GC notification "flywheel"; every GC will 392 | // trigger this finalizer, which will reset itself so it gets notified 393 | // of the next GC, breaking the cycle when the watchdog is stopped. 394 | type sentinel struct{ a *int } 395 | var finalizer func(o *sentinel) 396 | finalizer = func(o *sentinel) { 397 | _watchdog.lk.Lock() 398 | defer _watchdog.lk.Unlock() 399 | 400 | if _watchdog.state != stateRunning { 401 | // this GC triggered after the watchdog was stopped; ignore 402 | // and do not reset the finalizer. 403 | return 404 | } 405 | 406 | // reset so it triggers on the next GC. 407 | runtime.SetFinalizer(o, finalizer) 408 | 409 | select { 410 | case gcTriggered <- struct{}{}: 411 | default: 412 | logger.Warnf("failed to queue gc trigger; channel backlogged") 413 | } 414 | } 415 | 416 | runtime.SetFinalizer(&sentinel{}, finalizer) // start the flywheel. 417 | } 418 | 419 | func start(scope UtilizationType) error { 420 | _watchdog.lk.Lock() 421 | defer _watchdog.lk.Unlock() 422 | 423 | if _watchdog.state != stateUnstarted { 424 | return ErrAlreadyStarted 425 | } 426 | 427 | _watchdog.state = stateRunning 428 | _watchdog.scope = scope 429 | _watchdog.closing = make(chan struct{}) 430 | 431 | initHeapProfileCapture() 432 | 433 | return nil 434 | } 435 | 436 | func stop() { 437 | _watchdog.lk.Lock() 438 | defer _watchdog.lk.Unlock() 439 | 440 | if _watchdog.state != stateRunning { 441 | return 442 | } 443 | 444 | close(_watchdog.closing) 445 | _watchdog.wg.Wait() 446 | _watchdog.state = stateUnstarted 447 | } 448 | 449 | func initHeapProfileCapture() { 450 | if HeapProfileDir == "" || HeapProfileThreshold <= 0 { 451 | Logger.Debugf("heap profile capture disabled") 452 | return 453 | } 454 | if HeapProfileThreshold >= 1 { 455 | Logger.Warnf("failed to initialize heap profile capture: threshold must be 0 < t < 1") 456 | return 457 | } 458 | if fi, err := os.Stat(HeapProfileDir); os.IsNotExist(err) { 459 | if err := os.MkdirAll(HeapProfileDir, 0777); err != nil { 460 | Logger.Warnf("failed to initialize heap profile capture: failed to create dir: %s; err: %s", HeapProfileDir, err) 461 | return 462 | } 463 | } else if err != nil { 464 | Logger.Warnf("failed to initialize heap profile capture: failed to stat path: %s; err: %s", HeapProfileDir, err) 465 | return 466 | } else if !fi.IsDir() { 467 | Logger.Warnf("failed to initialize heap profile capture: path exists but is not a directory: %s", HeapProfileDir) 468 | return 469 | } 470 | // all good, set the amount of heap profile captures left. 
471 | _watchdog.hpleft = HeapProfileMaxCaptures 472 | Logger.Infof("initialized heap profile capture; threshold: %f; max captures: %d; dir: %s", HeapProfileThreshold, HeapProfileMaxCaptures, HeapProfileDir) 473 | } 474 | 475 | func maybeCaptureHeapProfile(usage, limit uint64) { 476 | if _watchdog.hpleft <= 0 { 477 | // nothing to do; no captures remaining (or captures disabled), or 478 | // already captured a heap profile for this episode. 479 | return 480 | } 481 | if float64(usage)/float64(limit) < HeapProfileThreshold { 482 | // we are below the threshold, reset the hpcurr flag. 483 | _watchdog.hpcurr = false 484 | return 485 | } 486 | // we are above the threshold. 487 | if _watchdog.hpcurr { 488 | return // we've already captured this episode, skip. 489 | } 490 | 491 | path := filepath.Join(HeapProfileDir, time.Now().Format(time.RFC3339Nano)+".heap") 492 | file, err := os.Create(path) 493 | if err != nil { 494 | Logger.Warnf("failed to create heap profile file; path: %s; err: %s", path, err) 495 | return 496 | } 497 | defer file.Close() 498 | 499 | if err = pprof.WriteHeapProfile(file); err != nil { 500 | Logger.Warnf("failed to write heap profile; path: %s; err: %s", path, err) 501 | return 502 | } 503 | 504 | Logger.Infof("heap profile captured; path: %s", path) 505 | _watchdog.hpcurr = true 506 | _watchdog.hpleft-- 507 | } 508 | 509 | func wdrecover() { 510 | if r := recover(); r != nil { 511 | msg := fmt.Sprintf("WATCHDOG PANICKED; recovered but watchdog is disarmed: %s", r) 512 | if Logger != nil { 513 | Logger.Errorf(msg) 514 | } else { 515 | _, _ = fmt.Fprintln(os.Stderr, msg) 516 | } 517 | } 518 | } 519 | -------------------------------------------------------------------------------- /watchdog_linux.go: -------------------------------------------------------------------------------- 1 | package watchdog 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "time" 7 | 8 | "github.com/containerd/cgroups" 9 | ) 10 | 11 | var ( 12 | pid = os.Getpid() 13 | memSubsystem = cgroups.SingleSubsystem(cgroups.V1, cgroups.Memory) 14 | ) 15 | 16 | // CgroupDriven initializes a cgroups-driven watchdog. It will try to discover 17 | // the memory limit from the cgroup of the process (derived from /proc/self/cgroup), 18 | // or from the root cgroup path if the PID == 1 (which indicates that the process 19 | // is running in a container). 20 | // 21 | // Memory usage is calculated by querying the cgroup stats. 22 | // 23 | // This function will return an error immediately if the OS does not support cgroups, 24 | // or if another error occurs during initialization. The caller can then safely fall 25 | // back to the system driven watchdog. 26 | func CgroupDriven(frequency time.Duration, policyCtor PolicyCtor) (err error, stopFn func()) { 27 | // use self path unless our PID is 1, in which case we're running inside 28 | // a container and our limits are in the root path. 
29 | path := cgroups.NestedPath("") 30 | if pid := os.Getpid(); pid == 1 { 31 | path = cgroups.RootPath 32 | } 33 | 34 | cgroup, err := cgroups.Load(memSubsystem, path) 35 | if err != nil { 36 | return fmt.Errorf("failed to load cgroup for process: %w", err), nil 37 | } 38 | 39 | var limit uint64 40 | if stat, err := cgroup.Stat(); err != nil { 41 | return fmt.Errorf("failed to load memory cgroup stats: %w", err), nil 42 | } else if stat.Memory == nil || stat.Memory.Usage == nil { 43 | return fmt.Errorf("cgroup memory stats are nil; aborting"), nil 44 | } else { 45 | limit = stat.Memory.Usage.Limit 46 | } 47 | 48 | if limit == 0 { 49 | return fmt.Errorf("cgroup limit is 0; refusing to start memory watchdog"), nil 50 | } 51 | 52 | policy, err := policyCtor(limit) 53 | if err != nil { 54 | return fmt.Errorf("failed to construct policy with limit %d: %w", limit, err), nil 55 | } 56 | 57 | if err := start(UtilizationProcess); err != nil { 58 | return err, nil 59 | } 60 | 61 | _watchdog.wg.Add(1) 62 | go pollingWatchdog(policy, frequency, limit, func() (uint64, error) { 63 | stat, err := cgroup.Stat() 64 | if err != nil { 65 | return 0, err 66 | } else if stat.Memory == nil || stat.Memory.Usage == nil { 67 | return 0, fmt.Errorf("cgroup memory stats are nil; aborting") 68 | } 69 | return stat.Memory.Usage.Usage, nil 70 | }) 71 | 72 | return nil, stop 73 | } 74 | -------------------------------------------------------------------------------- /watchdog_linux_test.go: -------------------------------------------------------------------------------- 1 | package watchdog 2 | 3 | import ( 4 | "fmt" 5 | "log" 6 | "os" 7 | "runtime" 8 | "runtime/debug" 9 | "testing" 10 | "time" 11 | 12 | "github.com/containerd/cgroups" 13 | "github.com/opencontainers/runtime-spec/specs-go" 14 | "github.com/benbjohnson/clock" 15 | "github.com/stretchr/testify/require" 16 | ) 17 | 18 | // retained will hoard unreclaimable byte buffers in the heap. 19 | var retained [][]byte 20 | 21 | func TestCgroupsDriven_Create_Isolated(t *testing.T) { 22 | skipIfNotIsolated(t) 23 | 24 | if os.Getpid() == 1 { 25 | // we are running in Docker and cannot create a cgroup. 26 | t.Skipf("cannot create a cgroup while running in non-privileged docker") 27 | } 28 | 29 | // new cgroup limit. 30 | var limit = uint64(32 << 20) // 32MiB. 31 | createMemoryCgroup(t, limit) 32 | 33 | testCgroupsWatchdog(t, limit) 34 | } 35 | 36 | func TestCgroupsDriven_Docker_Isolated(t *testing.T) { 37 | skipIfNotIsolated(t) 38 | 39 | if os.Getpid() != 1 { 40 | // we are not running in a container. 41 | t.Skipf("test only runs inside a container") 42 | } 43 | 44 | testCgroupsWatchdog(t, uint64(DockerMemLimit)) 45 | } 46 | 47 | func testCgroupsWatchdog(t *testing.T, limit uint64) { 48 | t.Cleanup(func() { 49 | retained = nil 50 | }) 51 | 52 | runtime.GC() // first GC to clear any junk from other tests. 53 | debug.SetGCPercent(100000000) // disable GC. 54 | 55 | clk := clock.NewMock() 56 | Clock = clk 57 | 58 | notifyCh := make(chan struct{}, 1) 59 | NotifyGC = func() { 60 | notifyCh <- struct{}{} 61 | } 62 | 63 | err, stopFn := CgroupDriven(5*time.Second, NewAdaptivePolicy(0.5)) 64 | require.NoError(t, err) 65 | defer stopFn() 66 | 67 | time.Sleep(200 * time.Millisecond) // give time for the watchdog to init. 68 | 69 | maxSlabs := limit / (1 << 20) // number of 1MiB slabs to take up the entire limit. 70 | 71 | // first tick; nothing should happen. 
72 | clk.Add(5 * time.Second) 73 | time.Sleep(200 * time.Millisecond) 74 | require.Len(t, notifyCh, 0) // no GC has taken place. 75 | 76 | // allocate 50% of limit in heap (to be added to other mem usage). 77 | for i := 0; i < (int(maxSlabs))/2; i++ { 78 | retained = append(retained, func() []byte { 79 | b := make([]byte, 1*1024*1024) 80 | for i := range b { 81 | b[i] = 0xff 82 | } 83 | return b 84 | }()) 85 | } 86 | 87 | // second tick; used = just over 50%; will trigger GC. 88 | clk.Add(5 * time.Second) 89 | time.Sleep(200 * time.Millisecond) 90 | require.NotNil(t, <-notifyCh) 91 | 92 | var memstats runtime.MemStats 93 | runtime.ReadMemStats(&memstats) 94 | require.EqualValues(t, 2, memstats.NumForcedGC) 95 | } 96 | 97 | // createMemoryCgroup creates a memory cgroup to restrict the memory available 98 | // to this test. 99 | func createMemoryCgroup(t *testing.T, limit uint64) { 100 | l := int64(limit) 101 | path := cgroups.NestedPath(fmt.Sprintf("/%d", time.Now().UnixNano())) 102 | cgroup, err := cgroups.New(cgroups.V1, path, &specs.LinuxResources{ 103 | Memory: &specs.LinuxMemory{ 104 | Limit: &l, 105 | Swap: &l, 106 | }, 107 | }) 108 | 109 | require.NoError(t, err, "failed to create a cgroup") 110 | t.Cleanup(func() { 111 | root, err := cgroups.Load(cgroups.V1, cgroups.RootPath) 112 | if err != nil { 113 | t.Logf("failed to resolve root cgroup: %s", err) 114 | return 115 | } 116 | if err = root.Add(cgroups.Process{Pid: pid}); err != nil { 117 | t.Logf("failed to move process to root cgroup: %s", err) 118 | return 119 | } 120 | if err = cgroup.Delete(); err != nil { 121 | t.Logf("failed to clean up temp cgroup: %s", err) 122 | } 123 | }) 124 | 125 | log.Printf("cgroup created") 126 | 127 | // add process to cgroup. 128 | err = cgroup.Add(cgroups.Process{Pid: pid}) 129 | require.NoError(t, err) 130 | } 131 | -------------------------------------------------------------------------------- /watchdog_other.go: -------------------------------------------------------------------------------- 1 | // +build !linux 2 | 3 | package watchdog 4 | 5 | import ( 6 | "fmt" 7 | "time" 8 | ) 9 | 10 | // CgroupDriven is only available in Linux. This method will error. 11 | func CgroupDriven(frequency time.Duration, policyCtor PolicyCtor) (err error, stopFn func()) { 12 | return fmt.Errorf("cgroups-driven watchdog: %w", ErrNotSupported), nil 13 | } 14 | -------------------------------------------------------------------------------- /watchdog_other_test.go: -------------------------------------------------------------------------------- 1 | // +build !linux 2 | 3 | package watchdog 4 | 5 | import "testing" 6 | 7 | func TestCgroupsDriven_Create_Isolated(t *testing.T) { 8 | // this test only runs on linux. 9 | t.Skip("test only valid on linux") 10 | } 11 | 12 | func TestCgroupsDriven_Docker_Isolated(t *testing.T) { 13 | // this test only runs on linux. 
14 | t.Skip("test only valid on linux") 15 | } 16 | -------------------------------------------------------------------------------- /watchdog_test.go: -------------------------------------------------------------------------------- 1 | package watchdog 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "io/ioutil" 8 | "log" 9 | "os" 10 | "path/filepath" 11 | "runtime" 12 | "runtime/debug" 13 | "strconv" 14 | "testing" 15 | "time" 16 | 17 | "github.com/elastic/gosigar" 18 | "github.com/benbjohnson/clock" 19 | "github.com/stretchr/testify/require" 20 | ) 21 | 22 | const ( 23 | // EnvTestIsolated is a marker property for the runner to confirm that this 24 | // test is running in isolation (i.e. a dedicated process). 25 | EnvTestIsolated = "TEST_ISOLATED" 26 | 27 | // EnvTestDockerMemLimit is the memory limit applied in a docker container. 28 | EnvTestDockerMemLimit = "TEST_DOCKER_MEMLIMIT" 29 | ) 30 | 31 | // DockerMemLimit is initialized in the init() function from the 32 | // EnvTestDockerMemLimit env variable. 33 | var DockerMemLimit int // bytes 34 | 35 | func init() { 36 | Logger = &stdlog{log: log.New(os.Stdout, "[watchdog test] ", log.LstdFlags|log.Lmsgprefix), debug: true} 37 | 38 | if l := os.Getenv(EnvTestDockerMemLimit); l != "" { 39 | l, err := strconv.Atoi(l) 40 | if err != nil { 41 | panic(err) 42 | } 43 | DockerMemLimit = l 44 | } 45 | } 46 | 47 | func skipIfNotIsolated(t *testing.T) { 48 | if os.Getenv(EnvTestIsolated) != "1" { 49 | t.Skipf("skipping test in non-isolated mode") 50 | } 51 | } 52 | 53 | var ( 54 | limit64MiB uint64 = 64 << 20 // 64MiB. 55 | ) 56 | 57 | func TestControl_Isolated(t *testing.T) { 58 | skipIfNotIsolated(t) 59 | 60 | debug.SetGCPercent(100) 61 | 62 | rounds := 100 63 | if DockerMemLimit != 0 { 64 | rounds /= int(float64(DockerMemLimit)*0.8) / 1024 / 1024 65 | } 66 | 67 | // retain 1MiB every iteration. 68 | var retained [][]byte 69 | for i := 0; i < rounds; i++ { 70 | b := make([]byte, 1*1024*1024) 71 | for i := range b { 72 | b[i] = byte(i) 73 | } 74 | retained = append(retained, b) 75 | } 76 | 77 | for _, b := range retained { 78 | for i := range b { 79 | b[i] = byte(i) 80 | } 81 | } 82 | 83 | var ms runtime.MemStats 84 | runtime.ReadMemStats(&ms) 85 | require.NotZero(t, ms.NumGC) // GCs have taken place, but... 86 | require.Zero(t, ms.NumForcedGC) // ... no forced GCs beyond our initial one. 87 | } 88 | 89 | func TestHeapDriven_Isolated(t *testing.T) { 90 | skipIfNotIsolated(t) 91 | 92 | // we can't mock ReadMemStats, because we're relying on the go runtime to 93 | // enforce the GC run, and the go runtime won't use our mock. Therefore, we 94 | // need to do the actual thing. 95 | debug.SetGCPercent(100) 96 | 97 | clk := clock.NewMock() 98 | Clock = clk 99 | 100 | observations := make([]*runtime.MemStats, 0, 100) 101 | NotifyGC = func() { 102 | var ms runtime.MemStats 103 | runtime.ReadMemStats(&ms) 104 | observations = append(observations, &ms) 105 | } 106 | 107 | // limit is 64MiB. 108 | err, stopFn := HeapDriven(limit64MiB, 0, NewAdaptivePolicy(0.5)) 109 | require.NoError(t, err) 110 | defer stopFn() 111 | 112 | time.Sleep(500 * time.Millisecond) // give time for the watchdog to init. 113 | 114 | // retain 1MiB every iteration, up to 100MiB (beyond heap limit!). 
115 | var retained [][]byte 116 | for i := 0; i < 100; i++ { 117 | retained = append(retained, make([]byte, 1*1024*1024)) 118 | } 119 | 120 | for _, o := range observations { 121 | fmt.Println("heap alloc:", o.HeapAlloc, "next gc:", o.NextGC, "gc count:", o.NumGC, "forced gc:", o.NumForcedGC) 122 | } 123 | 124 | var ms runtime.MemStats 125 | runtime.ReadMemStats(&ms) 126 | require.GreaterOrEqual(t, ms.NumGC, uint32(5)) // over 5 GCs should've taken place. 127 | } 128 | 129 | func TestSystemDriven_Isolated(t *testing.T) { 130 | skipIfNotIsolated(t) 131 | 132 | debug.SetGCPercent(100) 133 | 134 | clk := clock.NewMock() 135 | Clock = clk 136 | 137 | // mock the system reporting. 138 | var actualUsed uint64 139 | sysmemFn = func(g *gosigar.Mem) error { 140 | g.ActualUsed = actualUsed 141 | return nil 142 | } 143 | 144 | // limit is 64MiB. 145 | err, stopFn := SystemDriven(limit64MiB, 5*time.Second, NewAdaptivePolicy(0.5)) 146 | require.NoError(t, err) 147 | defer stopFn() 148 | 149 | time.Sleep(200 * time.Millisecond) // give time for the watchdog to init. 150 | 151 | notifyChDeprecated := make(chan struct{}, 1) 152 | notifyCh := make(chan struct{}, 1) 153 | NotifyGC = func() { 154 | notifyChDeprecated <- struct{}{} 155 | } 156 | unregister := RegisterPostGCNotifee(func() { 157 | notifyCh <- struct{}{} 158 | }) 159 | defer unregister() 160 | 161 | // first tick; used = 0. 162 | clk.Add(5 * time.Second) 163 | time.Sleep(200 * time.Millisecond) 164 | require.Len(t, notifyChDeprecated, 0) // no GC has taken place. 165 | require.Len(t, notifyCh, 0) // no GC has taken place. 166 | 167 | // second tick; used = just over 50%; will trigger GC. 168 | actualUsed = (limit64MiB / 2) + 1 169 | clk.Add(5 * time.Second) 170 | time.Sleep(200 * time.Millisecond) 171 | require.Len(t, notifyChDeprecated, 1) 172 | require.Len(t, notifyCh, 1) 173 | <-notifyChDeprecated 174 | <-notifyCh 175 | 176 | // third tick; just below 75%; no GC. 177 | actualUsed = uint64(float64(limit64MiB)*0.75) - 1 178 | clk.Add(5 * time.Second) 179 | time.Sleep(200 * time.Millisecond) 180 | require.Len(t, notifyChDeprecated, 0) 181 | require.Len(t, notifyCh, 0) 182 | 183 | // fourth tick; 75% exactly; will trigger GC. 184 | actualUsed = uint64(float64(limit64MiB)*0.75) + 1 185 | clk.Add(5 * time.Second) 186 | time.Sleep(200 * time.Millisecond) 187 | require.Len(t, notifyCh, 1) 188 | require.Len(t, notifyChDeprecated, 1) 189 | <-notifyChDeprecated 190 | <-notifyCh 191 | 192 | var ms runtime.MemStats 193 | runtime.ReadMemStats(&ms) 194 | require.GreaterOrEqual(t, ms.NumForcedGC, uint32(2)) 195 | } 196 | 197 | // TestHeapdumpCapture tests that heap dumps are captured appropriately. 198 | func TestHeapdumpCapture(t *testing.T) { 199 | debug.SetGCPercent(100) 200 | 201 | dir, err := ioutil.TempDir("", "") 202 | require.NoError(t, err) 203 | 204 | t.Cleanup(func() { 205 | _ = os.RemoveAll(dir) 206 | }) 207 | 208 | assertFileCount := func(expected int) { 209 | glob, err := filepath.Glob(filepath.Join(dir, "*")) 210 | require.NoError(t, err) 211 | require.Len(t, glob, expected) 212 | } 213 | 214 | HeapProfileDir = dir 215 | HeapProfileThreshold = 0.5 216 | HeapProfileMaxCaptures = 5 217 | 218 | // mock clock. 219 | clk := clock.NewMock() 220 | Clock = clk 221 | 222 | // mock the system reporting. 223 | var actualUsed uint64 224 | sysmemFn = func(g *gosigar.Mem) error { 225 | g.ActualUsed = actualUsed 226 | return nil 227 | } 228 | 229 | // init a system driven watchdog. 
230 | 	err, stopFn := SystemDriven(limit64MiB, 5*time.Second, NewAdaptivePolicy(0.5))
231 | 	require.NoError(t, err)
232 | 	defer stopFn()
233 | 	time.Sleep(200 * time.Millisecond) // give time for the watchdog to init.
234 | 
235 | 	// first tick; used = 0.
236 | 	clk.Add(5 * time.Second)
237 | 	time.Sleep(200 * time.Millisecond)
238 | 	assertFileCount(0)
239 | 
240 | 	// second tick; used = just over 50%; will trigger a heap dump.
241 | 	actualUsed = (limit64MiB / 2) + 1
242 | 	clk.Add(5 * time.Second)
243 | 	time.Sleep(200 * time.Millisecond)
244 | 	assertFileCount(1)
245 | 
246 | 	// third tick; continues above 50%; same episode, no heap dump.
247 | 	actualUsed = (limit64MiB / 2) + 10
248 | 	clk.Add(5 * time.Second)
249 | 	time.Sleep(200 * time.Millisecond)
250 | 	assertFileCount(1)
251 | 
252 | 	// fourth tick; below 50%; this resets the episodic flag.
253 | 	actualUsed = limit64MiB / 3
254 | 	clk.Add(5 * time.Second)
255 | 	time.Sleep(200 * time.Millisecond)
256 | 	assertFileCount(1)
257 | 
258 | 	// fifth tick; above 50%; this triggers a new heap dump.
259 | 	actualUsed = (limit64MiB / 2) + 1
260 | 	clk.Add(5 * time.Second)
261 | 	time.Sleep(200 * time.Millisecond)
262 | 	assertFileCount(2)
263 | 
264 | 	for i := 0; i < 20; i++ {
265 | 		// below 50%; this resets the episodic flag.
266 | 		actualUsed = limit64MiB / 3
267 | 		clk.Add(5 * time.Second)
268 | 		time.Sleep(200 * time.Millisecond)
269 | 
270 | 		// above 50%; this triggers a new heap dump.
271 | 		actualUsed = (limit64MiB / 2) + 1
272 | 		clk.Add(5 * time.Second)
273 | 		time.Sleep(200 * time.Millisecond)
274 | 	}
275 | 
276 | 	assertFileCount(5) // we only generated 5 heap dumps even though we had more episodes.
277 | 
278 | 	// verify that heap dump file sizes aren't zero.
279 | 	glob, err := filepath.Glob(filepath.Join(dir, "*"))
280 | 	require.NoError(t, err)
281 | 	for _, f := range glob {
282 | 		fi, err := os.Stat(f)
283 | 		require.NoError(t, err)
284 | 		require.NotZero(t, fi.Size())
285 | 	}
286 | }
287 | 
288 | type panickingPolicy struct{}
289 | 
290 | func (panickingPolicy) Evaluate(_ UtilizationType, _ uint64) uint64 {
291 | 	panic("oops!")
292 | }
293 | 
294 | func TestPanicRecover(t *testing.T) {
295 | 	// replace the logger with one that tees into the buffer.
296 | 	b := new(bytes.Buffer)
297 | 	Logger.(*stdlog).log.SetOutput(io.MultiWriter(b, os.Stdout))
298 | 
299 | 	// simulate a polling watchdog with a panicking policy.
300 | 	_watchdog.wg.Add(1)
301 | 	pollingWatchdog(panickingPolicy{}, 1*time.Millisecond, 10000000, func() (uint64, error) {
302 | 		return 0, nil
303 | 	})
304 | 	require.Contains(t, b.String(), "WATCHDOG PANICKED")
305 | 
306 | 	b.Reset()                                                // reset buffer.
307 | 	require.NotContains(t, b.String(), "WATCHDOG PANICKED")  // sanity-check the buffer was cleared.
308 | 
309 | 	// simulate a polling watchdog with a panicking usage function.
310 | 	_watchdog.wg.Add(1)
311 | 	pollingWatchdog(&adaptivePolicy{factor: 0.5}, 1*time.Millisecond, 10000000, func() (uint64, error) {
312 | 		panic("bang!")
313 | 	})
314 | 	require.Contains(t, b.String(), "WATCHDOG PANICKED")
315 | }
316 | 
--------------------------------------------------------------------------------
/watermarks.go:
--------------------------------------------------------------------------------
1 | package watchdog
2 | 
3 | // NewWatermarkPolicy creates a watchdog policy that schedules GC at concrete
4 | // watermarks. When queried, it will determine the next trigger point based
5 | // on the current utilisation. If the last watermark is surpassed,
6 | // the policy will be disarmed. It is recommended to set an extreme watermark
7 | // as the last element (e.g. 0.99) to prevent the policy from disarming too soon.
8 | func NewWatermarkPolicy(watermarks ...float64) PolicyCtor {
9 | 	return func(limit uint64) (Policy, error) {
10 | 		p := new(watermarkPolicy)
11 | 		p.limit = limit
12 | 		p.watermarks = watermarks
13 | 		p.thresholds = make([]uint64, 0, len(watermarks))
14 | 		for _, m := range watermarks {
15 | 			p.thresholds = append(p.thresholds, uint64(float64(limit)*m))
16 | 		}
17 | 		Logger.Infof("initialized watermark watchdog policy; watermarks: %v; thresholds: %v", p.watermarks, p.thresholds)
18 | 		return p, nil
19 | 	}
20 | }
21 | 
22 | type watermarkPolicy struct {
23 | 	// watermarks are the fractions of the limit at which GC is scheduled.
24 | 	watermarks []float64
25 | 	// thresholds are the absolute trigger points of this policy.
26 | 	thresholds []uint64
27 | 	limit      uint64
28 | }
29 | 
30 | var _ Policy = (*watermarkPolicy)(nil)
31 | 
32 | func (w *watermarkPolicy) Evaluate(_ UtilizationType, used uint64) (next uint64) {
33 | 	Logger.Debugf("watermark policy: evaluating; utilization: %d/%d (used/limit)", used, w.limit)
34 | 	var i int
35 | 	for ; i < len(w.thresholds); i++ {
36 | 		t := w.thresholds[i]
37 | 		if used < t {
38 | 			return t
39 | 		}
40 | 	}
41 | 	// we surpassed the last threshold, so we disarm this policy.
42 | 	return PolicyTempDisabled
43 | }
--------------------------------------------------------------------------------
/watermarks_test.go:
--------------------------------------------------------------------------------
1 | package watchdog
2 | 
3 | import (
4 | 	"testing"
5 | 
6 | 	"github.com/benbjohnson/clock"
7 | 	"github.com/stretchr/testify/require"
8 | )
9 | 
10 | var (
11 | 	watermarks = []float64{0.50, 0.75, 0.80}
12 | 	thresholds = func() []uint64 {
13 | 		var ret []uint64
14 | 		for _, w := range watermarks {
15 | 			ret = append(ret, uint64(float64(limit64MiB)*w))
16 | 		}
17 | 		return ret
18 | 	}()
19 | )
20 | 
21 | func TestProgressiveWatermarks(t *testing.T) {
22 | 	clk := clock.NewMock()
23 | 	Clock = clk
24 | 
25 | 	p, err := NewWatermarkPolicy(watermarks...)(limit64MiB)
26 | 	require.NoError(t, err)
27 | 
28 | 	// at zero utilisation, the first watermark is the next trigger point.
29 | 	next := p.Evaluate(UtilizationSystem, uint64(0))
30 | 	require.EqualValues(t, thresholds[0], next)
31 | 
32 | 	// before the first watermark.
33 | 	next = p.Evaluate(UtilizationSystem, uint64(float64(limit64MiB)*watermarks[0])-1)
34 | 	require.EqualValues(t, thresholds[0], next)
35 | 
36 | 	// exactly at the watermark; gives us the next watermark, as the watchdog would've
37 | 	// taken care of triggering the first watermark.
38 | 	next = p.Evaluate(UtilizationSystem, uint64(float64(limit64MiB)*watermarks[0]))
39 | 	require.EqualValues(t, thresholds[1], next)
40 | 
41 | 	// after the watermark gives us the next watermark.
42 | 	next = p.Evaluate(UtilizationSystem, uint64(float64(limit64MiB)*watermarks[0])+1)
43 | 	require.EqualValues(t, thresholds[1], next)
44 | 
45 | 	// at or beyond the last watermark, the policy disarms.
46 | 	next = p.Evaluate(UtilizationSystem, uint64(float64(limit64MiB)*watermarks[2]))
47 | 	require.EqualValues(t, PolicyTempDisabled, next)
48 | 
49 | 	next = p.Evaluate(UtilizationSystem, uint64(float64(limit64MiB)*watermarks[2])+1)
50 | 	require.EqualValues(t, PolicyTempDisabled, next)
51 | 
52 | 	next = p.Evaluate(UtilizationSystem, limit64MiB)
53 | 	require.EqualValues(t, PolicyTempDisabled, next)
54 | }
55 | 
--------------------------------------------------------------------------------
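For orientation, here is a minimal usage sketch tying together the watermark policy and the system-driven watchdog exercised by the tests above. It is not a file in this repository: the import path, the 1GiB limit, and the watermark values are illustrative assumptions.

--------------------------------------------------------------------------------
/example_watermarks_test.go (hypothetical usage sketch, not in the repository):
--------------------------------------------------------------------------------
1 | package watchdog_test
2 | 
3 | import (
4 | 	"log"
5 | 	"time"
6 | 
7 | 	watchdog "github.com/raulk/go-watchdog" // assumed module path.
8 | )
9 | 
10 | // Example_watermarkPolicy forces GC at 50%, 75%, 90% and 99% of an assumed
11 | // 1GiB limit, polling system memory every 5s. The extreme last watermark
12 | // (0.99) keeps the policy from disarming too soon.
13 | func Example_watermarkPolicy() {
14 | 	limit := uint64(1 << 30) // assumption: a 1GiB memory limit.
15 | 
16 | 	err, stopFn := watchdog.SystemDriven(limit, 5*time.Second,
17 | 		watchdog.NewWatermarkPolicy(0.50, 0.75, 0.90, 0.99))
18 | 	if err != nil {
19 | 		log.Fatal(err)
20 | 	}
21 | 	defer stopFn()
22 | 
23 | 	// ... run the application; the watchdog triggers GC at each watermark ...
24 | }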