├── .github └── workflows │ ├── build.yaml │ ├── commitlint.yaml │ ├── golangci-lint.yaml │ └── release.yaml ├── .gitignore ├── .golangci.yaml ├── .goreleaser.yaml ├── LICENSE.txt ├── README.md ├── go.mod ├── go.sum ├── main.go └── retry ├── retry.go ├── retry_test.go └── task.go /.github/workflows/build.yaml: -------------------------------------------------------------------------------- 1 | name: Build 2 | on: 3 | pull_request: 4 | branches: 5 | - '*' 6 | push: 7 | branches: 8 | - "master" 9 | 10 | jobs: 11 | build: 12 | name: Build 13 | runs-on: ubuntu-22.04 14 | 15 | steps: 16 | - name: Setup go 17 | uses: actions/setup-go@v2 18 | with: 19 | go-version: 1.19 20 | 21 | - name: Checkout code 22 | uses: actions/checkout@v2 23 | 24 | - name: Build binary 25 | run: go build -o dist/retry -trimpath -ldflags="-s -w -X main.version=$(git describe --always)" . 26 | 27 | - name: Sanity check version 28 | run: ./dist/retry -version 29 | 30 | test: 31 | name: Test 32 | runs-on: ubuntu-22.04 33 | 34 | steps: 35 | - name: Setup go 36 | uses: actions/setup-go@v2 37 | with: 38 | go-version: 1.19 39 | 40 | - name: Checkout code 41 | uses: actions/checkout@v2 42 | 43 | - name: Run tests 44 | run: go test -v -race -cover ./... 
45 | -------------------------------------------------------------------------------- /.github/workflows/commitlint.yaml: -------------------------------------------------------------------------------- 1 | name: Commitlint 2 | on: pull_request 3 | 4 | jobs: 5 | lint: 6 | name: Commitlint 7 | runs-on: ubuntu-22.04 8 | 9 | steps: 10 | - name: Checkout code 11 | uses: actions/checkout@v2 12 | with: 13 | fetch-depth: 0 14 | 15 | - name: Run commitlint 16 | uses: wagoid/commitlint-github-action@v5 17 | -------------------------------------------------------------------------------- /.github/workflows/golangci-lint.yaml: -------------------------------------------------------------------------------- 1 | name: GolangCI-Lint 2 | on: 3 | pull_request: 4 | branches: 5 | - '*' 6 | push: 7 | branches: 8 | - "master" 9 | 10 | jobs: 11 | lint: 12 | name: GolangCI-Lint 13 | runs-on: ubuntu-22.04 14 | 15 | steps: 16 | - name: Checkout code 17 | uses: actions/checkout@v2 18 | 19 | - name: Setup go 20 | uses: actions/setup-go@v2 21 | with: 22 | go-version: 1.19 23 | 24 | - name: Run golangci-lint 25 | uses: golangci/golangci-lint-action@v2 26 | with: 27 | # https://github.com/golangci/golangci-lint/releases/tag/v1.50.1 28 | version: v1.50.1 29 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Release 2 | on: 3 | push: 4 | tags: 5 | - "v*.*.*" 6 | 7 | jobs: 8 | build: 9 | name: Release 10 | runs-on: ubuntu-22.04 11 | 12 | steps: 13 | - name: Setup go 14 | uses: actions/setup-go@v2 15 | with: 16 | go-version: 1.19 17 | 18 | - name: Checkout code 19 | uses: actions/checkout@v2 20 | 21 | - name: Build and publish release artifacts 22 | uses: goreleaser/goreleaser-action@v2 23 | with: 24 | version: latest 25 | args: release 26 | env: 27 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 28 | HOMEBREW_GITHUB_TOKEN: ${{ 
secrets.HOMEBREW_GITHUB_TOKEN }} 29 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | dist/ 2 | -------------------------------------------------------------------------------- /.golangci.yaml: -------------------------------------------------------------------------------- 1 | issues: 2 | exclude-use-default: true 3 | exclude: 4 | # Triggered by long table tests. 5 | - Function 'Test\w+' is too long 6 | 7 | linters: 8 | enable-all: true 9 | disable: 10 | # Linters that are deprecated. 11 | - exhaustivestruct 12 | - scopelint 13 | - interfacer 14 | - maligned 15 | - golint 16 | - ifshort 17 | - structcheck 18 | - nosnakecase 19 | - deadcode 20 | - varcheck 21 | 22 | # Linters that are disabled because of generics. 23 | - rowserrcheck 24 | - sqlclosecheck 25 | - wastedassign 26 | 27 | # Linters that are not used for this project. 28 | - cyclop 29 | - exhaustruct 30 | - goerr113 31 | - gomnd 32 | - gosec 33 | - nlreturn 34 | - paralleltest 35 | - testpackage 36 | - tparallel 37 | - wrapcheck 38 | - wsl 39 | -------------------------------------------------------------------------------- /.goreleaser.yaml: -------------------------------------------------------------------------------- 1 | project_name: retry 2 | 3 | builds: 4 | - id: retry 5 | binary: retry 6 | 7 | targets: 8 | - darwin_amd64 9 | - darwin_arm64 10 | - linux_amd64 11 | - linux_arm64 12 | - windows_386 13 | - windows_amd64 14 | 15 | flags: 16 | - -buildvcs=false 17 | - -trimpath 18 | 19 | ldflags: 20 | - -s -w 21 | - -buildid= 22 | - -X main.version={{ .Tag }} 23 | 24 | env: 25 | - CGO_ENABLED=0 26 | 27 | archives: 28 | - id: retry 29 | builds: [retry] 30 | name_template: "{{.ProjectName}}-{{.Os}}-{{.Arch}}" 31 | 32 | format_overrides: 33 | - goos: windows 34 | format: zip 35 | 36 | release: 37 | name_template: "{{.Tag}} Release" 38 | prerelease: auto 39 | 40 | checksum: 41 
| name_template: "checksums.txt" 42 | 43 | changelog: 44 | skip: true 45 | 46 | brews: 47 | - name: retry 48 | 49 | tap: 50 | owner: joshdk 51 | name: homebrew-tap 52 | token: "{{ .Env.HOMEBREW_GITHUB_TOKEN }}" 53 | 54 | url_template: "https://github.com/joshdk/retry/releases/download/{{ .Tag }}/{{ .ArtifactName }}" 55 | 56 | commit_msg_template: "feat: brew formula update for {{ .ProjectName }} {{ .Tag }}" 57 | commit_author: 58 | name: Josh Komoroske 59 | email: jdkomo@gmail.com 60 | 61 | folder: Formula 62 | 63 | caveats: "Run retry --help for usage" 64 | homepage: "https://github.com/joshdk/retry" 65 | description: "Rerun a command until it eventually succeeds, or doesn't" 66 | license: "MIT" 67 | 68 | test: | 69 | system "#{bin}/retry -version" 70 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Josh Komoroske 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies 9 | of the Software, and to permit persons to whom the Software is furnished to do 10 | so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Actions][github-actions-badge]][github-actions-link] 2 | [![License][license-badge]][license-link] 3 | [![Go Report Card][go-report-card-badge]][go-report-card-link] 4 | [![Godoc][godoc-badge]][godoc-link] 5 | [![Releases][github-release-badge]][github-release-link] 6 | 7 | # Retry 8 | 9 | ⏰ Rerun a command until it eventually succeeds, or doesn't! 10 | 11 | ## Installation 12 | 13 | Prebuilt binaries for several architectures can be found attached to any of the available [releases][github-release-link]. 14 | 15 | For Linux: 16 | ```shell 17 | wget https://github.com/joshdk/retry/releases/download/v1.4.0/retry-linux-amd64.tar.gz 18 | tar -xf retry-linux-amd64.tar.gz 19 | sudo install retry /usr/bin/retry 20 | ``` 21 | 22 | For Mac: 23 | ```shell 24 | brew tap joshdk/tap 25 | brew install joshdk/tap/retry 26 | ``` 27 | 28 | A development version can also be built directly from this repository. 29 | Requires that you already have a functional Go toolchain installed. 30 | ```shell 31 | go install github.com/joshdk/retry@master 32 | ``` 33 | 34 | ## Motivations 35 | 36 | I kept seeing folks write bespoke code to retry commands that were either flaky, or took time to succeed. This usually manifested as some sort of loop in bash, with a counter, and a return code check. 37 | 38 | Searching around, this doesn't seem to be an isolated problem, which has an even larger number of bespoke solutions. 
Take for example this handful of Stack Overflow threads: 39 | 40 | - [How to retry a command in Bash?](https://stackoverflow.com/questions/7449772/how-to-retry-a-command-in-bash) 41 | - [Retry a Bash command with timeout](https://stackoverflow.com/questions/12321469/retry-a-bash-command-with-timeout) 42 | - [How do I write a retry logic in script to keep retrying to run it upto 5 times?](https://unix.stackexchange.com/questions/82598/how-do-i-write-a-retry-logic-in-script-to-keep-retrying-to-run-it-upto-5-times) 43 | 44 | These are perfectly legitimate questions, with many reasonable answers. The downside is that the solutions were usually specific to the question asked, and not always applicable to the broader problem. 45 | 46 | This tool is an attempt to solve that broader problem. ⏰ 47 | 48 | ## Usage 49 | 50 | ### Help! 51 | 52 | ```bash 53 | Usage: retry [flags] command|url 54 | -attempts int 55 | maximum number of attempts (default 3) 56 | -backoff 57 | use exponential backoff when sleeping 58 | -consecutive int 59 | required number of back to back successes 60 | -delay duration 61 | initial delay period before tasks are run 62 | -invert 63 | wait for task to fail rather than succeed 64 | -jitter duration 65 | time range randomly added to sleep 66 | -max-time duration 67 | maximum total time (default 1m0s) 68 | -quiet 69 | silence all output 70 | -sleep duration 71 | time to sleep between attempts (default 5s) 72 | -task-time duration 73 | maximum time for a single attempt 74 | -version 75 | print the version "v1.4.0" and exit 76 | ``` 77 | 78 | ### Running a command 79 | 80 | Retry will run a given command repeatedly, until it is deemed an overall success or failure. The conditions and limits for what determines success/failure can be tuned with command line flags. 81 | 82 | As a special case, if a URL is given, retry will GET that URL and check for a 200 OK to be returned. 
83 | 84 | ### Limit attempts 85 | 86 | The `-attempts` flag limits the maximum number of times a command can be run. A value of 0 allows unlimited attempts. 87 | 88 | > Run `cat kubeconfig.yml` a maximum of 3 times, or less if the command succeeds earlier: 89 | > 90 | > ```bash 91 | > $ retry -attempts=3 cat kubeconfig.yml 92 | > ``` 93 | 94 | ### Limit task time 95 | 96 | The `-task-time` flag limits the maximum time that a command can run for. A value of 0 allows unlimited time. 97 | 98 | > Run `wget https://example.com`, but limit the command to only run for a maximum of 15 seconds. 99 | > 100 | > ```bash 101 | > $ retry -task-time=15s wget https://example.com 102 | > ``` 103 | 104 | ### Limit overall time 105 | 106 | The `-max-time` flag limits the maximum total time that `retry` will run for. A value of 0 allows unlimited time. 107 | 108 | > GET `https://example.com` repeatedly, but stop running after a total of 60 seconds. 109 | > 110 | > ```bash 111 | > $ retry -max-time=60s https://example.com 112 | > ``` 113 | 114 | ### Initial delay 115 | 116 | The `-delay` flag inserts a one-time delay before initially starting to run commands. 117 | 118 | > Run `wget https://example.com`, but start only after initially sleeping for 15 seconds. 119 | > 120 | > ```bash 121 | > $ retry -delay=15s wget https://example.com 122 | > ``` 123 | 124 | ### Sleep between attempts 125 | 126 | The `-sleep` flag inserts a timed delay between command runs. 127 | 128 | > Run `cat kubeconfig.yml`, but sleep for 15 seconds between runs. 129 | > 130 | > ```bash 131 | > $ retry -sleep=15s cat kubeconfig.yml 132 | > ``` 133 | 134 | ### Exponential backoff 135 | 136 | The `-backoff` flag is used with `-sleep`, and will double the time delay between failed runs. Delay is reset after a successful run. 137 | 138 | > Run `wget https://example.com`, sleeping for 15 seconds after the first failure, 30 seconds after the second failure, 1 minute after the third failure, etc... 
139 | > 140 | > ```bash 141 | > $ retry -sleep=15s -backoff wget https://example.com 142 | > ``` 143 | 144 | ### Invert status 145 | 146 | The `-invert` flag is used to flip a task's failure status. Successful task runs will become failures, and vice versa. Useful for when you want to retry a command until it fails. 147 | 148 | > Run `curl https://example.com/health`, a maximum of 20 times, until it becomes unresponsive. 149 | > 150 | > ```bash 151 | > $ retry -attempts=20 -invert curl https://example.com/health 152 | > ``` 153 | 154 | ### Random jitter 155 | 156 | The `-jitter` flag adds a random time range to the sleep duration. Jitter is added on top of exponential backoff. 157 | 158 | > Run `cat kubeconfig.yml`, sleep for 15 seconds minimum, plus a random 0-10 seconds between each run. 159 | > 160 | > ```bash 161 | > $ retry -sleep=15s -jitter=10s cat kubeconfig.yml 162 | > ``` 163 | 164 | ### Consecutive successes 165 | 166 | The `-consecutive` flag requires a number of successful command runs to occur in a row in order to be considered successful. Useful for health checking a service that is inconsistent until it is fully started. 167 | 168 | > Run `wget https://example.com`, requiring the command to be successful 3 times in a row. 169 | > 170 | > ```bash 171 | > $ retry -consecutive=3 wget https://example.com 172 | > ``` 173 | 174 | ### Be quiet! 175 | 176 | Lastly, the `-quiet` flag silences all output (STDOUT and STDERR) from the command. Useful when running `retry` inside an `if`. 177 | 178 | > Run `ls -R`, but swallow all output. 179 | > 180 | > ```bash 181 | > $ retry -quiet ls -R 182 | > ``` 183 | 184 | ### Altogether now 185 | 186 | > Run `wget https://example.com` a maximum of **10** times. Each run can take a maximum of **15 seconds**, and a total of **2 minutes**. Delay for **15 seconds** before starting. Sleep for **5 seconds** between failures with exponential **backoff**. Lastly, require that the command succeeds **3 times** in a row. 
187 | > 188 | > ```bash 189 | > $ retry -attempts=10 -task-time=15s -max-time=2m -delay=15s -sleep=5s -backoff -consecutive=3 wget https://example.com 190 | >``` 191 | 192 | ## License 193 | 194 | This code is distributed under the [MIT License][license-link], see [LICENSE.txt][license-file] for more information. 195 | 196 | [github-actions-badge]: https://github.com/joshdk/retry/workflows/build/badge.svg 197 | [github-actions-link]: https://github.com/joshdk/retry/actions 198 | [github-release-badge]: https://img.shields.io/github/release/joshdk/retry/all.svg 199 | [github-release-link]: https://github.com/joshdk/retry/releases 200 | [go-report-card-badge]: https://goreportcard.com/badge/github.com/joshdk/retry 201 | [go-report-card-link]: https://goreportcard.com/report/github.com/joshdk/retry 202 | [godoc-badge]: https://pkg.go.dev/badge/github.com/joshdk/retry/retry 203 | [godoc-link]: https://pkg.go.dev/github.com/joshdk/retry/retry 204 | [license-badge]: https://img.shields.io/badge/license-MIT-green.svg 205 | [license-file]: https://github.com/joshdk/retry/blob/master/LICENSE.txt 206 | [license-link]: https://opensource.org/licenses/MIT 207 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/joshdk/retry 2 | 3 | go 1.19 4 | 5 | require github.com/stretchr/testify v1.8.1 6 | 7 | require ( 8 | github.com/davecgh/go-spew v1.1.1 // indirect 9 | github.com/pmezard/go-difflib v1.0.0 // indirect 10 | gopkg.in/yaml.v3 v3.0.1 // indirect 11 | ) 12 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 2 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 3 | github.com/davecgh/go-spew 
v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 4 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 5 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 6 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 7 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 8 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 9 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 10 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 11 | github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk= 12 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 13 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 14 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 15 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 16 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 17 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 18 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | // Copyright Josh Komoroske. All rights reserved. 2 | // Use of this source code is governed by the MIT license, 3 | // a copy of which can be found in the LICENSE.txt file. 4 | 5 | package main 6 | 7 | import ( 8 | "errors" 9 | "flag" 10 | "fmt" 11 | "os" 12 | "strings" 13 | "time" 14 | 15 | "github.com/joshdk/retry/retry" 16 | ) 17 | 18 | // version is used to hold the version string. 
Will be replaced at go build 19 | // time with -ldflags. 20 | var version = "development" 21 | 22 | // cmdFlags represents the assorted command line flags that can be passed. 23 | type cmdFlags struct { 24 | retry.Spec 25 | quiet bool 26 | version bool 27 | } 28 | 29 | func main() { 30 | var flags cmdFlags 31 | flag.IntVar(&flags.Attempts, "attempts", 3, "maximum number of attempts") 32 | flag.BoolVar(&flags.Backoff, "backoff", false, "use exponential backoff when sleeping") 33 | flag.IntVar(&flags.Consecutive, "consecutive", 0, "required number of back to back successes") 34 | flag.DurationVar(&flags.InitialDelay, "delay", 0, "initial delay period before tasks are run") 35 | flag.BoolVar(&flags.Invert, "invert", false, "wait for task to fail rather than succeed") 36 | flag.DurationVar(&flags.Jitter, "jitter", 0, "time range randomly added to sleep") 37 | flag.DurationVar(&flags.TotalTime, "max-time", time.Minute, "maximum total time") 38 | flag.BoolVar(&flags.quiet, "quiet", false, "silence all output") 39 | flag.DurationVar(&flags.Sleep, "sleep", 5*time.Second, "time to sleep between attempts") 40 | flag.DurationVar(&flags.TaskTime, "task-time", 0, "maximum time for a single attempt") 41 | flag.BoolVar(&flags.version, "version", false, fmt.Sprintf("print the version %q and exit", version)) 42 | flag.Usage = usage 43 | flag.Parse() 44 | 45 | if err := mainCmd(flags); err != nil { 46 | if !flags.quiet { 47 | fmt.Fprintf(os.Stderr, "retry: %v\n", err) 48 | } 49 | os.Exit(1) 50 | } 51 | } 52 | 53 | func mainCmd(flags cmdFlags) error { 54 | // If the version flag (-version) was given, print the version and exit. 55 | if flags.version { 56 | fmt.Println(version) //nolint:forbidigo 57 | return nil 58 | } 59 | 60 | // If no arguments were given, there's nothing to do. 
61 | if flag.NArg() == 0 { 62 | return errors.New("no command given") 63 | } 64 | 65 | var ( 66 | task retry.Task 67 | command = flag.Args()[0] 68 | args = flag.Args()[1:] 69 | ) 70 | 71 | if strings.HasPrefix(command, "http://") || strings.HasPrefix(command, "https://") { 72 | // The command looks like it references a url (starts with http:// or 73 | // https://). 74 | task = retry.HTTPTask{URL: command} 75 | } else { 76 | // Otherwise, assume the command references a (shell) command. 77 | task = retry.ExecTask{Name: command, Args: args, Quiet: flags.quiet} 78 | } 79 | 80 | return retry.Retry(flags.Spec, task) 81 | } 82 | 83 | func usage() { 84 | fmt.Fprintf(flag.CommandLine.Output(), "Usage: retry [flags] command|url\n") 85 | flag.PrintDefaults() 86 | } 87 | -------------------------------------------------------------------------------- /retry/retry.go: -------------------------------------------------------------------------------- 1 | // Copyright Josh Komoroske. All rights reserved. 2 | // Use of this source code is governed by the MIT license, 3 | // a copy of which can be found in the LICENSE.txt file. 4 | 5 | package retry 6 | 7 | import ( 8 | "context" 9 | "errors" 10 | "math/rand" 11 | "time" 12 | ) 13 | 14 | var ( 15 | // ErrExceededAttempts is an error that is returned for a task that was run 16 | // too many times, without being successful. 17 | ErrExceededAttempts = errors.New("maximum attempts exceeded") 18 | 19 | // ErrExceededTime is an error that is returned for a task that ran too 20 | // long, without being successful. 21 | ErrExceededTime = errors.New("maximum time exceeded") 22 | ) 23 | 24 | // Spec represents the various behavior parameters for retrying a task. 25 | type Spec struct { 26 | // Attempts is the maximum number of total times a task is run. A value of 27 | // zero removes this restriction, and a task will be run indefinitely. 28 | Attempts int 29 | 30 | // Backoff is for enabling exponential backoff between task invocations. 
31 | // The time between tasks will double each time there is a failure, but 32 | // will reset if there is a subsequent success. 33 | Backoff bool 34 | 35 | // Consecutive is the number of successful task runs that must happen in a 36 | // row in order for the task to be considered successful overall. 37 | Consecutive int 38 | 39 | // InitialDelay is the duration to pause before initially starting task 40 | // invocations. 41 | InitialDelay time.Duration 42 | 43 | // Invert is used to indicate that the task success status should be 44 | // reversed. Failed tasks count as successful, and vice versa. 45 | Invert bool 46 | 47 | // Jitter is the duration range to randomly add to the Sleep time. 48 | // Sleep + [0, Jitter) 49 | Jitter time.Duration 50 | 51 | // Sleep is the duration to pause between individual task invocations. 52 | Sleep time.Duration 53 | 54 | // TaskTime is the maximum time that an individual task invocation is 55 | // allowed to take. 56 | TaskTime time.Duration 57 | 58 | // TotalTime is the maximum time that all combined task invocations are 59 | // allowed to take. 60 | TotalTime time.Duration 61 | } 62 | 63 | // Retry will repeatedly run the given task, until it is successful. The given 64 | // spec is used for determining what exactly is considered "successful", and 65 | // how to handle timing of the potentially multiple task invocations. 66 | func Retry(spec Spec, task Task) error { 67 | ctxBackground := context.Background() 68 | ctxMaxTime, cancel := maybeTimed(ctxBackground, spec.TotalTime) 69 | defer cancel() 70 | 71 | // Sleep for the amount of time specified by the initial delay, but not 72 | // more than the max time. 
73 | if err := contextSleep(ctxMaxTime, spec.InitialDelay); err != nil { 74 | return ErrExceededTime 75 | } 76 | 77 | var totalRuns int 78 | var multiplier int64 = 1 79 | var consecutive int 80 | for { 81 | ctxMaxTask, _ := maybeTimed(ctxMaxTime, spec.TaskTime) 82 | 83 | select { 84 | case <-ctxMaxTime.Done(): 85 | return ErrExceededTime 86 | case err := <-runnerChan(ctxMaxTask, task): 87 | if err != nil != spec.Invert { 88 | // Task failed, so drop the number of consecutive successful 89 | // runs back down to zero. 90 | consecutive = 0 91 | } else { 92 | // Task succeeded, so reset exponential backoff. 93 | multiplier = 1 94 | consecutive++ 95 | } 96 | totalRuns++ 97 | 98 | // The desired number of consecutive successful runs was hit. 99 | // Return successfully. 100 | if consecutive >= max(spec.Consecutive, 1) { 101 | return nil 102 | } 103 | 104 | // The maximum number of runs was exceeded. Return with a "maximum 105 | // attempts exceeded" failure. 106 | if spec.Attempts != 0 && totalRuns >= spec.Attempts { 107 | return ErrExceededAttempts 108 | } 109 | 110 | // Sleep for the specified duration. 111 | snooze := spec.Sleep*time.Duration(multiplier) + jitter(spec.Jitter) 112 | if err := contextSleep(ctxMaxTime, snooze); err != nil { 113 | return ErrExceededTime 114 | } 115 | 116 | // Effectively double the sleep time, if (exponential) backoff was 117 | // specified. 
118 | if spec.Backoff { 119 | multiplier *= 2 120 | } 121 | } 122 | } 123 | } 124 | 125 | func max(a, b int) int { 126 | if a > b { 127 | return a 128 | } 129 | return b 130 | } 131 | 132 | func maybeTimed(parent context.Context, timeout time.Duration) (context.Context, context.CancelFunc) { 133 | if timeout == 0 { 134 | return context.WithCancel(parent) 135 | } 136 | return context.WithTimeout(parent, timeout) 137 | } 138 | 139 | // jitter returns a random duration in the range of: 140 | // 141 | // [0, variance) 142 | func jitter(variance time.Duration) time.Duration { 143 | if variance <= 0 { 144 | return 0 145 | } 146 | 147 | // rng is a seeded source capable of generating random values. 148 | rng := rand.New(rand.NewSource(time.Now().UnixNano())) 149 | 150 | return time.Duration(rng.Int63n(int64(variance))) 151 | } 152 | 153 | // contextSleep is a context-aware sleep. It will sleep for the given timeout, 154 | // but will return early if the given context is cancelled. The return value 155 | // will be nil after a full sleep, and non-nil if the given context was 156 | // cancelled. 157 | func contextSleep(ctx context.Context, timeout time.Duration) error { 158 | sleepCtx, cancel := context.WithTimeout(ctx, timeout) 159 | defer cancel() 160 | 161 | // Wait for either context to be done. Despite the child context being 162 | // derived from the parent, and how context cancellation is propagated, we 163 | // still cannot be sure which context was expired when selecting on both 164 | // in this way. This is why ctx.Err() is returned in both cases. 165 | select { 166 | case <-ctx.Done(): 167 | // The parent context is done, so return its error reason. 168 | return ctx.Err() 169 | case <-sleepCtx.Done(): 170 | // The child, parent, or both contexts are done, so return the parent 171 | // error reason if any. Will return nil if the child context expired 172 | // and the parent context was still active. 
173 | return ctx.Err() 174 | } 175 | } 176 | 177 | // runnerChan runs the given task, and returns a channel that will report that 178 | // task's return value. 179 | func runnerChan(ctx context.Context, task Task) <-chan error { 180 | errch := make(chan error, 1) 181 | go func() { 182 | defer close(errch) 183 | errch <- task.Run(ctx) 184 | }() 185 | return errch 186 | } 187 | -------------------------------------------------------------------------------- /retry/retry_test.go: -------------------------------------------------------------------------------- 1 | package retry 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "testing" 7 | "time" 8 | 9 | "github.com/stretchr/testify/assert" 10 | "github.com/stretchr/testify/require" 11 | ) 12 | 13 | func TestRetry(t *testing.T) { 14 | tests := []struct { 15 | title string 16 | task Task 17 | spec Spec 18 | results []result 19 | failed bool 20 | duration time.Duration 21 | }{ 22 | { 23 | title: "succeed fast", 24 | task: ExecTask{Name: "true"}, 25 | spec: Spec{ 26 | Attempts: 3, 27 | }, 28 | results: []result{ 29 | {elapsed: 0, failed: false}, 30 | }, 31 | failed: false, 32 | duration: 0, 33 | }, 34 | { 35 | title: "fail fast", 36 | task: ExecTask{Name: "false"}, 37 | spec: Spec{ 38 | Attempts: 3, 39 | }, 40 | results: []result{ 41 | {elapsed: 0, failed: true}, 42 | {elapsed: 0, failed: true}, 43 | {elapsed: 0, failed: true}, 44 | }, 45 | failed: true, 46 | duration: 0, 47 | }, 48 | { 49 | title: "succeed invert", 50 | task: ExecTask{Name: "false"}, 51 | spec: Spec{ 52 | Consecutive: 3, 53 | Invert: true, 54 | }, 55 | results: []result{ 56 | {elapsed: 0, failed: true}, 57 | {elapsed: 0, failed: true}, 58 | {elapsed: 0, failed: true}, 59 | }, 60 | failed: false, 61 | duration: 0, 62 | }, 63 | { 64 | title: "fail invert", 65 | task: ExecTask{Name: "true"}, 66 | spec: Spec{ 67 | Attempts: 3, 68 | Invert: true, 69 | }, 70 | results: []result{ 71 | {elapsed: 0, failed: false}, 72 | {elapsed: 0, failed: false}, 73 | {elapsed: 0, 
failed: false}, 74 | }, 75 | failed: true, 76 | duration: 0, 77 | }, 78 | { 79 | title: "succeed slow with task time", 80 | task: ExecTask{Name: "sleep", Args: []string{"2"}}, 81 | spec: Spec{ 82 | Attempts: 3, 83 | TaskTime: 3 * time.Second, 84 | }, 85 | results: []result{ 86 | {elapsed: 2 * time.Second, failed: false}, 87 | }, 88 | failed: false, 89 | duration: 2 * time.Second, 90 | }, 91 | { 92 | title: "succeed slow consecutively with task time", 93 | task: ExecTask{Name: "sleep", Args: []string{"2"}}, 94 | spec: Spec{ 95 | Attempts: 3, 96 | TaskTime: 3 * time.Second, 97 | Consecutive: 3, 98 | }, 99 | results: []result{ 100 | {elapsed: 2 * time.Second, failed: false}, 101 | {elapsed: 2 * time.Second, failed: false}, 102 | {elapsed: 2 * time.Second, failed: false}, 103 | }, 104 | failed: false, 105 | duration: 6 * time.Second, 106 | }, 107 | { 108 | title: "fail slow with task time", 109 | task: ExecTask{Name: "sleep", Args: []string{"600"}}, 110 | spec: Spec{ 111 | Attempts: 3, 112 | TaskTime: 3 * time.Second, 113 | }, 114 | results: []result{ 115 | {elapsed: 3 * time.Second, failed: true}, 116 | {elapsed: 3 * time.Second, failed: true}, 117 | {elapsed: 3 * time.Second, failed: true}, 118 | }, 119 | failed: true, 120 | duration: 9 * time.Second, 121 | }, 122 | { 123 | title: "fail slow with task time and sleep", 124 | task: ExecTask{Name: "sleep", Args: []string{"600"}}, 125 | spec: Spec{ 126 | Attempts: 3, 127 | TaskTime: 3 * time.Second, 128 | Sleep: 3 * time.Second, 129 | }, 130 | results: []result{ 131 | {elapsed: 3 * time.Second, failed: true}, 132 | {elapsed: 3 * time.Second, failed: true}, 133 | {elapsed: 3 * time.Second, failed: true}, 134 | }, 135 | failed: true, 136 | duration: 15 * time.Second, 137 | }, 138 | { 139 | title: "fail slow with task time, sleep, and backoff", 140 | task: ExecTask{Name: "sleep", Args: []string{"600"}}, 141 | spec: Spec{ 142 | Attempts: 3, 143 | TaskTime: 3 * time.Second, 144 | Sleep: 3 * time.Second, 145 | Backoff: true, 
146 | }, 147 | results: []result{ 148 | {elapsed: 3 * time.Second, failed: true}, 149 | {elapsed: 3 * time.Second, failed: true}, 150 | {elapsed: 3 * time.Second, failed: true}, 151 | }, 152 | failed: true, 153 | duration: 18 * time.Second, 154 | }, 155 | { 156 | title: "fail slow with task time, sleep, backoff, and total time", 157 | task: ExecTask{Name: "sleep", Args: []string{"600"}}, 158 | spec: Spec{ 159 | Attempts: 3, 160 | TaskTime: 3 * time.Second, 161 | Sleep: 3 * time.Second, 162 | Backoff: true, 163 | TotalTime: 12 * time.Second, 164 | }, 165 | results: []result{ 166 | {elapsed: 3 * time.Second, failed: true}, 167 | {elapsed: 3 * time.Second, failed: true}, 168 | }, 169 | failed: true, 170 | duration: 12 * time.Second, 171 | }, 172 | { 173 | title: "succeed initial delay", 174 | task: ExecTask{Name: "sleep", Args: []string{"5"}}, 175 | spec: Spec{ 176 | InitialDelay: 5 * time.Second, 177 | }, 178 | results: []result{ 179 | {elapsed: 5 * time.Second}, 180 | }, 181 | duration: 10 * time.Second, 182 | }, 183 | { 184 | title: "http url", 185 | task: HTTPTask{"http://www.google.com"}, 186 | spec: Spec{ 187 | Attempts: 1, 188 | }, 189 | results: []result{ 190 | {elapsed: 0, failed: false}, 191 | }, 192 | failed: false, 193 | duration: 0, 194 | }, 195 | { 196 | title: "https url", 197 | task: HTTPTask{"https://www.google.com"}, 198 | spec: Spec{ 199 | Attempts: 1, 200 | }, 201 | results: []result{ 202 | {elapsed: 0, failed: false}, 203 | }, 204 | failed: false, 205 | duration: 0, 206 | }, 207 | { 208 | title: "bad url", 209 | task: HTTPTask{"https://fake.example.com"}, 210 | spec: Spec{ 211 | Attempts: 1, 212 | }, 213 | results: []result{ 214 | {elapsed: 0, failed: true}, 215 | }, 216 | failed: true, 217 | duration: 0, 218 | }, 219 | } 220 | 221 | for index, test := range tests { 222 | name := fmt.Sprintf("%d %s", index, test.title) 223 | t.Run(name, func(t *testing.T) { 224 | test := test 225 | t.Parallel() 226 | 227 | var ( 228 | task = 
newWrappedTask(test.task) 229 | start = time.Now() 230 | err = Retry(test.spec, task) 231 | end = time.Now() 232 | actual = end.Sub(start) 233 | ) 234 | 235 | // Sanity check that there were the same number of actual results 236 | // as expected. 237 | require.Equal(t, len(test.results), len(task.results)) 238 | 239 | // Check error for overall task run. 240 | checkError(t, test.failed, err) 241 | 242 | // Check duration for overall task run. 243 | checkDuration(t, test.duration, actual) 244 | 245 | for i, result := range test.results { 246 | // Check error for this specific task run. 247 | checkError(t, result.failed, task.results[i].error) 248 | 249 | // Check duration for this specific task run. 250 | checkDuration(t, result.elapsed, task.results[i].elapsed) 251 | } 252 | }) 253 | } 254 | } 255 | 256 | func checkError(t *testing.T, failureExpected bool, actual error) { 257 | t.Helper() 258 | if failureExpected { 259 | assert.Error(t, actual) 260 | } else { 261 | assert.NoError(t, actual) 262 | } 263 | } 264 | 265 | func checkDuration(t *testing.T, expected time.Duration, actual time.Duration) { 266 | t.Helper() 267 | 268 | // epsilon is the time duration delta that is allowed when comparing times. 269 | // Higher epsilon values result in longer time margins. Lower epsilon 270 | // values result in smaller time margins, but potentially flaky tests. 
271 | epsilon := time.Millisecond * 500 272 | 273 | if actual < expected-epsilon || expected+epsilon < actual { 274 | assert.Failf(t, "duration mismatch", "A duration of %v ± %v is expected but got %v", expected, epsilon, actual) 275 | } 276 | } 277 | 278 | type result struct { 279 | elapsed time.Duration 280 | error error 281 | failed bool 282 | } 283 | 284 | type wrappedTask struct { 285 | task Task 286 | results []result 287 | } 288 | 289 | func (t *wrappedTask) Run(ctx context.Context) error { 290 | var ( 291 | start = time.Now() 292 | err = t.task.Run(ctx) 293 | end = time.Now() 294 | ) 295 | 296 | t.results = append(t.results, result{ 297 | elapsed: end.Sub(start), 298 | error: err, 299 | failed: err != nil, 300 | }) 301 | 302 | return err 303 | } 304 | 305 | func newWrappedTask(task Task) *wrappedTask { 306 | return &wrappedTask{ 307 | task: task, 308 | } 309 | } 310 | -------------------------------------------------------------------------------- /retry/task.go: -------------------------------------------------------------------------------- 1 | // Copyright Josh Komoroske. All rights reserved. 2 | // Use of this source code is governed by the MIT license, 3 | // a copy of which can be found in the LICENSE.txt file. 4 | 5 | package retry 6 | 7 | import ( 8 | "context" 9 | "errors" 10 | "net/http" 11 | "os" 12 | "os/exec" 13 | ) 14 | 15 | type Task interface { 16 | Run(context.Context) error 17 | } 18 | 19 | var ( 20 | _ Task = (*ExecTask)(nil) 21 | _ Task = (*HTTPTask)(nil) 22 | ) 23 | 24 | type ExecTask struct { 25 | Name string 26 | Args []string 27 | Quiet bool 28 | } 29 | 30 | func (t ExecTask) Run(ctx context.Context) error { 31 | cmd := exec.CommandContext(ctx, t.Name, t.Args...) 
// HTTPTask is a Task that performs an HTTP GET against URL and treats any
// response status other than 200 OK as a failure.
type HTTPTask struct {
	// URL is the address requested on every run.
	URL string
}

// Run issues a single GET request for t.URL using http.DefaultClient, bound to
// ctx so that cancellation or a deadline aborts the request.
//
// It returns nil only when the response status is 200 OK. A request that
// cannot be built (malformed URL, nil context), a transport failure, or any
// other status code yields a non-nil error.
func (t HTTPTask) Run(ctx context.Context) error {
	// Build the request with its context in one step: this reports a nil
	// context as an error instead of panicking and avoids copying the request.
	request, err := http.NewRequestWithContext(ctx, http.MethodGet, t.URL, nil)
	if err != nil {
		return err
	}

	resp, err := http.DefaultClient.Do(request)
	if err != nil {
		return err
	}
	// The client guarantees a non-nil Body whenever err is nil, so the body
	// can be closed unconditionally.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return errors.New("HTTP status was " + resp.Status)
	}

	return nil
}