├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── args.go ├── args_test.go ├── cronner.go ├── cronner_test.go ├── runner.go └── runner_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Misc 2 | *.swp 3 | cronner 4 | 5 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 6 | *.o 7 | *.a 8 | *.so 9 | 10 | # Folders 11 | _obj 12 | _test 13 | 14 | # Architecture specific extensions/prefixes 15 | *.[568vq] 16 | [568vq].out 17 | 18 | *.cgo1.go 19 | *.cgo2.c 20 | _cgo_defun.c 21 | _cgo_gotypes.go 22 | _cgo_export.* 23 | 24 | _testmain.go 25 | 26 | *.exe 27 | *.test 28 | *.prof 29 | 30 | *.tar.gz 31 | 32 | src 33 | pkg 34 | 35 | testlog/ 36 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - 1.7.4 4 | addons: 5 | apt: 6 | packages: 7 | - time 8 | sudo: false 9 | env: 'PD_PROJECT=cronner GOARCH=amd64 GOOS=linux PD_BUILD_NAME="$PD_PROJECT-$GOOS-$GOARCH-$TRAVIS_TAG"' 10 | script: go test -v ./... 
-check.vv 11 | before_deploy: 12 | - mkdir build 13 | - mkdir $PD_BUILD_NAME 14 | - go build -o $PD_BUILD_NAME/$PD_PROJECT 15 | - tar -czf build/$PD_BUILD_NAME.tar.gz $PD_BUILD_NAME/ 16 | deploy: 17 | provider: releases 18 | skip_cleanup: true 19 | api_key: 20 | secure: Rxly2mwatA2tHz9MscZR/YdU7TR3S0kUeQv1DmKp7x0zlcPMcDtD9hQcRrv5W3JL3FsxUn9hoLQRmYPxgKmHBRBdFjn9s3tqx8wfnWkcaVHvwz3dxYEJQxlQVgl19gGFlL9uXfOt/PR2pMpFxSLdudwyBvCR9cXMWr0Y4MRHfuZtrfb0q1EGaJuhXb6hcp1STrKUgEUiv7fDcQOS6KIiPdQey6hELdidWTCIHgo2C6w+r3z2PG2y7w5s7S4vSFicNSsyV/wRPv3Eelcv2YSddcGrltb+Pd/weKDTfvLLzA3jMbFpqI/XSnr/szrXtrYomQE/9pdBwfgHD9gZ7tcRrTOVVx5bIx7+CbZ4CJdIQtHKdqjFHmXoAjCUJYnSfLMy9c9dZ42RGrmw7P1z3hB5wwLRrd3PXXlmV1YfEJt5deugXHxHxC/a5rEMNEJxPhHL4yaoxL8gO1v02o21C4DCVDalgJ18M2YjX2NCujoAfYls5ib/QCeoZAxHz/zVI80SWvqdSC/vqm4+pbzdw6JriRcUvdFb6owU1mQjloO4fGCJkT3Dhqaa916UKuqjB2ogg9SQpyPvx8VX6q7yCOzV0sk4WZZJfImiys+ifZaJoecVth8jAISdOkIJIm9a9oPy6IOskYQKP51sjhqvS/G5wmxl1Ips1Ik42iUsXpI+7Us= 21 | file: build/$PD_BUILD_NAME.tar.gz 22 | on: 23 | repo: PagerDuty/cronner 24 | tags: true 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2015, PagerDuty Inc. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | 2. Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. 
Neither the name of PagerDuty nor the names of its contributors may be used 15 | to endorse or promote products derived from this software without specific 16 | prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL PagerDuty OR CONTRIBUTORS BE LIABLE FOR ANY 22 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 25 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cronner 2 | [![TravisCI Build Status](https://img.shields.io/travis/PagerDuty/cronner/master.svg?style=flat)](https://travis-ci.org/PagerDuty/cronner) 3 | 4 | ### Deprecation Notice 5 | This repository is deprecated; No further Issues or Pull Requests will be considered or approved. 6 | 7 | Further interest should be directed to the new `cronner` repository; If you'd like to check out the latest version of the source code, report any issues, contribute any improvements, or download the latest release, please do so at: 8 | 9 | * https://github.com/theckman/cronner 10 | 11 | ## Overview 12 | 13 | `cronner` is a command line utility that wraps periodic (cron) jobs for statistics gathering and success monitoring. 
The amount of time the command took to run, as well as the return code, are emitted as vanilla statsd metrics to port 8125. It also implements file-level locking for a very simple, and dumb, job semaphore. 14 | 15 | The utility also supports emitting [DogStatsD Events](http://docs.datadoghq.com/guides/dogstatsd/#events) on the following occasions: 16 | 17 | * job start and job finish 18 | * job finish if the job failed 19 | * if the job is taking too long to finish running 20 | 21 | If your statsd agent isn't DogStatsD-compliant, I'm not sure what the behavior will be if you emit an event to it. 22 | 23 | For the finish DogStatsD event, the return code and output of the command are provided in the event body. If the output is too long, it is truncated. This output can optionally be saved to disk only if the job fails for later inspection. 24 | 25 | ## License 26 | Cronner is released under the BSD 3-Clause License. See the `LICENSE` file for 27 | the full contents of the license. 28 | 29 | ## Usage 30 | ### Help Output 31 | 32 | ``` 33 | Usage: 34 | cronner [OPTIONS] -- command [arguments]... 35 | 36 | Application Options: 37 | -d, --lock-dir= the directory where lock files will be placed (/var/lock) 38 | -e, --event emit a start and end datadog event (false) 39 | -E, --event-fail only emit an event on failure (false) 40 | -F, --log-fail when a command fails, log its full output (stdout/stderr) to the log directory using the UUID as the filename (false) 41 | -G, --event-group= emit a cronner_group: tag with Datadog events, does not get sent with statsd metrics 42 | -k, --lock lock based on label so that multiple commands with the same label can not run concurrently (false) 43 | -l, --label= name for cron job to be used in statsd emissions and DogStatsd events. 
alphanumeric only; cronner will lowercase it 44 | --log-path= where to place the log files for command output (path for -F/--log-fail output) (/var/log/cronner) 45 | -L, --log-level= set the level at which to log at [none|error|info|debug] (error) 46 | -N, --namespace= namespace for statsd emissions, value is prepended to metric name by statsd client (cronner) 47 | -s, --sensitive specify whether command output may contain sensitive details, this only avoids it being printed to stderr (false) 48 | -V, --version print the version string and exit 49 | -w, --warn-after=N emit a warning event every N seconds if the job hasn't finished, set to 0 to disable (0) 50 | -W, --wait-secs= how long to wait for the file lock for (0) 51 | 52 | Help Options: 53 | -h, --help Show this help message 54 | 55 | Arguments: 56 | -- command [arguments] 57 | ``` 58 | 59 | ### Running A Command 60 | The label (`-l`, `--label`) flag is required. 61 | 62 | To run the command `/bin/sleep 10` and emit the stats as `cronner.sleepytime.time` and `cronner.sleepytime.exit_code` you would run: 63 | 64 | ``` 65 | $ cronner -l sleepytime -- /bin/sleep 10 66 | ``` 67 | 68 | To note, `--` in the command line arguments tells cronner to stop parsing CLI flags. It then grabs the rest of the arguments as the command to execute. 69 | 70 | If you were to have a UDP listener on port 8125 on localhost, the statsd emissions would look something like this: 71 | 72 | ``` 73 | cronner.sleepytime.time:10005.834649|ms 74 | cronner.sleepytime.exit_code:0|g 75 | ``` 76 | 77 | It emits a timing metric for how long it took for the command to run, as well as the command's exit code. 
78 | 79 | ### Running A Command with a DogStatsD Event 80 | If you want to run `/bin/sleep 5` as `sleepytime2` and emit a DogStatsD event when the job starts and finishes: 81 | 82 | ``` 83 | $ cronner -e -l sleepytime2 -- /bin/sleep 5 84 | ``` 85 | 86 | The UDP datagrams emitted would then look like this: 87 | 88 | ``` 89 | _e{35,12}:Cron sleepytime2 starting on rinzler|job starting|k:ab31f2f6-498e-468a-b572-ab990065e8d3|s:cronner|t:info 90 | cronner.sleepytime2.time:5005.649979|ms 91 | cronner.sleepytime2.exit_code:0|g 92 | _e{55,22}:Cron sleepytime2 succeeded in 5.00565 seconds on rinzler|exit code: 0\\noutput:(none)|k:ab31f2f6-498e-468a-b572-ab990065e8d3|s:cronner|t:success 93 | ``` 94 | 95 | ## Contributors 96 | * Tim Heckman 97 | * Thomas Dziedzic 98 | 99 | ## Development 100 | * set up your workspace as per the instructions for standard Go development 101 | * clone the cronner repository 102 | 103 | ```BASH 104 | git clone git@github.com:PagerDuty/cronner.git 105 | ``` 106 | * make your changes to the codebase, including adding relevant test cases 107 | * run your tests to ensure all pass 108 | 109 | ```BASH 110 | go test -v ./... -check.vv 111 | ``` 112 | * confirm that building cronner works 113 | 114 | ```BASH 115 | go build 116 | ``` 117 | -------------------------------------------------------------------------------- /args.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 PagerDuty, Inc, et al. All rights reserved. 2 | // Use of this source code is governed by the BSD 3-Clause 3 | // license that can be found in the LICENSE file. 
4 | 5 | package main 6 | 7 | import ( 8 | "fmt" 9 | "os" 10 | "regexp" 11 | "runtime" 12 | "strings" 13 | 14 | "github.com/jessevdk/go-flags" 15 | "github.com/tideland/golib/logger" 16 | ) 17 | 18 | // binArgs is for argument parsing 19 | type binArgs struct { 20 | Cmd string // this is not a command line flag, but rather parsed results 21 | CmdArgs []string // this is not a command line flag, also parsed results 22 | LockDir string `short:"d" long:"lock-dir" default:"/var/lock" description:"the directory where lock files will be placed"` 23 | AllEvents bool `short:"e" long:"event" description:"emit a start and end datadog event"` 24 | StatsdHost string `short:"H" long:"statsd-host" value-name:"" description:"destination host to send datadog metrics"` 25 | FailEvent bool `short:"E" long:"event-fail" description:"only emit an event on failure"` 26 | LogFail bool `short:"F" long:"log-fail" description:"when a command fails, log its full output (stdout/stderr) to the log directory using the UUID as the filename"` 27 | EventGroup string `short:"G" long:"event-group" value-name:"" description:"emit a cronner_group: tag with Datadog events, does not get sent with statsd metrics"` 28 | Group string `short:"g" long:"group" value-name:"" description:"emit a cronner_group: tag with statsd metrics"` 29 | Lock bool `short:"k" long:"lock" description:"lock based on label so that multiple commands with the same label can not run concurrently"` 30 | Label string `short:"l" long:"label" description:"name for cron job to be used in statsd emissions and DogStatsd events. 
alphanumeric only; cronner will lowercase it"` 31 | LogPath string `long:"log-path" default:"/var/log/cronner" description:"where to place the log files for command output (path for -F/--log-fail output)"` 32 | LogLevel string `short:"L" long:"log-level" default:"error" description:"set the level at which to log at [none|error|info|debug]"` 33 | Namespace string `short:"N" long:"namespace" default:"cronner" description:"namespace for statsd emissions, value is prepended to metric name by statsd client"` 34 | Sensitive bool `short:"s" long:"sensitive" description:"specify whether command output may contain sensitive details, this only avoids it being printed to stderr"` 35 | Version bool `short:"V" long:"version" description:"print the version string and exit"` 36 | WarnAfter uint64 `short:"w" long:"warn-after" default:"0" value-name:"N" description:"emit a warning event every N seconds if the job hasn't finished, set to 0 to disable"` 37 | WaitSeconds uint64 `short:"W" long:"wait-secs" default:"0" description:"how long to wait for the file lock for"` 38 | Args struct { 39 | Command []string `positional-arg-name:"-- command [arguments]"` 40 | } `positional-args:"yes" required:"true"` 41 | } 42 | 43 | var argsLabelRegex = regexp.MustCompile(`^[a-zA-Z0-9_\. 
]+$`) 44 | 45 | // parse function configures the go-flags parser and runs it 46 | // it also does some light input validation 47 | // 48 | // the args parameter is meant to be the entirety of os.Args 49 | func (a *binArgs) parse(args []string) (string, error) { 50 | if args == nil { 51 | args = os.Args 52 | } 53 | 54 | p := flags.NewParser(a, flags.HelpFlag|flags.PassDoubleDash) 55 | 56 | _, err := p.ParseArgs(args[1:]) 57 | 58 | // determine if there was a parsing error 59 | // unfortunately, help message is returned as an error 60 | if err != nil { 61 | // determine whether this was a help message by doing a type 62 | // assertion of err to *flags.Error and check the error type 63 | // if it was a help message, do not return an error 64 | if errType, ok := err.(*flags.Error); ok { 65 | if errType.Type == flags.ErrHelp { 66 | return err.Error(), nil 67 | } 68 | } 69 | 70 | return "", err 71 | } 72 | 73 | if a.Version { 74 | out := fmt.Sprintf("cronner v%s built with %s\nCopyright 2017 PagerDuty, Inc.; released under the BSD 3-Clause License\n", Version, runtime.Version()) 75 | return out, nil 76 | } 77 | 78 | if !argsLabelRegex.MatchString(a.Label) { 79 | return "", fmt.Errorf("cron label '%v' is invalid, it can only be alphanumeric with underscores, periods, and spaces", a.Label) 80 | } 81 | 82 | if len(a.Args.Command) == 0 { 83 | return "", fmt.Errorf("you must specify a command to run either using by adding it to the end, or using the command flag") 84 | } 85 | a.Cmd = a.Args.Command[0] 86 | 87 | if len(a.Args.Command) > 1 { 88 | a.CmdArgs = a.Args.Command[1:] 89 | } 90 | 91 | // lowercase the metric and replace spaces with underscores 92 | // to try and encourage sanity 93 | a.Label = strings.Replace(strings.ToLower(a.Label), " ", "_", -1) 94 | 95 | var logLevel logger.LogLevel 96 | 97 | switch strings.ToLower(a.LogLevel) { 98 | case "none": 99 | logLevel = logger.LevelFatal 100 | case "error": 101 | logLevel = logger.LevelError 102 | case "info": 103 | 
logLevel = logger.LevelInfo 104 | case "debug": 105 | logLevel = logger.LevelDebug 106 | default: 107 | return "", fmt.Errorf("%v is not a known log level, try none, debug, info, or error", a.LogLevel) 108 | } 109 | logger.SetLevel(logLevel) 110 | 111 | return "", nil 112 | } 113 | -------------------------------------------------------------------------------- /args_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 PagerDuty, Inc, et al. All rights reserved. 2 | // Use of this source code is governed by the BSD 3-Clause 3 | // license that can be found in the LICENSE file. 4 | 5 | package main 6 | 7 | import ( 8 | "fmt" 9 | "runtime" 10 | 11 | "github.com/tideland/golib/logger" 12 | 13 | . "gopkg.in/check.v1" 14 | ) 15 | 16 | func (t *TestSuite) Test_binArgs_parse(c *C) { 17 | var output string 18 | var err error 19 | 20 | const Arg0 = "/usr/loca/bin/cronner" 21 | 22 | // 23 | // assert that label is required and is validated 24 | // 25 | args := &binArgs{} 26 | cli := []string{Arg0} 27 | 28 | output, err = args.parse(cli) 29 | c.Assert(err, Not(IsNil)) 30 | c.Check(len(output), Equals, 0) 31 | c.Check(err.Error(), Equals, "cron label '' is invalid, it can only be alphanumeric with underscores, periods, and spaces") 32 | 33 | args = &binArgs{} 34 | cli = []string{ 35 | Arg0, 36 | "-l", "invalid^label", 37 | } 38 | 39 | output, err = args.parse(cli) 40 | c.Assert(err, Not(IsNil)) 41 | c.Check(len(output), Equals, 0) 42 | c.Check(err.Error(), Equals, "cron label 'invalid^label' is invalid, it can only be alphanumeric with underscores, periods, and spaces") 43 | 44 | // 45 | // assert that a command is required 46 | // 47 | args = &binArgs{} 48 | cli = []string{ 49 | Arg0, 50 | "-l", "test", 51 | } 52 | 53 | output, err = args.parse(cli) 54 | c.Assert(err, Not(IsNil)) 55 | c.Check(len(output), Equals, 0) 56 | c.Check(err.Error(), Equals, "you must specify a command to run either using by adding it to the 
end, or using the command flag") 57 | 58 | // 59 | // assert that version (-v/--version) printing works 60 | // 61 | args = &binArgs{} 62 | cli = []string{ 63 | Arg0, 64 | "-V", 65 | } 66 | 67 | verOut := fmt.Sprintf("cronner v%s built with %s\nCopyright 2017 PagerDuty, Inc.; released under the BSD 3-Clause License\n", Version, runtime.Version()) 68 | 69 | output, err = args.parse(cli) 70 | c.Assert(err, IsNil) 71 | c.Check(output, Equals, verOut) 72 | c.Check(args.Version, Equals, true) 73 | 74 | args = &binArgs{} 75 | cli = []string{ 76 | Arg0, 77 | "--version", 78 | } 79 | 80 | output, err = args.parse(cli) 81 | c.Assert(err, IsNil) 82 | c.Check(output, Equals, verOut) 83 | c.Check(args.Version, Equals, true) 84 | 85 | // 86 | // assert the default values 87 | // 88 | args = &binArgs{} 89 | cli = []string{ 90 | Arg0, 91 | "-l", "test", 92 | "--", "/bin/true", 93 | } 94 | 95 | output, err = args.parse(cli) 96 | c.Assert(err, IsNil) 97 | c.Check(len(output), Equals, 0) 98 | c.Check(args.LockDir, Equals, "/var/lock") 99 | c.Check(args.AllEvents, Equals, false) 100 | c.Check(args.FailEvent, Equals, false) 101 | c.Check(args.LogFail, Equals, false) 102 | c.Check(args.EventGroup, Equals, "") 103 | c.Check(args.Group, Equals, "") 104 | c.Check(args.Lock, Equals, false) 105 | c.Check(args.LogPath, Equals, "/var/log/cronner") 106 | c.Check(args.LogLevel, Equals, "error") 107 | c.Check(args.Namespace, Equals, "cronner") 108 | c.Check(args.Sensitive, Equals, false) 109 | c.Check(args.Version, Equals, false) 110 | c.Check(args.WarnAfter, Equals, uint64(0)) 111 | c.Check(args.WaitSeconds, Equals, uint64(0)) 112 | 113 | // 114 | // assert that the short flags work 115 | // 116 | args = &binArgs{} 117 | cli = []string{ 118 | Arg0, 119 | "-d", "/var/testlock", 120 | "-e", 121 | "-E", 122 | "-F", 123 | "-G", "test_group", 124 | "-g", "metric_group", 125 | "-H", "test_host", 126 | "-k", 127 | "-l", "test", 128 | "-L", "info", 129 | "-N", "testcronner", 130 | "-s", 131 | "-w", 
"42", 132 | "-W", "84", 133 | "--", "/bin/true", 134 | } 135 | 136 | output, err = args.parse(cli) 137 | c.Assert(err, IsNil) 138 | 139 | // because we're parsing args we've just overridden this in the parser 140 | // so set it back to the value from SetUpSuite() 141 | logger.SetLevel(logger.LevelFatal) 142 | 143 | c.Check(len(output), Equals, 0) 144 | c.Check(args.LockDir, Equals, "/var/testlock") 145 | c.Check(args.AllEvents, Equals, true) 146 | c.Check(args.FailEvent, Equals, true) 147 | c.Check(args.LogFail, Equals, true) 148 | c.Check(args.EventGroup, Equals, "test_group") 149 | c.Check(args.Group, Equals, "metric_group") 150 | c.Check(args.StatsdHost, Equals, "test_host") 151 | c.Check(args.Lock, Equals, true) 152 | c.Check(args.Label, Equals, "test") 153 | c.Check(args.LogLevel, Equals, "info") 154 | c.Check(args.Namespace, Equals, "testcronner") 155 | c.Check(args.Sensitive, Equals, true) 156 | c.Check(args.Version, Equals, false) 157 | c.Check(args.WarnAfter, Equals, uint64(42)) 158 | c.Check(args.WaitSeconds, Equals, uint64(84)) 159 | c.Check(args.Cmd, Equals, "/bin/true") 160 | c.Check(len(args.CmdArgs), Equals, 0) 161 | 162 | // 163 | // assert that long flags work 164 | // 165 | args = &binArgs{} 166 | cli = []string{ 167 | Arg0, 168 | "--lock-dir", "/var/testlock", 169 | "--event", 170 | "--event-fail", 171 | "--log-fail", 172 | "--event-group", "test_group", 173 | "--group", "metric_group", 174 | "--statsd-host", "test_host", 175 | "--lock", 176 | "--label", "test", 177 | "--log-path", "/var/log/testcronner", 178 | "--log-level", "info", 179 | "--namespace", "testcronner", 180 | "--sensitive", 181 | "--warn-after", "42", 182 | "--wait-secs", "84", 183 | "--", "/bin/true", 184 | } 185 | 186 | output, err = args.parse(cli) 187 | c.Assert(err, IsNil) 188 | logger.SetLevel(logger.LevelFatal) 189 | 190 | c.Check(len(output), Equals, 0) 191 | c.Check(args.LockDir, Equals, "/var/testlock") 192 | c.Check(args.AllEvents, Equals, true) 193 | 
c.Check(args.FailEvent, Equals, true) 194 | c.Check(args.LogFail, Equals, true) 195 | c.Check(args.EventGroup, Equals, "test_group") 196 | c.Check(args.Group, Equals, "metric_group") 197 | c.Check(args.StatsdHost, Equals, "test_host") 198 | c.Check(args.Lock, Equals, true) 199 | c.Check(args.Label, Equals, "test") 200 | c.Check(args.LogPath, Equals, "/var/log/testcronner") 201 | c.Check(args.LogLevel, Equals, "info") 202 | c.Check(args.Namespace, Equals, "testcronner") 203 | c.Check(args.Sensitive, Equals, true) 204 | c.Check(args.Version, Equals, false) 205 | c.Check(args.WarnAfter, Equals, uint64(42)) 206 | c.Check(args.WaitSeconds, Equals, uint64(84)) 207 | c.Check(args.Cmd, Equals, "/bin/true") 208 | c.Check(len(args.CmdArgs), Equals, 0) 209 | 210 | // 211 | // assert that long flags work with --flag=value syntax 212 | // 213 | args = &binArgs{} 214 | cli = []string{ 215 | Arg0, 216 | "--lock-dir=/var/testlock", 217 | "--event-group=test_group", 218 | "--group=metric_group", 219 | "--label=test", 220 | "--log-path=/var/log/testcronner", 221 | "--statsd-host=test_host", 222 | "--log-level=info", 223 | "--namespace=testcronner", 224 | "--warn-after=42", 225 | "--wait-secs=84", 226 | "--", "/bin/true", 227 | } 228 | 229 | output, err = args.parse(cli) 230 | c.Assert(err, IsNil) 231 | logger.SetLevel(logger.LevelFatal) 232 | 233 | c.Check(len(output), Equals, 0) 234 | c.Check(args.LockDir, Equals, "/var/testlock") 235 | c.Check(args.EventGroup, Equals, "test_group") 236 | c.Check(args.Group, Equals, "metric_group") 237 | c.Check(args.Label, Equals, "test") 238 | c.Check(args.LogPath, Equals, "/var/log/testcronner") 239 | c.Check(args.StatsdHost, Equals, "test_host") 240 | c.Check(args.LogLevel, Equals, "info") 241 | c.Check(args.Namespace, Equals, "testcronner") 242 | c.Check(args.WarnAfter, Equals, uint64(42)) 243 | c.Check(args.WaitSeconds, Equals, uint64(84)) 244 | c.Check(args.Cmd, Equals, "/bin/true") 245 | c.Check(len(args.CmdArgs), Equals, 0) 246 | 247 | // 
248 | // argument parsing regression tests 249 | // 250 | 251 | // 252 | // parse() function should always discard element 0 in the slice. 253 | // 254 | args = &binArgs{} 255 | cli = []string{ 256 | "--lock-dir=/var/testlock", 257 | "--event-group=test_group", 258 | "--label=test", 259 | "--", "/bin/true", 260 | } 261 | 262 | output, err = args.parse(cli) 263 | c.Assert(err, IsNil) 264 | logger.SetLevel(logger.LevelFatal) 265 | 266 | c.Check(len(output), Equals, 0) 267 | c.Check(args.LockDir, Not(Equals), "/var/testlock") 268 | c.Check(args.EventGroup, Equals, "test_group") 269 | c.Check(args.Label, Equals, "test") 270 | c.Check(args.Cmd, Equals, "/bin/true") 271 | c.Check(len(args.CmdArgs), Equals, 0) 272 | 273 | // 274 | // parse() function should allow spaces in command line arguments 275 | // 276 | args = &binArgs{} 277 | cli = []string{ 278 | Arg0, 279 | "--label=test", 280 | "--", "/bin/true", `some string`, 281 | } 282 | 283 | output, err = args.parse(cli) 284 | c.Assert(err, IsNil) 285 | logger.SetLevel(logger.LevelFatal) 286 | 287 | c.Check(len(output), Equals, 0) 288 | c.Check(args.Label, Equals, "test") 289 | c.Check(args.Cmd, Equals, "/bin/true") 290 | c.Assert(len(args.CmdArgs), Equals, 1) 291 | c.Check(args.CmdArgs[0], Equals, "some string") 292 | } 293 | -------------------------------------------------------------------------------- /cronner.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 PagerDuty, Inc, et al. All rights reserved. 2 | // Use of this source code is governed by the BSD 3-Clause 3 | // license that can be found in the LICENSE file. 4 | 5 | // Package main is the main thing, man. 
6 | package main 7 | 8 | import ( 9 | "fmt" 10 | "os" 11 | "os/exec" 12 | 13 | "github.com/PagerDuty/godspeed" 14 | "github.com/codeskyblue/go-uuid" 15 | "github.com/tideland/golib/logger" 16 | ) 17 | 18 | // Version is the program's version string 19 | const Version = "0.2.6" 20 | 21 | type cmdHandler struct { 22 | gs *godspeed.Godspeed 23 | opts *binArgs 24 | cmd *exec.Cmd 25 | uuid string 26 | hostname string 27 | } 28 | 29 | func main() { 30 | logger.SetLogger(logger.NewStandardLogger(os.Stderr)) 31 | 32 | // get and parse the command line options 33 | opts := &binArgs{} 34 | output, err := opts.parse(nil) 35 | 36 | // make sure parsing didn't bomb 37 | if err != nil { 38 | logger.Errorf("error: %v\n", err) 39 | os.Exit(1) 40 | } 41 | 42 | // if parsing had output, print it and exit 0 43 | if len(output) > 0 { 44 | fmt.Print(output) 45 | os.Exit(0) 46 | } 47 | 48 | // build a Godspeed client 49 | var gs *godspeed.Godspeed 50 | if opts.StatsdHost == "" { 51 | gs, err = godspeed.NewDefault() 52 | } else { 53 | gs, err = godspeed.New(opts.StatsdHost, godspeed.DefaultPort, false) 54 | } 55 | 56 | // make sure nothing went wrong with Godspeed 57 | if err != nil { 58 | logger.Errorf("error: %v\n", err) 59 | os.Exit(1) 60 | } 61 | 62 | gs.SetNamespace(opts.Namespace) 63 | 64 | // get the hostname and validate nothing happened 65 | hostname, err := os.Hostname() 66 | 67 | if err != nil { 68 | logger.Errorf("error: %v\n", err) 69 | os.Exit(1) 70 | } 71 | 72 | handler := &cmdHandler{ 73 | opts: opts, 74 | hostname: hostname, 75 | gs: gs, 76 | uuid: uuid.New(), 77 | cmd: exec.Command(opts.Cmd, opts.CmdArgs...), 78 | } 79 | 80 | ret, _, _, err := handleCommand(handler) 81 | 82 | if err != nil { 83 | logger.Errorf("%v", err) // error text is data, never a format string: it may contain '%' 84 | } 85 | 86 | os.Exit(ret) 87 | } 88 | -------------------------------------------------------------------------------- /cronner_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 
PagerDuty, Inc, et al. All rights reserved. 2 | // Use of this source code is governed by the BSD 3-Clause 3 | // license that can be found in the LICENSE file. 4 | 5 | package main 6 | 7 | import ( 8 | "bytes" 9 | "fmt" 10 | "math/rand" 11 | "net" 12 | "path" 13 | "testing" 14 | "time" 15 | 16 | "github.com/PagerDuty/godspeed" 17 | "github.com/codeskyblue/go-uuid" 18 | "github.com/tideland/golib/logger" 19 | . "gopkg.in/check.v1" 20 | ) 21 | 22 | func Test(t *testing.T) { TestingT(t) } 23 | 24 | type TestSuite struct { 25 | l *net.UDPConn 26 | ctrl chan int 27 | out chan []byte 28 | lockFile string 29 | h *cmdHandler 30 | } 31 | 32 | var _ = Suite(&TestSuite{}) 33 | 34 | func (t *TestSuite) SetUpSuite(c *C) { 35 | // suppress application logging 36 | logger.SetLevel(logger.LevelFatal) 37 | 38 | workingDir := c.MkDir() 39 | 40 | t.h = &cmdHandler{ 41 | hostname: "brainbox01", 42 | uuid: uuid.New(), 43 | opts: &binArgs{ 44 | Label: "testCmd", 45 | LogFail: true, 46 | LogPath: workingDir, 47 | LockDir: workingDir, 48 | }, 49 | } 50 | 51 | var err error 52 | 53 | t.h.gs, err = godspeed.NewDefault() 54 | c.Assert(err, IsNil) 55 | t.h.gs.SetNamespace("cronner") 56 | 57 | t.lockFile = path.Join(t.h.opts.LockDir, "cronner-testCmd.lock") 58 | } 59 | 60 | func (t *TestSuite) TearDownSuite(c *C) { 61 | t.h.gs.Conn.Close() 62 | } 63 | 64 | func (t *TestSuite) SetUpTest(c *C) { 65 | t.l, t.ctrl, t.out = buildListener(8125) 66 | 67 | // this goroutine will get cleaned up by the 68 | // TearDownTest function 69 | go listener(t.l, t.ctrl, t.out) 70 | } 71 | 72 | func (t *TestSuite) TearDownTest(c *C) { 73 | close(t.ctrl) 74 | t.l.Close() 75 | 76 | time.Sleep(time.Millisecond * 10) 77 | } 78 | 79 | // 80 | // Cronner testing helper functions 81 | // 82 | var chars = []byte("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") 83 | 84 | func randString(size int) string { 85 | buf := make([]byte, size) 86 | for i := range buf { 87 | buf[i] = chars[rand.Intn(len(chars))] 
88 | } 89 | return string(buf) 90 | } 91 | 92 | func listener(l *net.UDPConn, ctrl <-chan int, c chan<- []byte) { 93 | for { 94 | select { 95 | case _, ok := <-ctrl: 96 | if !ok { 97 | close(c) 98 | return 99 | } 100 | default: 101 | buffer := make([]byte, 8193) 102 | 103 | _, err := l.Read(buffer) 104 | 105 | if err != nil { 106 | continue 107 | } 108 | 109 | c <- bytes.Trim(buffer, "\x00") 110 | } 111 | } 112 | } 113 | 114 | func buildListener(port uint16) (*net.UDPConn, chan int, chan []byte) { 115 | addr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("127.0.0.1:%d", port)) 116 | 117 | if err != nil { 118 | panic(fmt.Sprintf("getting address for test listener failed, bailing out. Here's everything I know: %v", err)) 119 | } 120 | 121 | l, err := net.ListenUDP("udp", addr) 122 | 123 | if err != nil { 124 | panic(fmt.Sprintf("unable to listen for traffic: %v", err)) 125 | } 126 | 127 | return l, make(chan int), make(chan []byte) 128 | } 129 | -------------------------------------------------------------------------------- /runner.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 PagerDuty, Inc, et al. All rights reserved. 2 | // Use of this source code is governed by the BSD 3-Clause 3 | // license that can be found in the LICENSE file. 
4 | 5 | package main 6 | 7 | import ( 8 | "bytes" 9 | "fmt" 10 | "os" 11 | "os/exec" 12 | "path" 13 | "regexp" 14 | "syscall" 15 | "time" 16 | 17 | "github.com/theckman/go-flock" 18 | "github.com/tideland/golib/logger" 19 | ) 20 | 21 | const intErrCode = 200 22 | 23 | // MaxBody is the maximum length of a event body 24 | const MaxBody = 4096 25 | 26 | // execCmd is a function to run a command and send 27 | // the error value back through a channel 28 | func execCmd(cmd *exec.Cmd, c chan<- error) { 29 | c <- cmd.Run() 30 | close(c) 31 | } 32 | 33 | // handleCommand is a function that handles the entire process of running a command: 34 | // 35 | // * file-based locking for the command 36 | // * actually running the command 37 | // * timing how long it takes and emitting a metric for it 38 | // * tracking command return codes and emitting a metric for it 39 | // * emitting warning metrics if a command has exceeded its running time 40 | // 41 | // it returns the following: 42 | // 43 | // * (int) return code 44 | // * (float64) run time 45 | func handleCommand(hndlr *cmdHandler) (int, []byte, float64, error) { 46 | if hndlr.opts.AllEvents { 47 | // emit a DD event to indicate we are starting the job 48 | emitEvent(fmt.Sprintf("Cron %v starting on %v", hndlr.opts.Label, hndlr.hostname), fmt.Sprintf("UUID: %v\n", hndlr.uuid), hndlr.opts.Label, "info", hndlr) 49 | } 50 | 51 | // set up the output buffer for the command 52 | var b bytes.Buffer 53 | 54 | // comnbine stdout and stderr to the same buffer 55 | // if we actually plan on using the command output 56 | // otherwise, /dev/null 57 | if hndlr.opts.AllEvents || hndlr.opts.FailEvent || hndlr.opts.LogFail { 58 | hndlr.cmd.Stdout = &b 59 | hndlr.cmd.Stderr = &b 60 | } else { 61 | hndlr.cmd.Stdout = nil 62 | hndlr.cmd.Stderr = nil 63 | } 64 | 65 | // build a new lockFile 66 | lockFile := flock.NewFlock(path.Join(hndlr.opts.LockDir, fmt.Sprintf("cronner-%v.lock", hndlr.opts.Label))) 67 | 68 | var err error 69 | 70 | // 
grab the lock 71 | if hndlr.opts.Lock { 72 | locked, err := lockFile.TryLock() 73 | 74 | if err != nil { 75 | retErr := fmt.Errorf("failed to obtain lock on '%v': %v", lockFile, err) 76 | return intErrCode, nil, -1, retErr 77 | } 78 | 79 | if !locked && hndlr.opts.WaitSeconds == 0 { 80 | retErr := fmt.Errorf("failed to obtain lock on '%v': locked by another process", lockFile) 81 | return intErrCode, nil, -1, retErr 82 | } else if !locked && hndlr.opts.WaitSeconds > 0 { 83 | tick := time.NewTicker(time.Second * time.Duration(hndlr.opts.WaitSeconds)) 84 | 85 | GotLock: 86 | for { 87 | select { 88 | case _ = <-tick.C: 89 | retErr := fmt.Errorf("timeout exceeded (%ds) waiting for the file lock", hndlr.opts.WaitSeconds) 90 | return intErrCode, nil, -1, retErr 91 | default: 92 | locked, err = lockFile.TryLock() 93 | 94 | if !locked || err != nil { 95 | time.Sleep(time.Second * 1) 96 | continue 97 | } 98 | 99 | break GotLock 100 | } 101 | } 102 | } 103 | } 104 | 105 | var s time.Time 106 | ch := make(chan error) 107 | 108 | // if we have a timer value, do all the extra logic to 109 | // use the ticker to send out warning events 110 | // 111 | // otherwise, KISS 112 | if hndlr.opts.WarnAfter > 0 { 113 | // use time.Tick() instead of time.NewTicker() because 114 | // we don't ever need to run Stop() on this ticker as cronner 115 | // won't live much beyond the command returning 116 | tickChan := time.Tick(time.Second * time.Duration(hndlr.opts.WarnAfter)) 117 | 118 | // get the current (start) time since the UTC epoch 119 | // and run the command 120 | s = time.Now().UTC() 121 | go execCmd(hndlr.cmd, ch) 122 | 123 | // this is an open loop to wait for either the command to return 124 | // or time to be sent over the ticker channel 125 | // 126 | // the WaitLoop label is used to break from the select statement 127 | WaitLoop: 128 | for { 129 | // wait for either the command channel to return an error value 130 | // or wait for the ticket channel to return a time.Time value 
131 | select { 132 | case m := <-ch: 133 | // the comand returned; set the error vailue and bail out of here 134 | err = m 135 | break WaitLoop 136 | case _, ok := <-tickChan: 137 | if ok { 138 | runSecs := time.Since(s).Seconds() 139 | title := fmt.Sprintf("Cron %v still running after %d seconds on %v", hndlr.opts.Label, int64(runSecs), hndlr.hostname) 140 | body := fmt.Sprintf("UUID: %v\nrunning for %v seconds", hndlr.uuid, int64(runSecs)) 141 | emitEvent(title, body, hndlr.opts.Label, "warning", hndlr) 142 | } 143 | } 144 | } 145 | } else { 146 | // get the current (start) time since the UTC epoch 147 | // and run the command 148 | s = time.Now().UTC() 149 | go execCmd(hndlr.cmd, ch) 150 | err = <-ch 151 | } 152 | 153 | // This next section computes the wallclock run time in ms. 154 | // However, there is the unfortunate limitation in that 155 | // it uses the clock that gets adjusted by ntpd. Within pure 156 | // Go, we don't have access to CLOCK_MONOTONIC_RAW. 157 | // 158 | // However, based on our usage I don't think we care about it 159 | // being off by a few milliseconds. 160 | wallRtMs := time.Since(s).Seconds() * 1000 161 | 162 | // calculate the return code of the command 163 | // default to return code 0: success 164 | // 165 | // this is being done within the lock because 166 | // even if we fail to remove the lockfile, we still 167 | // need to know what the command did. 
168 | var ret int 169 | if err != nil { 170 | if ee, ok := err.(*exec.ExitError); ok { 171 | status := ee.Sys().(syscall.WaitStatus) 172 | ret = status.ExitStatus() 173 | } else { 174 | ret = intErrCode 175 | } 176 | } 177 | 178 | // unlock 179 | if hndlr.opts.Lock { 180 | if lockErr := lockFile.Unlock(); lockErr != nil { 181 | // if the command didn't fail, but unlocking did 182 | // replace the command error with the unlock error 183 | // otherwise just print the error 184 | retErr := fmt.Errorf("failed to unlock: '%v': %v", lockFile, lockErr) 185 | if err == nil { 186 | err = retErr 187 | } else { 188 | logger.Errorf(retErr.Error()) 189 | } 190 | } 191 | } 192 | 193 | // emit the metric for how long it took us and return code 194 | tags := []string{} 195 | 196 | if len(hndlr.opts.Group) > 0 { 197 | tags = append(tags, fmt.Sprintf("cronner_group:%s", hndlr.opts.Group)) 198 | } 199 | 200 | hndlr.gs.Timing(fmt.Sprintf("%v.time", hndlr.opts.Label), wallRtMs, tags) 201 | hndlr.gs.Gauge(fmt.Sprintf("%v.exit_code", hndlr.opts.Label), float64(ret), tags) 202 | 203 | out := b.Bytes() 204 | 205 | // default variables are for success 206 | // we change them later if there was a failure 207 | msg := "succeeded" 208 | alertType := "success" 209 | 210 | // if the command failed change the state variables to their failure values 211 | if err != nil { 212 | msg = "failed" 213 | alertType = "error" 214 | } 215 | 216 | if hndlr.opts.AllEvents || (hndlr.opts.FailEvent && alertType == "error") { 217 | // build the pieces of the completion event 218 | title := fmt.Sprintf("Cron %v %v in %.5f seconds on %v", hndlr.opts.Label, msg, wallRtMs/1000, hndlr.hostname) 219 | 220 | body := fmt.Sprintf("UUID: %v\nexit code: %d\n", hndlr.uuid, ret) 221 | if err != nil { 222 | er := regexp.MustCompile("^exit status ([-]?\\d)") 223 | 224 | // do not show the 'more:' line, if the line is just telling us 225 | // what the exit code is 226 | if !er.MatchString(err.Error()) { 227 | body = 
fmt.Sprintf("%vmore: %v\n", body, err.Error()) 228 | } 229 | } 230 | 231 | var cmdOutput string 232 | 233 | if len(out) > 0 { 234 | cmdOutput = string(out) 235 | } else { 236 | cmdOutput = "(none)" 237 | } 238 | 239 | body = fmt.Sprintf("%voutput: %v", body, cmdOutput) 240 | 241 | emitEvent(title, body, hndlr.opts.Label, alertType, hndlr) 242 | } 243 | 244 | // this code block is meant to be ran last 245 | if alertType == "error" && hndlr.opts.LogFail { 246 | filename := path.Join(hndlr.opts.LogPath, fmt.Sprintf("%v-%v.out", hndlr.opts.Label, hndlr.uuid)) 247 | if !writeOutput(filename, out, hndlr.opts.Sensitive) { 248 | os.Exit(1) 249 | } 250 | } 251 | 252 | return ret, out, wallRtMs, err 253 | } 254 | 255 | // emit a godspeed (dogstatsd) event 256 | func emitEvent(title, body, label, alertType string, hndlr *cmdHandler) { 257 | var buf bytes.Buffer 258 | 259 | // if the event's body is bigger than MaxBody 260 | if len(body) > MaxBody { 261 | // push the first MaxBody/2 bytes in to the buffer 262 | buf.WriteString(body[0 : MaxBody/2]) 263 | 264 | // add indication of truncated output to the buffer 265 | buf.WriteString("...\n=== OUTPUT TRUNCATED ===\n") 266 | 267 | // add the last 1024 bytes to the buffer 268 | buf.WriteString(body[len(body)-((MaxBody/2)+1) : len(body)-1]) 269 | 270 | body = string(buf.Bytes()) 271 | } 272 | 273 | fields := make(map[string]string) 274 | fields["source_type_name"] = "cronner" 275 | 276 | if len(alertType) > 0 { 277 | fields["alert_type"] = alertType 278 | } 279 | 280 | if len(hndlr.uuid) > 0 { 281 | fields["aggregation_key"] = hndlr.uuid 282 | } 283 | 284 | tags := []string{"source_type:cronner", fmt.Sprintf("cronner_label_name:%v", label)} 285 | 286 | if len(hndlr.opts.EventGroup) > 0 { 287 | tags = append(tags, fmt.Sprintf("cronner_group:%s", hndlr.opts.EventGroup)) 288 | } 289 | 290 | hndlr.gs.Event(title, body, fields, tags) 291 | } 292 | 293 | // bailOut is for failures during logfile writing 294 | func bailOut(out []byte, 
sensitive bool) bool { 295 | if !sensitive { 296 | fmt.Fprintf(os.Stderr, "here is the output in hopes you are looking here:\n\n%v", string(out)) 297 | os.Exit(1) 298 | } 299 | return false 300 | } 301 | 302 | // writeOutput saves the output (out) to the file specified 303 | func writeOutput(filename string, out []byte, sensitive bool) bool { 304 | // check to see whehter or not the output file already exists 305 | // this should really never happen, but just in case it does... 306 | if _, err := os.Stat(filename); !os.IsNotExist(err) { 307 | fmt.Fprintf(os.Stderr, "flagrant error: output file '%v' already exists\n", filename) 308 | return bailOut(out, sensitive) 309 | } 310 | 311 | outFile, err := os.Create(filename) 312 | 313 | if err != nil { 314 | fmt.Fprintf(os.Stderr, "error opening file to save command output: %v\n", err.Error()) 315 | return bailOut(out, sensitive) 316 | } 317 | 318 | defer outFile.Close() 319 | 320 | if err = outFile.Chmod(0400); err != nil { 321 | fmt.Fprintf(os.Stderr, "error setting permissions (0400) on file '%v': %v\n", filename, err.Error()) 322 | return bailOut(out, sensitive) 323 | } 324 | 325 | nwrt, err := outFile.Write(out) 326 | 327 | if err != nil { 328 | fmt.Fprintf(os.Stderr, "error writing to file '%v': %v\n", filename, err.Error()) 329 | return bailOut(out, sensitive) 330 | } 331 | 332 | if nwrt != len(out) { 333 | fmt.Fprintf(os.Stderr, "error writing to file '%v': number of bytes written not equal to output (total: %d, written: %d)\n", filename, len(out), nwrt) 334 | return bailOut(out, sensitive) 335 | } 336 | 337 | return true 338 | } 339 | -------------------------------------------------------------------------------- /runner_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2015 PagerDuty, Inc, et al. All rights reserved. 2 | // Use of this source code is governed by the BSD 3-Clause 3 | // license that can be found in the LICENSE file. 
4 | 5 | package main 6 | 7 | import ( 8 | "fmt" 9 | "io/ioutil" 10 | "os" 11 | "os/exec" 12 | "path" 13 | "regexp" 14 | "runtime" 15 | "strconv" 16 | "time" 17 | 18 | "github.com/theckman/go-flock" 19 | 20 | . "gopkg.in/check.v1" 21 | ) 22 | 23 | func (t *TestSuite) Test_handleCommand(c *C) { 24 | // 25 | // Test a command that finishes in 0.3 seconds 26 | // 27 | t.h.cmd = exec.Command("/usr/bin/time", "-p", "/bin/sleep", "0.3") 28 | 29 | retCode, r, runTime, err := handleCommand(t.h) 30 | c.Assert(err, IsNil) 31 | c.Check(retCode, Equals, 0) 32 | 33 | stat, ok := <-t.out 34 | c.Assert(ok, Equals, true) 35 | 36 | timeStatRegex := regexp.MustCompile("^cronner.testCmd.time:([0-9\\.]+)\\|ms$") 37 | match := timeStatRegex.FindAllStringSubmatch(string(stat), -1) 38 | c.Assert(len(match), Equals, 1) 39 | c.Assert(len(match[0]), Equals, 2) 40 | 41 | statFloat, err := strconv.ParseFloat(match[0][1], 64) 42 | c.Assert(err, IsNil) 43 | c.Check(statFloat, Equals, runTime) 44 | 45 | stat, ok = <-t.out 46 | c.Assert(ok, Equals, true) 47 | 48 | retStatRegex := regexp.MustCompile("^cronner.testCmd.exit_code:([0-9\\.]+)\\|g$") 49 | match = retStatRegex.FindAllStringSubmatch(string(stat), -1) 50 | c.Assert(len(match), Equals, 1) 51 | c.Assert(len(match[0]), Equals, 2) 52 | 53 | retFloat, err := strconv.ParseFloat(match[0][1], 64) 54 | c.Assert(err, IsNil) 55 | c.Check(retFloat, Equals, float64(0)) 56 | 57 | var timely bool 58 | 59 | // assume the command run time will be within 20ms of correct, 60 | // note sure how tight we can make this window without incurring 61 | // false-failures. 
62 | if runTime > 300 && runTime < 320 { 63 | timely = true 64 | } 65 | c.Assert(timely, Equals, true) 66 | 67 | timeRegex := regexp.MustCompile("((?m)^real[[:space:]]+([0-9\\.]+)$)") 68 | match = timeRegex.FindAllStringSubmatch(string(r), -1) 69 | c.Assert(len(match), Equals, 1) 70 | c.Assert(len(match[0]), Equals, 3) 71 | c.Check(match[0][2], Equals, "0.30") 72 | 73 | // 74 | // Test a command that finishes in 1 second 75 | // 76 | 77 | // Reset variables used 78 | r = nil 79 | err = nil 80 | runTime = 0 81 | match = nil 82 | timely = false 83 | retCode = -512 84 | timeRegex = nil 85 | 86 | t.h.cmd = exec.Command("/usr/bin/time", "-p", "/bin/sleep", "1") 87 | 88 | retCode, r, runTime, err = handleCommand(t.h) 89 | c.Assert(err, IsNil) 90 | c.Check(retCode, Equals, 0) 91 | 92 | stat, ok = <-t.out 93 | c.Assert(ok, Equals, true) 94 | 95 | match = timeStatRegex.FindAllStringSubmatch(string(stat), -1) 96 | c.Assert(len(match), Equals, 1) 97 | c.Assert(len(match[0]), Equals, 2) 98 | 99 | statFloat, err = strconv.ParseFloat(match[0][1], 64) 100 | c.Assert(err, IsNil) 101 | c.Check(statFloat, Equals, runTime) 102 | 103 | if runTime > 1000 && runTime < 1020 { 104 | timely = true 105 | } 106 | c.Check(timely, Equals, true) 107 | 108 | timeRegex = regexp.MustCompile("((?m)^real[[:space:]]+([0-9\\.]+)$)") 109 | match = timeRegex.FindAllStringSubmatch(string(r), -1) 110 | c.Assert(len(match), Equals, 1) 111 | c.Assert(len(match[0]), Equals, 3) 112 | c.Check(match[0][2], Equals, "1.00") 113 | 114 | stat, ok = <-t.out 115 | c.Assert(ok, Equals, true) 116 | 117 | match = retStatRegex.FindAllStringSubmatch(string(stat), -1) 118 | c.Assert(len(match), Equals, 1) 119 | c.Assert(len(match[0]), Equals, 2) 120 | 121 | retFloat, err = strconv.ParseFloat(match[0][1], 64) 122 | c.Assert(err, IsNil) 123 | c.Check(retFloat, Equals, float64(0)) 124 | 125 | // 126 | // Test a valid return code is given 127 | // 128 | 129 | // Reset variables used 130 | r = nil 131 | err = nil 132 | runTime 
= 0 133 | match = nil 134 | retCode = -512 135 | 136 | switch runtime.GOOS { 137 | case "linux": 138 | t.h.cmd = exec.Command("/bin/false") 139 | case "darwin": 140 | t.h.cmd = exec.Command("/usr/bin/false") 141 | } 142 | 143 | retCode, r, runTime, err = handleCommand(t.h) 144 | c.Assert(err, Not(IsNil)) 145 | c.Check(retCode, Equals, 1) 146 | 147 | _, ok = <-t.out 148 | c.Assert(ok, Equals, true) 149 | 150 | stat, ok = <-t.out 151 | c.Assert(ok, Equals, true) 152 | 153 | match = retStatRegex.FindAllStringSubmatch(string(stat), -1) 154 | c.Assert(len(match), Equals, 1) 155 | c.Assert(len(match[0]), Equals, 2) 156 | 157 | retFloat, err = strconv.ParseFloat(match[0][1], 64) 158 | c.Assert(err, IsNil) 159 | c.Check(retFloat, Equals, float64(1)) 160 | 161 | // 162 | // Test that DD events work 163 | // 164 | 165 | // Reset variables used 166 | r = nil 167 | err = nil 168 | runTime = 0 169 | match = nil 170 | retCode = -512 171 | 172 | t.h.cmd = exec.Command("/bin/echo", "somevalue") 173 | t.h.opts.AllEvents = true 174 | 175 | retCode, r, runTime, err = handleCommand(t.h) 176 | c.Assert(err, IsNil) 177 | 178 | stat, ok = <-t.out 179 | c.Assert(ok, Equals, true) 180 | c.Check( 181 | string(stat), 182 | Equals, 183 | fmt.Sprintf(`_e{35,44}:Cron testCmd starting on brainbox01|UUID: %v\n|k:%v|s:cronner|t:info|#source_type:cronner,cronner_label_name:testCmd`, t.h.uuid, t.h.uuid), 184 | ) 185 | 186 | stat, ok = <-t.out 187 | c.Assert(ok, Equals, true) 188 | match = timeStatRegex.FindAllStringSubmatch(string(stat), -1) 189 | c.Assert(len(match), Equals, 1) 190 | c.Assert(len(match[0]), Equals, 2) 191 | c.Check(strconv.FormatFloat(runTime, 'f', -1, 64), Equals, match[0][1]) 192 | 193 | stat, ok = <-t.out 194 | c.Assert(ok, Equals, true) 195 | c.Check(string(stat), Equals, "cronner.testCmd.exit_code:0|g") 196 | 197 | stat, ok = <-t.out 198 | c.Assert(ok, Equals, true) 199 | c.Check( 200 | string(stat), 201 | Equals, 202 | fmt.Sprintf(`_e{55,77}:Cron testCmd succeeded in %.5f 
seconds on brainbox01|UUID: %v\nexit code: 0\noutput: somevalue\n|k:%v|s:cronner|t:success|#source_type:cronner,cronner_label_name:testCmd`, runTime/1000, t.h.uuid, t.h.uuid), 203 | ) 204 | 205 | // 206 | // Test that DD events contain the cronner_group tag 207 | // 208 | 209 | // Reset variables used 210 | r = nil 211 | err = nil 212 | runTime = 0 213 | match = nil 214 | 215 | t.h.cmd = exec.Command("/bin/echo", "somevalue") 216 | t.h.opts.EventGroup = "testgroup" 217 | 218 | _, r, runTime, err = handleCommand(t.h) 219 | c.Assert(err, IsNil) 220 | 221 | stat, ok = <-t.out 222 | c.Assert(ok, Equals, true) 223 | c.Check( 224 | string(stat), 225 | Equals, 226 | fmt.Sprintf(`_e{35,44}:Cron testCmd starting on brainbox01|UUID: %v\n|k:%v|s:cronner|t:info|#source_type:cronner,cronner_label_name:testCmd,cronner_group:testgroup`, t.h.uuid, t.h.uuid), 227 | ) 228 | 229 | stat, ok = <-t.out 230 | c.Assert(ok, Equals, true) 231 | match = timeStatRegex.FindAllStringSubmatch(string(stat), -1) 232 | c.Assert(len(match), Equals, 1) 233 | c.Assert(len(match[0]), Equals, 2) 234 | c.Check(strconv.FormatFloat(runTime, 'f', -1, 64), Equals, match[0][1]) 235 | 236 | stat, ok = <-t.out 237 | c.Assert(ok, Equals, true) 238 | c.Check(string(stat), Equals, "cronner.testCmd.exit_code:0|g") 239 | 240 | stat, ok = <-t.out 241 | c.Assert(ok, Equals, true) 242 | c.Check( 243 | string(stat), 244 | Equals, 245 | fmt.Sprintf(`_e{55,77}:Cron testCmd succeeded in %.5f seconds on brainbox01|UUID: %v\nexit code: 0\noutput: somevalue\n|k:%v|s:cronner|t:success|#source_type:cronner,cronner_label_name:testCmd,cronner_group:testgroup`, runTime/1000, t.h.uuid, t.h.uuid), 246 | ) 247 | 248 | // 249 | // Test that DD metrics contain the cronner_group tag 250 | // 251 | 252 | // Reset variables used 253 | r = nil 254 | err = nil 255 | runTime = 0 256 | match = nil 257 | 258 | t.h.cmd = exec.Command("/bin/echo", "somevalue") 259 | t.h.opts.Group = "metricgroup" 260 | t.h.opts.EventGroup = "" 261 | 262 | _, r, 
runTime, err = handleCommand(t.h) 263 | c.Assert(err, IsNil) 264 | 265 | stat, ok = <-t.out 266 | c.Assert(ok, Equals, true) 267 | c.Check( 268 | string(stat), 269 | Equals, 270 | fmt.Sprintf(`_e{35,44}:Cron testCmd starting on brainbox01|UUID: %v\n|k:%v|s:cronner|t:info|#source_type:cronner,cronner_label_name:testCmd`, t.h.uuid, t.h.uuid), 271 | ) 272 | 273 | stat, ok = <-t.out 274 | c.Assert(ok, Equals, true) 275 | timeStatTagRegex := regexp.MustCompile("^cronner.testCmd.time:([0-9\\.]+)\\|ms\\|#cronner_group:([a-z]+)$") 276 | match = timeStatTagRegex.FindAllStringSubmatch(string(stat), -1) 277 | c.Assert(len(match), Equals, 1) 278 | c.Assert(len(match[0]), Equals, 3) 279 | c.Check(strconv.FormatFloat(runTime, 'f', -1, 64), Equals, match[0][1]) 280 | c.Check("metricgroup", Equals, match[0][2]) 281 | 282 | stat, ok = <-t.out 283 | c.Assert(ok, Equals, true) 284 | c.Check(string(stat), Equals, "cronner.testCmd.exit_code:0|g|#cronner_group:metricgroup") 285 | 286 | stat, ok = <-t.out 287 | c.Assert(ok, Equals, true) 288 | c.Check( 289 | string(stat), 290 | Equals, 291 | fmt.Sprintf(`_e{55,77}:Cron testCmd succeeded in %.5f seconds on brainbox01|UUID: %v\nexit code: 0\noutput: somevalue\n|k:%v|s:cronner|t:success|#source_type:cronner,cronner_label_name:testCmd`, runTime/1000, t.h.uuid, t.h.uuid), 292 | ) 293 | 294 | // 295 | // Test that no output is given 296 | // 297 | 298 | // Reset variables used 299 | r = nil 300 | err = nil 301 | runTime = 0 302 | match = nil 303 | 304 | t.h.cmd = exec.Command("/bin/echo", "something") 305 | t.h.opts.EventGroup = "" 306 | t.h.opts.Group = "" 307 | 308 | t.h.opts.LogFail = false 309 | t.h.opts.Lock = true 310 | t.h.opts.AllEvents = false 311 | 312 | retCode, r, _, err = handleCommand(t.h) 313 | c.Assert(err, IsNil) 314 | c.Check(retCode, Equals, 0) 315 | c.Check(len(r), Equals, 0) 316 | 317 | // clear the statsd return channel 318 | _, ok = <-t.out 319 | c.Assert(ok, Equals, true) 320 | _, ok = <-t.out 321 | c.Assert(ok, Equals, 
true) 322 | 323 | // 324 | // Test that locking fails properly when unable to acquire lock 325 | // 326 | 327 | // Reset variables used 328 | err = nil 329 | retCode = -512 330 | 331 | lf := flock.NewFlock(t.lockFile) 332 | c.Assert(lf, Not(IsNil)) 333 | 334 | locked, err := lf.TryLock() 335 | c.Assert(err, IsNil) 336 | c.Assert(locked, Equals, true) 337 | 338 | retCode, _, _, err = handleCommand(t.h) 339 | c.Assert(err, Not(IsNil)) 340 | c.Check(err.Error(), Equals, fmt.Sprintf("failed to obtain lock on '%v': locked by another process", t.lockFile)) 341 | c.Check(retCode, Equals, 200) 342 | 343 | // 344 | // Test that locking succeeds with a timeout 345 | // 346 | 347 | // Reset variables used 348 | err = nil 349 | retCode = -512 350 | 351 | t.h.opts.WaitSeconds = 5 352 | t.h.cmd = exec.Command("/bin/echo", "something") 353 | 354 | go func() { 355 | time.Sleep(time.Second * 3) 356 | lf.Unlock() 357 | }() 358 | 359 | retCode, _, _, err = handleCommand(t.h) 360 | c.Assert(err, IsNil) 361 | c.Check(retCode, Equals, 0) 362 | 363 | // clear the statsd return channel 364 | _, ok = <-t.out 365 | c.Assert(ok, Equals, true) 366 | _, ok = <-t.out 367 | c.Assert(ok, Equals, true) 368 | 369 | // 370 | // Test that locking fails when exceeding the timeout 371 | // 372 | 373 | // Reset variables used 374 | err = nil 375 | retCode = -512 376 | 377 | t.h.opts.WaitSeconds = 1 378 | t.h.cmd = exec.Command("/bin/echo", "something") 379 | 380 | locked, err = lf.TryLock() 381 | c.Assert(err, IsNil) 382 | c.Assert(locked, Equals, true) 383 | 384 | go func() { 385 | time.Sleep(time.Second * 3) 386 | lf.Unlock() 387 | }() 388 | 389 | retCode, _, _, err = handleCommand(t.h) 390 | c.Assert(err, Not(IsNil)) 391 | c.Check(err.Error(), Equals, "timeout exceeded (1s) waiting for the file lock") 392 | c.Check(retCode, Equals, 200) 393 | 394 | // 395 | // Test that warning Dogstatsd events are emitted if a 396 | // command is taking too long to run 397 | // 398 | 399 | // Reset variables used 
400 | err = nil 401 | retCode = -512 402 | 403 | t.h.opts.Lock = false 404 | t.h.opts.WarnAfter = 2 405 | 406 | t.h.cmd = exec.Command("/bin/sleep", "3") 407 | 408 | retCode, r, runTime, err = handleCommand(t.h) 409 | c.Assert(err, IsNil) 410 | c.Assert(retCode, Equals, 0) 411 | c.Check(len(r), Equals, 0) 412 | 413 | // clear the statsd return channel 414 | stat, ok = <-t.out 415 | c.Assert(ok, Equals, true) 416 | c.Check( 417 | string(stat), 418 | Equals, 419 | fmt.Sprintf(`_e{56,65}:Cron testCmd still running after 2 seconds on brainbox01|UUID: %v\nrunning for 2 seconds|k:%v|s:cronner|t:warning|#source_type:cronner,cronner_label_name:testCmd`, t.h.uuid, t.h.uuid), 420 | ) 421 | 422 | stat, ok = <-t.out 423 | c.Assert(ok, Equals, true) 424 | 425 | match = timeStatRegex.FindAllStringSubmatch(string(stat), -1) 426 | c.Assert(len(match), Equals, 1) 427 | c.Assert(len(match[0]), Equals, 2) 428 | 429 | statFloat, err = strconv.ParseFloat(match[0][1], 64) 430 | c.Assert(err, IsNil) 431 | c.Check(statFloat, Equals, runTime) 432 | 433 | stat, ok = <-t.out 434 | c.Assert(ok, Equals, true) 435 | 436 | match = retStatRegex.FindAllStringSubmatch(string(stat), -1) 437 | c.Assert(len(match), Equals, 1) 438 | c.Assert(len(match[0]), Equals, 2) 439 | 440 | retFloat, err = strconv.ParseFloat(match[0][1], 64) 441 | c.Assert(err, IsNil) 442 | c.Check(retFloat, Equals, float64(0)) 443 | } 444 | 445 | func (t *TestSuite) Test_emitEvent(c *C) { 446 | title := "TE" 447 | body := "B" 448 | label := "urmom" 449 | alertType := "info" 450 | t.h.opts.EventGroup = "testing" 451 | 452 | emitEvent(title, body, label, alertType, t.h) 453 | 454 | event, ok := <-t.out 455 | c.Assert(ok, Equals, true) 456 | 457 | eventStub := fmt.Sprintf("_e{%d,%d}:%v|%v|k:%v|s:cronner|t:%v|#source_type:cronner,cronner_label_name:urmom,cronner_group:testing", len(title), len(body), title, body, t.h.uuid, alertType) 458 | eventStr := string(event) 459 | 460 | c.Check(eventStr, Equals, eventStub) 461 | 462 | // 463 | 
// Test truncation 464 | // 465 | 466 | // generate a body that will be truncated 467 | body = randString(4100) 468 | title = "TE2" 469 | label = "awwyiss" 470 | alertType = "success" 471 | t.h.opts.EventGroup = "" 472 | 473 | emitEvent(title, body, label, alertType, t.h) 474 | 475 | event, ok = <-t.out 476 | c.Assert(ok, Equals, true) 477 | 478 | // simulate truncation and addition of the truncation messsage 479 | truncatedBody := fmt.Sprintf("%v...\\n=== OUTPUT TRUNCATED ===\\n%v", body[0:MaxBody/2], body[len(body)-((MaxBody/2)+1):len(body)-1]) 480 | 481 | eventStub = fmt.Sprintf("_e{%d,%d}:%v|%v|k:%v|s:cronner|t:%v|#source_type:cronner,cronner_label_name:awwyiss", len(title), len(truncatedBody), title, truncatedBody, t.h.uuid, alertType) 482 | eventStr = string(event) 483 | 484 | c.Check(eventStr, Equals, eventStub) 485 | } 486 | 487 | func (t *TestSuite) Test_writeOutput(c *C) { 488 | tmpDir, err := ioutil.TempDir("/tmp", "cronner_test") 489 | c.Assert(err, IsNil) 490 | 491 | defer os.RemoveAll(tmpDir) 492 | 493 | filename := path.Join(tmpDir, fmt.Sprintf("outfile-%v.out", randString(8))) 494 | out := []byte("this is a test!") 495 | 496 | ok := writeOutput(filename, out, false) 497 | c.Assert(ok, Equals, true) 498 | 499 | stat, err := os.Stat(filename) 500 | c.Assert(err, IsNil) 501 | c.Check(stat.Mode(), Equals, os.FileMode(0400)) 502 | 503 | file, err := os.Open(filename) 504 | c.Assert(err, IsNil) 505 | 506 | contents, err := ioutil.ReadAll(file) 507 | c.Assert(err, IsNil) 508 | c.Check(string(out), Equals, string(contents)) 509 | } 510 | --------------------------------------------------------------------------------