├── .gitignore ├── .travis.yml ├── History.md ├── Makefile ├── Readme.md ├── deps ├── commander.c ├── commander.h ├── ms.c └── ms.h ├── example ├── on_error.sh └── program.sh ├── package.json └── src └── mon.c /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | mon.log 3 | mon 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | compiler: 4 | - clang 5 | - gcc 6 | 7 | install: make && sudo make install 8 | 9 | script: /usr/local/bin/mon -h 10 | -------------------------------------------------------------------------------- /History.md: -------------------------------------------------------------------------------- 1 | 2 | 1.2.3 / 2013-12-01 3 | ================== 4 | 5 | * improve -S to be position independent of -p 6 | * improve compatibility with BSD make. 7 | 8 | 1.2.2 / 2013-06-10 9 | ================== 10 | 11 | * fix waiting for child to exit on graceful shutdown 12 | 13 | 1.2.1 / 2013-05-31 14 | ================== 15 | 16 | * fix: only invoke error command when specified 17 | * update commander 18 | 19 | 1.2.0 / 2012-12-17 20 | ================== 21 | 22 | * add passing of PID to the error scripts 23 | 24 | 1.1.2 / 2012-12-13 25 | ================== 26 | 27 | * update commander 28 | 29 | 1.1.1 / 2012-11-12 30 | ================== 31 | 32 | * change SIGKILL to sig 33 | 34 | 1.1.0 / 2012-11-09 35 | ================== 36 | 37 | * add `--on-error ` 38 | * add `--on-restart ` 39 | * add `--attempts ` 40 | * add restart limiting / bailing logic 41 | * change signalling of child to pgid to fix `dash` support 42 | 43 | 1.0.1 / 2012-11-07 44 | ================== 45 | 46 | * fix child exit, reset signals 47 | * change graceful_exit to kill() with the same sig 48 | 49 | 1.0.0 / 2012-11-07 50 | ================== 51 | 52 | * add --on-error 53 | * remove pid log 54 | 
* update commander 55 | 56 | 0.0.3 / 2012-06-15 57 | ================== 58 | 59 | * Refactor with commander 60 | 61 | 0.0.2 / 2012-05-02 62 | ================== 63 | 64 | * Added: error when an unrecognized flag is passed 65 | * Added -m, --mon-pidfile. Closes #4 66 | * Added ms dep 67 | 68 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PREFIX ?= /usr/local 2 | SRC = src/mon.c deps/ms.c deps/commander.c 3 | OBJ = $(SRC:.c=.o) 4 | CFLAGS = -D_GNU_SOURCE -std=c99 -I deps/ 5 | 6 | mon: $(OBJ) 7 | $(CC) $(OBJ) -o $@ 8 | 9 | .SUFFIXES: .c .o 10 | .c.o: 11 | $(CC) $< $(CFLAGS) -c -o $@ 12 | 13 | install: mon 14 | cp -f mon $(PREFIX)/bin/mon 15 | 16 | uninstall: 17 | rm -f $(PREFIX)/bin/mon 18 | 19 | clean: 20 | rm -f mon $(OBJ) 21 | 22 | .PHONY: clean install uninstall -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # mon(1) 2 | 3 | Super-simple monitoring program. 4 | 5 | `mon` spawned from the needlessly complex 6 | frustration that tools like [monit](http://mmonit.com/monit/) 7 | provide, with their awkward DSLs and setup. `mon` is written 8 | in C, uses less than 400kb of memory, and is incredibly simple 9 | to set up. 
10 | 11 | ## Installation 12 | 13 | ``` 14 | $ make install 15 | ``` 16 | 17 | Too lazy to clone?: 18 | 19 | ``` 20 | $ (mkdir /tmp/mon && cd /tmp/mon && curl -L# https://github.com/tj/mon/archive/master.tar.gz | tar zx --strip 1 && make install && rm -rf /tmp/mon) 21 | ``` 22 | 23 | ## Usage 24 | 25 | ``` 26 | 27 | Usage: mon [options] <command> 28 | 29 | Options: 30 | 31 | -V, --version output program version 32 | -h, --help output help information 33 | -l, --log <path> specify logfile [mon.log] 34 | -s, --sleep <sec> sleep seconds before re-executing [1] 35 | -S, --status check status of --pidfile 36 | -p, --pidfile <path> write pid to <path> 37 | -m, --mon-pidfile <path> write mon(1) pid to <path> 38 | -P, --prefix <str> add a log prefix 39 | -d, --daemonize daemonize the program 40 | -a, --attempts <n> retry attempts within 60 seconds [10] 41 | -R, --on-restart <cmd> execute <cmd> on restarts 42 | -E, --on-error <cmd> execute <cmd> on error 43 | 44 | ``` 45 | 46 | ## Example 47 | 48 | The simplest use of `mon(1)` is to keep a command running: 49 | 50 | ```js 51 | $ mon ./myprogram 52 | mon : pid 50395 53 | mon : child 50396 54 | mon : sh -c "./example/program.sh" 55 | one 56 | two 57 | three 58 | ``` 59 | 60 | You may daemonize mon and disassociate from the terminal with `-d`: 61 | 62 | ```js 63 | $ mon ./myprogram -d 64 | mon : pid 50413 65 | ``` 66 | 67 | ## Failure alerts 68 | 69 | `mon(1)` will continue to attempt restarting your program unless the maximum number 70 | of `--attempts` has been exceeded within 60 seconds. Each time a restart is performed 71 | the `--on-restart` command is executed, and when `mon(1)` finally bails the `--on-error` 72 | command is then executed before mon itself exits and gives up. 73 | 74 | For example the following will echo "hey" three times before mon realizes that 75 | the program is unstable, since it's exiting immediately, thus finally invoking 76 | `./email.sh`, or any other script you like. 
77 | 78 | ```js 79 | mon "echo hey" --attempts 3 --on-error ./email.sh 80 | mon : child 48386 81 | mon : sh -c "echo hey" 82 | hey 83 | mon : last restart less than one second ago 84 | mon : 3 attempts remaining 85 | mon : child 48387 86 | mon : sh -c "echo hey" 87 | hey 88 | mon : last restart less than one second ago 89 | mon : 2 attempts remaining 90 | mon : child 48388 91 | mon : sh -c "echo hey" 92 | hey 93 | mon : last restart less than one second ago 94 | mon : 1 attempts remaining 95 | mon : 3 restarts within less than one second, bailing 96 | mon : on error `sh test.sh` 97 | emailed failure notice to tobi@ferret-land.com 98 | mon : bye :) 99 | ``` 100 | 101 | __NOTE__: The process id is passed as an argument to both `--on-error` and `--on-restart` scripts. 102 | 103 | ## Managing several mon(1) processes 104 | 105 | `mon(1)` is designed to monitor a single program only, this means a few things, 106 | firstly that a single `mon(1)` may crash and it will not influence other programs, 107 | secondly that the "configuration" for `mon(1)` is simply a shell script, 108 | no need for funky weird inflexible DSLs. 109 | 110 | ```bash 111 | #!/usr/bin/env bash 112 | 113 | pids="/var/run" 114 | app="/www/example.com" 115 | 116 | mon -d redis-server -p $pids/redis.pid 117 | mon -d "node $app/app" -p $pids/app-0.pid 118 | mon -d "node $app/jobs" -p $pids/jobs-0.pid 119 | mon -d "node $app/jobs" -p $pids/jobs-1.pid 120 | mon -d "node $app/jobs" -p $pids/jobs-2.pid 121 | mon -d "node $app/image" -p $pids/image-0.pid 122 | mon -d "node $app/image" -p $pids/image-1.pid 123 | mon -d "node $app/image-broker" -p $pids/image-broker.pid 124 | ``` 125 | 126 | I highly recommend checking out jgallen23's [mongroup(1)](https://github.com/jgallen23/mongroup), 127 | which provides a great interface for managing any number of `mon(1)` instances. 
128 | 129 | ## Logs 130 | 131 | By default `mon(1)` logs to stdio, however when daemonized it will default 132 | to writing a log file named `./mon.log`. If you have several instances you may 133 | wish to `--prefix` the log lines, or specify separate files. 134 | 135 | ## Signals 136 | 137 | - __SIGQUIT__ graceful shutdown 138 | - __SIGTERM__ graceful shutdown 139 | 140 | ## Links 141 | 142 | Tools built with `mon(1)`: 143 | 144 | - [mongroup(1)](https://github.com/jgallen23/mongroup) - monitor a group of processes (shell script) 145 | - [node-mongroup](https://github.com/visionmedia/node-mongroup) - node implementation of mongroup(1) 146 | 147 | # License 148 | 149 | MIT 150 | 151 | # Build Status 152 | 153 | [![Build Status](https://travis-ci.org/visionmedia/mon.png)](http://travis-ci.org/visionmedia/mon) 154 | -------------------------------------------------------------------------------- /deps/commander.c: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // commander.c 4 | // 5 | // Copyright (c) 2012 TJ Holowaychuk 6 | // 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include "commander.h" 13 | 14 | /* 15 | * Output error and exit. 16 | */ 17 | 18 | static void 19 | error(char *msg) { 20 | fprintf(stderr, "%s\n", msg); 21 | exit(1); 22 | } 23 | 24 | /* 25 | * Output command version. 26 | */ 27 | 28 | static void 29 | command_version(command_t *self) { 30 | printf("%s\n", self->version); 31 | exit(0); 32 | } 33 | 34 | /* 35 | * Output command help. 
36 | */ 37 | 38 | void 39 | command_help(command_t *self) { 40 | printf("\n"); 41 | printf(" Usage: %s %s\n", self->name, self->usage); 42 | printf("\n"); 43 | printf(" Options:\n"); 44 | printf("\n"); 45 | for (int i = 0; i < self->option_count; ++i) { 46 | command_option_t *option = &self->options[i]; 47 | printf(" %s, %-25s %s\n" 48 | , option->small 49 | , option->large_with_arg 50 | , option->description); 51 | } 52 | printf("\n"); 53 | exit(0); 54 | } 55 | 56 | /* 57 | * Initialize with program `name` and `version`. 58 | */ 59 | 60 | void 61 | command_init(command_t *self, const char *name, const char *version) { 62 | self->arg = NULL; 63 | self->name = name; 64 | self->version = version; 65 | self->option_count = self->argc = 0; 66 | self->usage = "[options]"; 67 | self->nargv = NULL; 68 | command_option(self, "-V", "--version", "output program version", command_version); 69 | command_option(self, "-h", "--help", "output help information", command_help); 70 | } 71 | 72 | /* 73 | * Free up commander after use. 74 | */ 75 | 76 | void 77 | command_free(command_t *self) { 78 | for (int i = 0; i < self->option_count; ++i) { 79 | command_option_t *option = &self->options[i]; 80 | free(option->argname); 81 | free(option->large); 82 | } 83 | 84 | if (self->nargv) { 85 | for (int i = 0; self->nargv[i]; ++i) { 86 | free(self->nargv[i]); 87 | } 88 | free(self->nargv); 89 | } 90 | } 91 | 92 | /* 93 | * Parse argname from `str`. For example 94 | * Take "--required " and populate `flag` 95 | * with "--required" and `arg` with "". 
/*
 * Parse argname from `str`. For example
 * take "--required <arg>" and populate `flag`
 * with "--required" and `arg` with "<arg>".
 *
 * Everything from the first '[' or '<' onwards is copied to `arg`;
 * before that point spaces are dropped and the remaining characters
 * are copied to `flag`. Both outputs are NUL-terminated and must each
 * have room for strlen(str) + 1 bytes.
 */

static void
parse_argname(const char *str, char *flag, char *arg) {
  int buffer = 0;
  size_t flagpos = 0;
  size_t argpos = 0;
  size_t len = strlen(str);

  for (size_t i = 0; i < len; ++i) {
    if (buffer || '[' == str[i] || '<' == str[i]) {
      buffer = 1;
      arg[argpos++] = str[i];
    } else {
      if (' ' == str[i]) continue;
      flag[flagpos++] = str[i];
    }
  }

  arg[argpos] = '\0';
  flag[flagpos] = '\0';
}

/*
 * Normalize the argument vector by exploding
 * multiple options (if any). For example
 * "foo -abc --scm git" -> "foo -a -b -c --scm git"
 *
 * `argv` must be NULL-terminated and hold `*argc` entries. Returns a
 * newly allocated, NULL-terminated vector of newly allocated strings
 * and stores the new count in `*argc`; the caller owns the result.
 *
 * Arguments following a literal "--" are copied verbatim so that
 * something like "-- -abc" reaches the parser as one argument.
 *
 * Allocation failure is fatal: a message is printed to stderr and the
 * process exits with status 1.
 */

static char **
normalize_args(int *argc, char **argv) {
  size_t size = 0;
  size_t alloc = (size_t) *argc + 1;
  int literal = 0;
  char **nargv = malloc(alloc * sizeof *nargv);
  if (!nargv) goto oom;

  for (size_t i = 0; argv[i]; ++i) {
    const char *arg = argv[i];
    size_t len = strlen(arg);

    // short flag group: "-abc" becomes "-a" "-b" "-c"
    if (!literal && len > 2 && '-' == arg[0] && !strchr(arg + 1, '-')) {
      // one slot is already reserved for this argument
      alloc += len - 2;
      char **grown = realloc(nargv, alloc * sizeof *nargv);
      if (!grown) goto oom;
      nargv = grown;

      for (size_t j = 1; j < len; ++j) {
        char *flag = malloc(3);
        if (!flag) goto oom;
        flag[0] = '-';
        flag[1] = arg[j];
        flag[2] = '\0';
        nargv[size++] = flag;
      }
      continue;
    }

    // everything after "--" is taken literally
    if (0 == strcmp(arg, "--")) literal = 1;

    // regular arg
    char *copy = malloc(len + 1);
    if (!copy) goto oom;
    memcpy(copy, arg, len + 1);
    nargv[size++] = copy;
  }

  nargv[size] = NULL;
  *argc = (int) size;
  return nargv;

oom:
  fprintf(stderr, "out of memory\n");
  exit(1);
}
/*
 * Define an option.
 *
 * `small` is the short flag, `large` the long flag optionally followed
 * by an argument name in "<...>" (required) or "[...]" (optional),
 * `desc` the help text and `cb` the callback invoked when the flag is
 * matched. `small`, `large` and `desc` are stored by pointer, not
 * copied. Exceeding COMMANDER_MAX_OPTIONS is fatal via error().
 */

void
command_option(command_t *self, const char *small, const char *large, const char *desc, command_callback_t cb) {
  int n = self->option_count++;
  if (n == COMMANDER_MAX_OPTIONS) error("Maximum option definitions exceeded");
  command_option_t *option = &self->options[n];
  option->cb = cb;
  option->small = small;
  option->description = desc;
  option->required_arg = option->optional_arg = 0;
  option->large_with_arg = large;
  // both buffers are sized for the whole `large` string, which bounds
  // whatever parse_argname() splits into them
  option->argname = malloc(strlen(large) + 1);
  assert(option->argname);
  option->large = malloc(strlen(large) + 1);
  assert(option->large);
  parse_argname(large, option->large, option->argname);
  // the first character of the argument name selects its kind
  if ('[' == option->argname[0]) option->optional_arg = 1;
  if ('<' == option->argname[0]) option->required_arg = 1;
}

/*
 * Parse `argv` (internal).
 * Input arguments should be normalized first
 * see `normalize_args`.
 *
 * Starts at index 1, so argv[0] is skipped. A matched flag sets
 * `self->arg` (NULL when the flag takes no argument) and invokes the
 * option callback; anything else is appended to `self->argv`.
 * Relies on `argv` being NULL-terminated when looking one past the
 * current index.
 */

static void
command_parse_args(command_t *self, int argc, char **argv) {
  int literal = 0;

  for (int i = 1; i < argc; ++i) {
    const char *arg = argv[i];
    // NOTE(review): options are matched before the "--" check below,
    // so a registered flag is still honoured after "--".
    for (int j = 0; j < self->option_count; ++j) {
      command_option_t *option = &self->options[j];

      // match flag
      if (!strcmp(arg, option->small) || !strcmp(arg, option->large)) {
        self->arg = NULL;

        // required
        if (option->required_arg) {
          arg = argv[++i];
          if (!arg || '-' == arg[0]) {
            fprintf(stderr, "%s %s argument required\n", option->large, option->argname);
            exit(1);
          }
          self->arg = arg;
        }

        // optional
        if (option->optional_arg) {
          if (argv[i + 1] && '-' != argv[i + 1][0]) {
            self->arg = argv[++i];
          }
        }

        // invoke callback
        option->cb(self);
        goto match;
      }
    }

    // --
    if ('-' == arg[0] && '-' == arg[1] && 0 == arg[2]) {
      literal = 1;
      goto match;
    }

    // unrecognized
    if ('-' == arg[0] && !literal) {
      fprintf(stderr, "unrecognized flag %s\n", arg);
      exit(1);
    }

    // positional argument
    int n = self->argc++;
    if (n == COMMANDER_MAX_ARGS) error("Maximum number of arguments exceeded");
    self->argv[n] = (char *) arg;
    match:;
  }
}

/*
 * Parse `argv` (public).
 *
 * Stores the normalized copy of `argv` in `self->nargv`; it is
 * released by command_free().
 */

void
command_parse(command_t *self, int argc, char **argv) {
  self->nargv = normalize_args(&argc, argv);
  command_parse_args(self, argc, self->nargv);
}
// microseconds
// (each expansion is parenthesized so the macros are safe on either
// side of any operator, e.g. `x / US_MIN`)

#define US_MSEC ((long long) 1000)
#define US_SEC  (1000 * US_MSEC)
#define US_MIN  (60 * US_SEC)
#define US_HOUR (60 * US_MIN)
#define US_DAY  (24 * US_HOUR)
#define US_WEEK (7 * US_DAY)
#define US_YEAR (52 * US_WEEK)

// milliseconds

#define MS_SEC  ((long long) 1000)
#define MS_MIN  (60 * MS_SEC)
#define MS_HOUR (60 * MS_MIN)
#define MS_DAY  (24 * MS_HOUR)
#define MS_WEEK (7 * MS_DAY)
#define MS_YEAR (52 * MS_WEEK)

// same overridable default as ms.h

#ifndef MS_MAX
#define MS_MAX 256
#endif

/*
 * Convert the given `str` representation to microseconds,
 * for example "10ms", "5s", "2m", "1h" etc.
 *
 * The leading integer is scaled by the trailing unit ("ms", "s", "m",
 * "h", "d", "w", "y"); any other trailing character leaves the number
 * unscaled. Returns -1 when the leading number is missing or zero.
 */

long long
string_to_microseconds(const char *str) {
  size_t len = strlen(str);
  long long val = strtoll(str, NULL, 10);
  if (!val) return -1;
  // val != 0 means at least one digit was parsed, so a trailing unit
  // letter implies len >= 2 and str[len - 2] is in bounds
  switch (str[len - 1]) {
    case 's': return 'm' == str[len - 2] ? val * US_MSEC : val * US_SEC;
    case 'm': return val * US_MIN;
    case 'h': return val * US_HOUR;
    case 'd': return val * US_DAY;
    case 'w': return val * US_WEEK;
    case 'y': return val * US_YEAR;
    default:  return val;
  }
}

/*
 * Convert the given `str` representation to milliseconds,
 * for example "10ms", "5s", "2m", "1h" etc.
 *
 * Same parsing rules as string_to_microseconds(); returns -1 when the
 * leading number is missing or zero.
 */

long long
string_to_milliseconds(const char *str) {
  size_t len = strlen(str);
  long long val = strtoll(str, NULL, 10);
  if (!val) return -1;
  switch (str[len - 1]) {
    case 's': return 'm' == str[len - 2] ? val : val * MS_SEC;
    case 'm': return val * MS_MIN;
    case 'h': return val * MS_HOUR;
    case 'd': return val * MS_DAY;
    case 'w': return val * MS_WEEK;
    case 'y': return val * MS_YEAR;
    default:  return val;
  }
}

/*
 * Convert the given `str` representation to seconds.
 *
 * The string is first converted to milliseconds (so a bare number is
 * taken as milliseconds) and then divided by 1000, truncating.
 * Returns -1 on the same inputs as string_to_milliseconds().
 */

long long
string_to_seconds(const char *str) {
  long long ret = string_to_milliseconds(str);
  if (-1 == ret) return ret;
  return ret / 1000;
}

/*
 * Convert the given `ms` to a string. This
 * value must be `free()`d by the developer.
 *
 * The largest unit not exceeding `ms` is used and the quotient is
 * truncated ("2s" for 2500). Returns NULL when allocation fails.
 */

char *
milliseconds_to_string(long long ms) {
  char *str = malloc(MS_MAX);
  if (!str) return NULL;
  // long long: MS_YEAR does not fit in a 32-bit long
  long long div = 1;
  const char *fmt;

  if (ms < MS_SEC) fmt = "%lldms";
  else if (ms < MS_MIN) { fmt = "%llds"; div = MS_SEC; }
  else if (ms < MS_HOUR) { fmt = "%lldm"; div = MS_MIN; }
  else if (ms < MS_DAY) { fmt = "%lldh"; div = MS_HOUR; }
  else if (ms < MS_WEEK) { fmt = "%lldd"; div = MS_DAY; }
  else if (ms < MS_YEAR) { fmt = "%lldw"; div = MS_WEEK; }
  else { fmt = "%lldy"; div = MS_YEAR; }
  snprintf(str, MS_MAX, fmt, ms / div);

  return str;
}
112 | */ 113 | 114 | char * 115 | milliseconds_to_long_string(long long ms) { 116 | long div; 117 | char *name; 118 | 119 | char *str = malloc(MS_MAX); 120 | if (!str) return NULL; 121 | 122 | if (ms < MS_SEC) { 123 | sprintf(str, "less than one second"); 124 | return str; 125 | } 126 | 127 | if (ms < MS_MIN) { name = "second"; div = MS_SEC; } 128 | else if (ms < MS_HOUR) { name = "minute"; div = MS_MIN; } 129 | else if (ms < MS_DAY) { name = "hour"; div = MS_HOUR; } 130 | else if (ms < MS_WEEK) { name = "day"; div = MS_DAY; } 131 | else if (ms < MS_YEAR) { name = "week"; div = MS_WEEK; } 132 | else { name = "year"; div = MS_YEAR; } 133 | 134 | long long val = ms / div; 135 | char *fmt = 1 == val 136 | ? "%lld %s" 137 | : "%lld %ss"; 138 | 139 | snprintf(str, MS_MAX, fmt, val, name); 140 | return str; 141 | } 142 | 143 | // tests 144 | 145 | #ifdef TEST_MS 146 | 147 | #include 148 | 149 | void 150 | equal(char *a, char *b) { 151 | if (strcmp(a, b)) { 152 | printf("expected: %s\n", a); 153 | printf("actual: %s\n", b); 154 | exit(1); 155 | } 156 | } 157 | 158 | void 159 | test_string_to_microseconds() { 160 | assert(string_to_microseconds("") == -1); 161 | assert(string_to_microseconds("s") == -1); 162 | assert(string_to_microseconds("hey") == -1); 163 | assert(string_to_microseconds("5000") == 5000); 164 | assert(string_to_microseconds("1ms") == 1000); 165 | assert(string_to_microseconds("5ms") == 5000); 166 | assert(string_to_microseconds("1s") == 1000000); 167 | assert(string_to_microseconds("5s") == 5000000); 168 | assert(string_to_microseconds("1m") == 60000000); 169 | assert(string_to_microseconds("1h") == 3600000000); 170 | assert(string_to_microseconds("2d") == 2 * 24 * 3600000000); 171 | } 172 | 173 | void 174 | test_string_to_milliseconds() { 175 | assert(string_to_milliseconds("") == -1); 176 | assert(string_to_milliseconds("s") == -1); 177 | assert(string_to_milliseconds("hey") == -1); 178 | assert(string_to_milliseconds("5000") == 5000); 179 | 
assert(string_to_milliseconds("1ms") == 1); 180 | assert(string_to_milliseconds("5ms") == 5); 181 | assert(string_to_milliseconds("1s") == 1000); 182 | assert(string_to_milliseconds("5s") == 5000); 183 | assert(string_to_milliseconds("1m") == 60 * 1000); 184 | assert(string_to_milliseconds("1h") == 60 * 60 * 1000); 185 | assert(string_to_milliseconds("1d") == 24 * 60 * 60 * 1000); 186 | } 187 | 188 | void 189 | test_string_to_seconds() { 190 | assert(string_to_seconds("") == -1); 191 | assert(string_to_seconds("s") == -1); 192 | assert(string_to_seconds("hey") == -1); 193 | assert(string_to_seconds("5000") == 5); 194 | assert(string_to_seconds("1ms") == 0); 195 | assert(string_to_seconds("5ms") == 0); 196 | assert(string_to_seconds("1s") == 1); 197 | assert(string_to_seconds("5s") == 5); 198 | assert(string_to_seconds("1m") == 60); 199 | assert(string_to_seconds("1h") == 60 * 60); 200 | assert(string_to_seconds("1d") == 24 * 60 * 60); 201 | } 202 | 203 | void 204 | test_milliseconds_to_string() { 205 | equal("500ms", milliseconds_to_string(500)); 206 | equal("5s", milliseconds_to_string(5000)); 207 | equal("2s", milliseconds_to_string(2500)); 208 | equal("1m", milliseconds_to_string(MS_MIN)); 209 | equal("5m", milliseconds_to_string(5 * MS_MIN)); 210 | equal("1h", milliseconds_to_string(MS_HOUR)); 211 | equal("2d", milliseconds_to_string(2 * MS_DAY)); 212 | equal("2w", milliseconds_to_string(15 * MS_DAY)); 213 | equal("3y", milliseconds_to_string(3 * MS_YEAR)); 214 | } 215 | 216 | void 217 | test_milliseconds_to_long_string() { 218 | equal("less than one second", milliseconds_to_long_string(500)); 219 | equal("5 seconds", milliseconds_to_long_string(5000)); 220 | equal("2 seconds", milliseconds_to_long_string(2500)); 221 | equal("1 minute", milliseconds_to_long_string(MS_MIN)); 222 | equal("5 minutes", milliseconds_to_long_string(5 * MS_MIN)); 223 | equal("1 hour", milliseconds_to_long_string(MS_HOUR)); 224 | equal("2 days", milliseconds_to_long_string(2 * 
MS_DAY)); 225 | equal("2 weeks", milliseconds_to_long_string(15 * MS_DAY)); 226 | equal("1 year", milliseconds_to_long_string(MS_YEAR)); 227 | equal("3 years", milliseconds_to_long_string(3 * MS_YEAR)); 228 | } 229 | 230 | int 231 | main(){ 232 | test_string_to_microseconds(); 233 | test_string_to_milliseconds(); 234 | test_string_to_seconds(); 235 | test_milliseconds_to_string(); 236 | test_milliseconds_to_long_string(); 237 | printf("\n \e[32m\u2713 \e[90mok\e[0m\n\n"); 238 | return 0; 239 | } 240 | 241 | #endif 242 | -------------------------------------------------------------------------------- /deps/ms.h: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // ms.h 4 | // 5 | // Copyright (c) 2012 TJ Holowaychuk 6 | // 7 | 8 | #ifndef MS 9 | #define MS 10 | 11 | // max buffer length 12 | 13 | #ifndef MS_MAX 14 | #define MS_MAX 256 15 | #endif 16 | 17 | // prototypes 18 | 19 | long long 20 | string_to_microseconds(const char *str); 21 | 22 | long long 23 | string_to_milliseconds(const char *str); 24 | 25 | long long 26 | string_to_seconds(const char *str); 27 | 28 | char * 29 | milliseconds_to_string(long long ms); 30 | 31 | char * 32 | milliseconds_to_long_string(long long ms); 33 | 34 | #endif -------------------------------------------------------------------------------- /example/on_error.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # faux email / alert 4 | pid=$1 5 | echo "process $pid broke!" 
> email 6 | -------------------------------------------------------------------------------- /example/program.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # try: 4 | # mon ./example/program.sh 5 | # mon -d ./example/program.sh 6 | # mon -d ./example/program.sh --on-error ./example/on_error.sh 7 | 8 | echo one 9 | sleep 2 10 | echo two 11 | sleep 2 12 | echo three 13 | sleep 2 14 | echo exiting 15 | exit 1 -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mon", 3 | "version": "1.2.2", 4 | "repo": "visionmedia/mon", 5 | "description": "Simple process monitoring", 6 | "keywords": ["process", "monitoring", "monitor", "availability"], 7 | "license": "MIT", 8 | "install": "make install" 9 | } 10 | -------------------------------------------------------------------------------- /src/mon.c: -------------------------------------------------------------------------------- 1 | 2 | // 3 | // mon.c 4 | // 5 | // Copyright (c) 2012 TJ Holowaychuk 6 | // 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include "commander.h" 22 | #include "ms.h" 23 | 24 | /* 25 | * Program version. 26 | */ 27 | 28 | #define VERSION "1.2.3" 29 | 30 | /* 31 | * Log prefix. 32 | */ 33 | 34 | static const char *prefix = NULL; 35 | 36 | /* 37 | * Monitor. 38 | */ 39 | 40 | typedef struct { 41 | const char *pidfile; 42 | const char *mon_pidfile; 43 | const char *logfile; 44 | const char *on_error; 45 | const char *on_restart; 46 | int64_t last_restart_at; 47 | int64_t clock; 48 | int daemon; 49 | int sleepsec; 50 | int max_attempts; 51 | int attempts; 52 | bool show_status; 53 | } monitor_t; 54 | 55 | /* 56 | * Monitor instance. 
57 | */ 58 | 59 | static monitor_t monitor; 60 | 61 | /* 62 | * Logger. 63 | */ 64 | 65 | #define log(fmt, args...) \ 66 | do { \ 67 | if (prefix) { \ 68 | printf("mon : %s : " fmt "\n", prefix, ##args); \ 69 | fflush(stdout); \ 70 | } else { \ 71 | printf("mon : " fmt "\n", ##args); \ 72 | fflush(stdout); \ 73 | } \ 74 | } while(0) 75 | 76 | /* 77 | * Output error `msg`. 78 | */ 79 | 80 | void 81 | error(char *msg) { 82 | fprintf(stderr, "Error: %s\n", msg); 83 | exit(1); 84 | } 85 | 86 | /* 87 | * Check if process of `pid` is alive. 88 | */ 89 | 90 | int 91 | alive(pid_t pid) { 92 | return 0 == kill(pid, 0); 93 | } 94 | 95 | /* 96 | * Return a timestamp in milliseconds. 97 | */ 98 | 99 | int64_t 100 | timestamp() { 101 | struct timeval tv; 102 | int ret = gettimeofday(&tv, NULL); 103 | if (-1 == ret) return -1; 104 | return (int64_t) ((int64_t) tv.tv_sec * 1000 + (int64_t) tv.tv_usec / 1000); 105 | } 106 | 107 | /* 108 | * Write `pid` to `file`. 109 | */ 110 | 111 | void 112 | write_pidfile(const char *file, pid_t pid) { 113 | char buf[32] = {0}; 114 | snprintf(buf, 32, "%d", pid); 115 | int fd = open(file, O_WRONLY | O_CREAT, S_IRUSR | S_IWUSR); 116 | if (fd < 0) perror("open()"); 117 | write(fd, buf, 32); 118 | close(fd); 119 | } 120 | 121 | /* 122 | * Read pid `file`. 123 | */ 124 | 125 | pid_t 126 | read_pidfile(const char *file) { 127 | off_t size; 128 | struct stat s; 129 | 130 | // stat 131 | if (stat(file, &s) < 0) { 132 | perror("stat()"); 133 | exit(1); 134 | } 135 | 136 | size = s.st_size; 137 | 138 | // opens 139 | int fd = open(file, O_RDONLY, 0); 140 | if (fd < 0) { 141 | perror("open()"); 142 | exit(1); 143 | } 144 | 145 | // read 146 | char buf[size]; 147 | if (size != read(fd, buf, size)) { 148 | perror("read()"); 149 | exit(1); 150 | } 151 | 152 | return atoi(buf); 153 | } 154 | 155 | /* 156 | * Output status of `pidfile`. 
157 | */ 158 | 159 | void 160 | show_status_of(const char *pidfile) { 161 | off_t size; 162 | struct stat s; 163 | 164 | // stat 165 | if (stat(pidfile, &s) < 0) { 166 | perror("stat()"); 167 | exit(1); 168 | } 169 | 170 | size = s.st_size; 171 | 172 | // opens 173 | int fd = open(pidfile, O_RDONLY, 0); 174 | if (fd < 0) { 175 | perror("open()"); 176 | exit(1); 177 | } 178 | 179 | // read 180 | char buf[size]; 181 | if (size != read(fd, buf, size)) { 182 | perror("read()"); 183 | exit(1); 184 | } 185 | 186 | // uptime 187 | time_t modified = s.st_mtime; 188 | 189 | struct timeval t; 190 | gettimeofday(&t, NULL); 191 | time_t now = t.tv_sec; 192 | time_t secs = now - modified; 193 | 194 | // status 195 | pid_t pid = atoi(buf); 196 | 197 | if (alive(pid)) { 198 | char *str = milliseconds_to_long_string(secs * 1000); 199 | printf("\e[90m%d\e[0m : \e[32malive\e[0m : uptime %s\e[m\n", pid, str); 200 | free(str); 201 | } else { 202 | printf("\e[90m%d\e[0m : \e[31mdead\e[0m\n", pid); 203 | } 204 | 205 | close(fd); 206 | } 207 | 208 | /* 209 | * Redirect stdio to `file`. 210 | */ 211 | 212 | void 213 | redirect_stdio_to(const char *file) { 214 | int logfd = open(file, O_WRONLY | O_CREAT | O_APPEND, 0755); 215 | int nullfd = open("/dev/null", O_RDONLY, 0); 216 | 217 | if (-1 == logfd) { 218 | perror("open()"); 219 | exit(1); 220 | } 221 | 222 | if (-1 == nullfd) { 223 | perror("open()"); 224 | exit(1); 225 | } 226 | 227 | dup2(nullfd, 0); 228 | dup2(logfd, 1); 229 | dup2(logfd, 2); 230 | } 231 | 232 | /* 233 | * Graceful exit, signal process group. 234 | */ 235 | 236 | void 237 | graceful_exit(int sig) { 238 | int status; 239 | pid_t pid = getpid(); 240 | log("shutting down"); 241 | log("kill(-%d, %d)", pid, sig); 242 | kill(-pid, sig); 243 | log("waiting for exit"); 244 | waitpid(read_pidfile(monitor.pidfile), &status, 0); 245 | log("bye :)"); 246 | exit(0); 247 | } 248 | 249 | /* 250 | * Daemonize the program. 
251 | */ 252 | 253 | void 254 | daemonize() { 255 | if (fork()) exit(0); 256 | 257 | if (setsid() < 0) { 258 | perror("setsid()"); 259 | exit(1); 260 | } 261 | } 262 | 263 | /* 264 | * Invoke the --on-restart command. 265 | */ 266 | 267 | void 268 | exec_restart_command(monitor_t *monitor, pid_t pid) { 269 | char buf[1024] = {0}; 270 | snprintf(buf, 1024, "%s %d", monitor->on_restart, pid); 271 | log("on restart `%s`", buf); 272 | int status = system(buf); 273 | if (status) log("exit(%d)", status); 274 | } 275 | 276 | /* 277 | * Invoke the --on-error command. 278 | */ 279 | 280 | void 281 | exec_error_command(monitor_t *monitor, pid_t pid) { 282 | char buf[1024] = {0}; 283 | snprintf(buf, 1024, "%s %d", monitor->on_error, pid); 284 | log("on error `%s`", buf); 285 | int status = system(buf); 286 | if (status) log("exit(%d)", status); 287 | } 288 | 289 | /* 290 | * Return the ms since the last restart. 291 | */ 292 | 293 | int64_t 294 | ms_since_last_restart(monitor_t *monitor) { 295 | if (0 == monitor->last_restart_at) return 0; 296 | int64_t now = timestamp(); 297 | return now - monitor->last_restart_at; 298 | } 299 | 300 | /* 301 | * Check if the maximum restarts within 60 seconds 302 | * have been exceeded and return 1, 0 otherwise. 303 | */ 304 | 305 | int 306 | attempts_exceeded(monitor_t *monitor, int64_t ms) { 307 | monitor->attempts++; 308 | monitor->clock -= ms; 309 | 310 | // reset 311 | if (monitor->clock <= 0) { 312 | monitor->clock = 60000; 313 | monitor->attempts = 0; 314 | return 0; 315 | } 316 | 317 | // all good 318 | if (monitor->attempts < monitor->max_attempts) return 0; 319 | 320 | return 1; 321 | } 322 | 323 | /* 324 | * Monitor the given `cmd`. 
325 | */ 326 | 327 | void 328 | start(const char *cmd, monitor_t *monitor) { 329 | exec: { 330 | pid_t pid = fork(); 331 | int status; 332 | 333 | switch (pid) { 334 | case -1: 335 | perror("fork()"); 336 | exit(1); 337 | case 0: 338 | signal(SIGTERM, SIG_DFL); 339 | signal(SIGQUIT, SIG_DFL); 340 | log("sh -c \"%s\"", cmd); 341 | execl("/bin/sh", "sh", "-c", cmd, 0); 342 | perror("execl()"); 343 | exit(1); 344 | default: 345 | log("child %d", pid); 346 | 347 | // write pidfile 348 | if (monitor->pidfile) { 349 | log("write pid to %s", monitor->pidfile); 350 | write_pidfile(monitor->pidfile, pid); 351 | } 352 | 353 | // wait for exit 354 | waitpid(pid, &status, 0); 355 | 356 | // signalled 357 | if (WIFSIGNALED(status)) { 358 | log("signal(%s)", strsignal(WTERMSIG(status))); 359 | log("sleep(%d)", monitor->sleepsec); 360 | sleep(monitor->sleepsec); 361 | goto error; 362 | } 363 | 364 | // check status 365 | if (WEXITSTATUS(status)) { 366 | log("exit(%d)", WEXITSTATUS(status)); 367 | log("sleep(%d)", monitor->sleepsec); 368 | sleep(monitor->sleepsec); 369 | goto error; 370 | } 371 | 372 | // restart 373 | error: { 374 | if (monitor->on_restart) exec_restart_command(monitor, pid); 375 | int64_t ms = ms_since_last_restart(monitor); 376 | monitor->last_restart_at = timestamp(); 377 | log("last restart %s ago", milliseconds_to_long_string(ms)); 378 | log("%d attempts remaining", monitor->max_attempts - monitor->attempts); 379 | 380 | if (attempts_exceeded(monitor, ms)) { 381 | char *time = milliseconds_to_long_string(60000 - monitor->clock); 382 | log("%d restarts within %s, bailing", monitor->max_attempts, time); 383 | if (monitor->on_error) exec_error_command(monitor, pid); 384 | log("bye :)"); 385 | exit(2); 386 | } 387 | 388 | goto exec; 389 | } 390 | } 391 | } 392 | } 393 | 394 | /* 395 | * --log 396 | */ 397 | 398 | static void 399 | on_log(command_t *self) { 400 | monitor_t *monitor = (monitor_t *) self->data; 401 | monitor->logfile = self->arg; 402 | } 403 | 404 
| /* 405 | * --sleep 406 | */ 407 | 408 | static void 409 | on_sleep(command_t *self) { 410 | monitor_t *monitor = (monitor_t *) self->data; 411 | monitor->sleepsec = atoi(self->arg); 412 | } 413 | 414 | /* 415 | * --daemonize 416 | */ 417 | 418 | static void 419 | on_daemonize(command_t *self) { 420 | monitor_t *monitor = (monitor_t *) self->data; 421 | monitor->daemon = 1; 422 | } 423 | 424 | /* 425 | * --pidfile 426 | */ 427 | 428 | static void 429 | on_pidfile(command_t *self) { 430 | monitor_t *monitor = (monitor_t *) self->data; 431 | monitor->pidfile = self->arg; 432 | } 433 | 434 | /* 435 | * --mon-pidfile 436 | */ 437 | 438 | static void 439 | on_mon_pidfile(command_t *self) { 440 | monitor_t *monitor = (monitor_t *) self->data; 441 | monitor->mon_pidfile = self->arg; 442 | } 443 | 444 | /* 445 | * --status 446 | */ 447 | 448 | static void 449 | on_status(command_t *self) { 450 | monitor_t *monitor = (monitor_t *) self->data; 451 | monitor->show_status = true; 452 | } 453 | 454 | /* 455 | * --prefix 456 | */ 457 | 458 | static void 459 | on_prefix(command_t *self) { 460 | prefix = self->arg; 461 | } 462 | 463 | /* 464 | * --on-restart 465 | */ 466 | 467 | static void 468 | on_restart(command_t *self) { 469 | monitor_t *monitor = (monitor_t *) self->data; 470 | monitor->on_restart = self->arg; 471 | } 472 | 473 | /* 474 | * --on-error 475 | */ 476 | 477 | static void 478 | on_error(command_t *self) { 479 | monitor_t *monitor = (monitor_t *) self->data; 480 | monitor->on_error = self->arg; 481 | } 482 | 483 | /* 484 | * --attempts 485 | */ 486 | 487 | static void 488 | on_attempts(command_t *self) { 489 | monitor_t *monitor = (monitor_t *) self->data; 490 | monitor->max_attempts = atoi(self->arg); 491 | } 492 | 493 | /* 494 | * [options] 495 | */ 496 | 497 | int 498 | main(int argc, char **argv){ 499 | monitor.pidfile = NULL; 500 | monitor.mon_pidfile = NULL; 501 | monitor.on_restart = NULL; 502 | monitor.on_error = NULL; 503 | monitor.logfile = "mon.log"; 
504 | monitor.daemon = 0; 505 | monitor.sleepsec = 1; 506 | monitor.max_attempts = 10; 507 | monitor.attempts = 0; 508 | monitor.last_restart_at = 0; 509 | monitor.clock = 60000; 510 | monitor.show_status = false; 511 | 512 | command_t program; 513 | command_init(&program, "mon", VERSION); 514 | program.data = &monitor; 515 | program.usage = "[options] "; 516 | command_option(&program, "-l", "--log ", "specify logfile [mon.log]", on_log); 517 | command_option(&program, "-s", "--sleep ", "sleep seconds before re-executing [1]", on_sleep); 518 | command_option(&program, "-S", "--status", "check status of --pidfile", on_status); 519 | command_option(&program, "-p", "--pidfile ", "write pid to ", on_pidfile); 520 | command_option(&program, "-m", "--mon-pidfile ", "write mon(1) pid to ", on_mon_pidfile); 521 | command_option(&program, "-P", "--prefix ", "add a log prefix", on_prefix); 522 | command_option(&program, "-d", "--daemonize", "daemonize the program", on_daemonize); 523 | command_option(&program, "-a", "--attempts ", "retry attempts within 60 seconds [10]", on_attempts); 524 | command_option(&program, "-R", "--on-restart ", "execute on restarts", on_restart); 525 | command_option(&program, "-E", "--on-error ", "execute on error", on_error); 526 | command_parse(&program, argc, argv); 527 | 528 | if (monitor.show_status) { 529 | if (!monitor.pidfile) error("--pidfile required"); 530 | show_status_of(monitor.pidfile); 531 | exit(0); 532 | } 533 | 534 | // command required 535 | if (!program.argc) error(" required"); 536 | const char *cmd = program.argv[0]; 537 | 538 | // signals 539 | signal(SIGTERM, graceful_exit); 540 | signal(SIGQUIT, graceful_exit); 541 | 542 | // daemonize 543 | if (monitor.daemon) { 544 | daemonize(); 545 | redirect_stdio_to(monitor.logfile); 546 | } 547 | 548 | // write mon pidfile 549 | if (monitor.mon_pidfile) { 550 | log("write mon pid to %s", monitor.mon_pidfile); 551 | write_pidfile(monitor.mon_pidfile, getpid()); 552 | } 553 | 554 | 
start(cmd, &monitor); 555 | 556 | return 0; 557 | } 558 | --------------------------------------------------------------------------------