├── LICENSE ├── README.md ├── cmd └── ncore.js ├── examples ├── example-auto.js ├── example-explicit.js └── example-loop.js ├── lib ├── panic.js └── subr.js └── package.json /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, Joyent, Inc. All rights reserved. 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ``` 2 | ________ __ __________ .__ 3 | \______ \ ____ ____ | _/ |_ \______ \_____ ____ |__| ____ 4 | | | \ / _ \ / \ \ __\ | ___/\__ \ / \| |/ ___\ 5 | | ` ( <_> ) | \ | | | | / __ \| | \ \ \___ 6 | /_______ /\____/|___| / |__| |____| (____ /___| /__|\___ > 7 | \/ \/ \/ \/ \/ 8 | ``` 9 | 10 | NOTE 11 | ==== 12 | 13 | This module is no longer under active development. 
On illumos-based systems, 14 | consider using native core files using MDB, which provides a much richer 15 | debugging environment. For details, see: 16 | 17 | http://dtrace.org/blogs/dap/2012/01/13/playing-with-nodev8-postmortem-debugging/ 18 | 19 | 20 | node-panic 21 | =============== 22 | 23 | This module provides a primitive postmortem debugging facility for Node.js. 24 | Postmortem debugging is critical for root-causing issues that occur in 25 | production from the artifacts of a single failure. Without such a facility, 26 | tracking down problems in production becomes a tedious process of adding 27 | logging, trying to reproduce the problem, and repeating until enough information 28 | is gathered to root-cause the issue. For reproducible problems, this process is 29 | merely painful for developers, administrators, and customers alike. For 30 | unreproducible problems, this is untenable. 31 | 32 | The basic idea of this implementation is to maintain a global object that 33 | references all of the internal state we would want for postmortem debugging. 34 | Then when our application crashes, we dump this state to a file, and then exit. 35 | 36 | 37 | The basics 38 | ---------- 39 | 40 | There are only a few functions you need to know about. The first time this 41 | module is loaded, it creates a global object called `panicDbg` to manage program 42 | debug state. 43 | 44 | * `panicDbg.set(key, value)`: registers the object `value` to be dumped under 45 | the key `key` when the program panics. This function replaces the value from 46 | any previous call with the same key. 47 | * `panicDbg.add(keybase, value)`: like panicDbg.set, but generates a unique key 48 | based on `keybase`. 49 | * `mod_panic.panic(msg, err)`: dumps the given error message and optional 50 | exception as well as all registered debug state to a file called 51 | "ncore.<pid>" and then exits the program. 
52 | * `mod_panic.enablePanicOnCrash()`: sets up the program to automatically invoke 53 | `mod_panic.panic` when an uncaught exception bubbles to the event loop. 54 | 55 | When the program panics (crashes), it saves all debug state to a file called 56 | "ncore.<pid>". This file is pure JSON and is best read using the "json" tool at: 57 | 58 | https://github.com/trentm/json 59 | 60 | In the example below, the program first invokes `enablePanicOnCrash` to set up 61 | automatic panicking when the program crashes. As each function is invoked, it 62 | adds its argument to the global debug state. After the program crashes, you 63 | can see the saved state as "func1.arg" and "func2.arg" in the dump. 64 | 65 | 66 | Example 67 | -------- 68 | 69 | First, a simple program: 70 | 71 | $ cat examples/example-auto.js 72 | /* 73 | * example-auto.js: simple example of automatically panicking on crash 74 | */ 75 | 76 | var mod_panic = require('panic'); 77 | 78 | function func1(arg1) 79 | { 80 | /* include func1 arg in debug state */ 81 | panicDbg.set('func1.arg', arg1); 82 | func2(arg1 + 10); 83 | } 84 | 85 | function func2(arg2) 86 | { 87 | /* include func2 arg in debug state */ 88 | panicDbg.set('func2.arg', arg2); 89 | /* crash */ 90 | (undefined).nonexistentMethod(); 91 | } 92 | 93 | /* 94 | * Trigger a panic on crash. 95 | */ 96 | mod_panic.enablePanicOnCrash(); 97 | 98 | /* 99 | * The following line of code will cause this Node program to exit after dumping 100 | * debug state to ncore.<pid> (including func1's and func2's arguments). 
101 | */ 102 | func1(10); 103 | console.error('cannot get here'); 104 | 105 | 106 | Run the program: 107 | 108 | $ node examples/example-auto.js 109 | [2011-09-12 22:37:36.410 UTC] CRIT PANIC: panic due to uncaught exception: EXCEPTION: TypeError: TypeError: Cannot call method 'nonexistentMethod' of undefined 110 | at func2 (/home/dap/node-postmortem/examples/example-auto.js:19:14) 111 | at func1 (/home/dap/node-postmortem/examples/example-auto.js:11:2) 112 | at Object. (/home/dap/node-postmortem/examples/example-auto.js:31:1) 113 | at Module._compile (module.js:402:26) 114 | at Object..js (module.js:408:10) 115 | at Module.load (module.js:334:31) 116 | at Function._load (module.js:293:12) 117 | at Array. (module.js:421:10) 118 | at EventEmitter._tickCallback (node.js:126:26) 119 | [2011-09-12 22:37:36.411 UTC] CRIT writing core dump to /home/dap/node-postmortem/ncore.22984 120 | [2011-09-12 22:37:36.413 UTC] CRIT finished writing core dump 121 | 122 | 123 | View the "core dump": 124 | 125 | $ json < /home/dap/node-postmortem/ncore.22984 126 | { 127 | "dbg.format-version": "0.1", 128 | "init.process.argv": [ 129 | "node", 130 | "/home/dap/node-panic/examples/example-auto.js" 131 | ], 132 | "init.process.pid": 22984, 133 | "init.process.cwd": "/home/dap/node-panic", 134 | "init.process.env": { 135 | "HOST": "devel", 136 | "TERM": "xterm-color", 137 | "SHELL": "/bin/bash", 138 | "USER": "dap", 139 | "PWD": "/home/dap/node-panic", 140 | "MACHINE_THAT_GOES_PING": "1", 141 | "SHLVL": "1", 142 | "HOME": "/home/dap", 143 | "_": "/usr/bin/node" 144 | }, 145 | "init.process.version": "v0.4.9", 146 | "init.process.platform": "sunos", 147 | "init.time": "2011-09-12T22:37:36.408Z", 148 | "init.time-ms": 1315867056408, 149 | "func1.arg": 10, 150 | "func2.arg": 20, 151 | "panic.error": "EXCEPTION: TypeError: TypeError: Cannot call method 'nonexistentMethod' of undefined\n at func2 (/home/dap/node-postmortem/examples/example-auto.js:19:14)\n at func1 
(/home/dap/node-postmortem/examples/example-auto.js:11:2)\n at Object.<anonymous> (/home/dap/node-postmortem/examples/example-auto.js:31:1)\n at Module._compile (module.js:402:26)\n at Object..js (module.js:408:10)\n at Module.load (module.js:334:31)\n at Function._load (module.js:293:12)\n at Array.<anonymous> (module.js:421:10)\n at EventEmitter._tickCallback (node.js:126:26)", 152 | "panic.time": "2011-09-12T22:37:36.408Z", 153 | "panic.time-ms": 1315867056408, 154 | "panic.memusage": { 155 | "rss": 13000704, 156 | "vsize": 73252864, 157 | "heapTotal": 3196160, 158 | "heapUsed": 1926592 159 | } 160 | } 161 | 162 | 163 | What's in the dump 164 | ------------------ 165 | 166 | The dump itself is just a JSON object. This module automatically fills in the following keys: 167 | 168 | * dbg.format-version: file format version 169 | * init.process.argv: value of process.argv (process arguments) 170 | * init.process.pid: value of process.pid (process identifier) 171 | * init.process.cwd: value of process.cwd (process working directory) 172 | * init.process.env: value of process.env (process environment) 173 | * init.process.version: value of process.version (Node.js version) 174 | * init.process.platform: value of process.platform (operating system) 175 | * init.time: time at which node-panic was loaded 176 | * init.time-ms: time in milliseconds at which node-panic was loaded 177 | * panic.error: string description of the actual error that caused the panic (includes stack trace) 178 | * panic.time: time at which the panic occurred 179 | * panic.time-ms: time in milliseconds at which the panic occurred 180 | * panic.memusage: memory used when the panic occurred 181 | 182 | *plus* any information added with `panicDbg.set` or `panicDbg.add`. 
183 | 184 | 185 | Generating dumps from outside the program 186 | ----------------------------------------- 187 | 188 | node-panic includes a tool called "ncore" for causing a node program that's 189 | already loaded node-panic to dump core on demand *without* any other cooperation 190 | from the program itself. That is, even if the program is stuck inside an 191 | infinite loop, "ncore" can interrupt it to take a core dump. 192 | 193 | Caveat: this tool can be very dangerous! Since it uses SIGUSR1, invoking it on 194 | non-node processes can result in all kinds of failure. (On Illumos systems, 195 | "ncore" will automatically detect this case and bail out.) Additionally, if 196 | another program on the same system is using the node debugger, ncore will fail. 197 | "ncore" tries to avoid hijacking another debugger session, but this check is 198 | inherently racy. Because of these risks, this tool should be viewed as a last 199 | resort, but it can be extremely valuable when needed. 200 | 201 | Let's take a look at how it works: 202 | 203 | $ cat examples/example-loop.js 204 | /* 205 | * example-loop.js: example of using "ncore" tool to generate a node core 206 | */ 207 | 208 | var mod_panic = require('panic'); 209 | 210 | function func() 211 | { 212 | for (var ii = 0; ; ii++) 213 | panicDbg.set('func-iter', ii); 214 | } 215 | 216 | console.log('starting infinite loop; use "ncore" tool to generate core'); 217 | func(); 218 | 219 | Now run the program: 220 | 221 | $ node examples/example-loop.js 222 | starting infinite loop; use "ncore" tool to generate core 223 | 224 | In another shell, run "ncore" on the given program: 225 | 226 | $ ncore 1369 227 | attempting to attach to process 1369 ... . ok. 228 | 229 | And back in the first shell we see: 230 | 231 | Hit SIGUSR1 - starting debugger agent. 
232 | debugger listening on port 5858[2011-09-13 19:20:38.265 UTC] CRIT PANIC: 233 | explicit panic: EXCEPTION: Error: Error: core dump initiated at user request 234 | at caPanic (/Users/dap/work/node-panic/lib/panic.js:55:9) 235 | at eval at func (/Users/dap/work/node-panic/examples/example-loop.js:9:23) 236 | at ExecutionState.evaluateGlobal (native) 237 | at DebugCommandProcessor.evaluateRequest_ (native) 238 | at DebugCommandProcessor.processDebugJSONRequest (native) 239 | at DebugCommandProcessor.processDebugRequest (native) 240 | at func (/Users/dap/work/node-panic/examples/example-loop.js:9:23) 241 | at Object. 242 | (/Users/dap/work/node-panic/examples/example-loop.js:14:1) 243 | at Module._compile (module.js:402:26) 244 | at Object..js (module.js:408:10) 245 | [2011-09-13 19:20:38.265 UTC] CRIT writing core dump to 246 | /Users/dap/work/node-panic/ncore.1369 247 | [2011-09-13 19:20:38.294 UTC] CRIT finished writing core dump 248 | 249 | And we now have a core dump from the process somewhere in the middle of the 250 | loop: 251 | 252 | $ json < ncore.1369 253 | { 254 | "dbg.format-version": "0.1", 255 | "init.process.argv": [ 256 | "node", 257 | "/Users/dap/work/node-panic/examples/example-loop.js" 258 | ], 259 | "init.process.pid": 1369, 260 | "init.process.cwd": "/Users/dap/work/node-panic", 261 | ... 262 | "func-iter": 604762552, 263 | "panic.error": "EXCEPTION: Error: Error: core dump initiated at user request\n 264 | at caPanic (/Users/dap/work/node-panic/lib/panic.js:55:9)\n at eval at func 265 | (/Users/dap/work/node-panic/examples/example-loop.js:9:23)\n at 266 | ExecutionState.evaluateGlobal (native)\n at 267 | DebugCommandProcessor.evaluateRequest_ (native)\n at 268 | DebugCommandProcessor.processDebugJSONRequest (native)\n at 269 | DebugCommandProcessor.processDebugRequest (native)\n at func 270 | (/Users/dap/work/node-panic/examples/example-loop.js:9:23)\n at 271 | Object. 
(/Users/dap/work/node-panic/examples/example-loop.js:14:1)\n 272 | at Module._compile (module.js:402:26)\n at Object..js (module.js:408:10)", 273 | } 274 | 275 | 276 | Notes 277 | ----- 278 | 279 | This facility was initially developed for Joyent's Cloud Analytics service. 280 | For more information on Cloud Analytics, see http://dtrace.org/blogs/dap/files/2011/07/ca-oscon-data.pdf 281 | 282 | Pull requests accepted, but code must pass style and lint checks using: 283 | 284 | * style: https://github.com/davepacheco/jsstyle 285 | * lint: https://github.com/davepacheco/javascriptlint 286 | 287 | This facility has been tested on MacOSX and Illumos with Node.js v0.4. It has 288 | few dependencies on either the underlying platform or the Node version and so 289 | should work on other platforms. 290 | -------------------------------------------------------------------------------- /cmd/ncore.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /* 4 | * cacore.js: generate a core file from a running node program. See usage. 5 | */ 6 | 7 | var mod_child = require('child_process'); 8 | var mod_debug = require('_debugger'); 9 | var mod_net = require('net'); 10 | var mod_subr = require('../lib/subr'); 11 | 12 | var cacPid; 13 | var cacClient; 14 | var cacStages = []; 15 | var cacTries = 30; 16 | var cacSignaled = false; 17 | var cacUsage = mod_subr.caSprintf([ 18 | 'usage: %s %s PID', 19 | '', 20 | 'Cause the specified process to dump core and exit. The target process ', 21 | 'MUST be a node process and MUST contain the symbol "caPanic". ', 22 | 'Additionally, no other node process on the system may be running under ', 23 | 'the debugger. That is, the node debug port (5858) must be available. ', 24 | 'This tool will attempt to verify these conditions, but such checks are ', 25 | 'necessarily subject to races and so should not be relied upon.' 
26 | ].join('\n'), process.argv[0], process.argv[1]); 27 | 28 | cacStages.push(cacCheckArgs); 29 | cacStages.push(cacCheckTarget); 30 | cacStages.push(cacCheckPort); 31 | cacStages.push(cacDebugEnable); 32 | cacStages.push(cacDebugConnect); 33 | cacStages.push(cacCheckPid); 34 | cacStages.push(cacSendPanic); 35 | 36 | function die() 37 | { 38 | var msg = mod_subr.caSprintf.apply(null, arguments); 39 | 40 | console.error('%s', msg); 41 | 42 | if (cacSignaled) 43 | console.error('WARNING: SIGUSR1 sent to pid %s, but ' + 44 | 'debug attach failed.', cacPid); 45 | 46 | process.exit(1); 47 | } 48 | 49 | function cacCheckArgs(unused, next) 50 | { 51 | if (process.argv.length < 3) 52 | die(cacUsage); 53 | 54 | cacPid = process.argv[2]; 55 | next(); 56 | } 57 | 58 | function cacCheckTarget(unused, next) 59 | { 60 | var cmd = mod_subr.caSprintf('pargs %s | grep "argv\\[0\\]"', cacPid); 61 | 62 | mod_child.exec(cmd, function (error, stdout, stderr) { 63 | if (error) { 64 | if (!/pargs: command not found/.test(stderr)) 65 | die('pargs code %s: %s', error.code, stderr); 66 | 67 | console.error('WARNING: no "pargs" present; cannot ' + 68 | 'confirm process %s is "node"', cacPid); 69 | } else if (!/^argv\[0\]: (.*\/)?node\n$/.test(stdout)) 70 | die('target process is not node: %s', stdout); 71 | 72 | next(); 73 | }); 74 | } 75 | 76 | function cacCheckPort(unused, next) 77 | { 78 | var server = mod_net.createServer(function () {}); 79 | 80 | server.on('error', function (err) { 81 | die('debug port already in use (error %s)\n' + 82 | 'won\'t try to attach to target', err.code); 83 | }); 84 | 85 | server.listen(mod_debug.port, 'localhost', function () { 86 | server.on('close', next); 87 | server.close(); 88 | }); 89 | } 90 | 91 | function cacDebugEnable(unused, next) 92 | { 93 | process.kill(cacPid, 'SIGUSR1'); 94 | cacSignaled = true; 95 | next(); 96 | } 97 | 98 | function cacDebugConnect(unused, next) 99 | { 100 | process.stderr.write(mod_subr.caSprintf( 101 | 'attempting to 
attach to process %s ... ', cacPid)); 102 | 103 | cacClient = new mod_debug.Client(); 104 | 105 | cacClient.on('error', function (err) { 106 | if (--cacTries === 0) 107 | die('FAILED\nexceeded retry limit with error %s ', 108 | err.code); 109 | 110 | process.stderr.write('.'); 111 | setTimeout(function () { 112 | cacClient.connect(mod_debug.port); 113 | }, 1000); 114 | }); 115 | 116 | cacClient.on('ready', function () { 117 | process.stderr.write(' ok.\n'); 118 | next(); 119 | }); 120 | 121 | cacClient.connect(mod_debug.port); 122 | } 123 | 124 | function cacCheckPid(unused, next) 125 | { 126 | cacClient.reqEval('process.pid', function (res) { 127 | if (!res.success || res.body.type != 'number') 128 | die('failed to get target pid: %j', res); 129 | 130 | if (res.body.value != cacPid) 131 | die('connected to wrong pid: %j', res.body.value); 132 | 133 | next(); 134 | }); 135 | } 136 | 137 | function cacSendPanic(unused, next) 138 | { 139 | cacClient.reqEval('caPanic("core dump initiated at user request")', 140 | function (res) { 141 | if (!res.success) 142 | die('core dump FAILED: %j', res); 143 | die('core dumped'); 144 | }); 145 | } 146 | 147 | function main() 148 | { 149 | mod_subr.caRunStages(cacStages, null, function (err) { 150 | if (err) { 151 | die('fatal error: %r', err); 152 | process.exit(1); 153 | } 154 | 155 | process.exit(0); 156 | }); 157 | } 158 | 159 | main(); 160 | -------------------------------------------------------------------------------- /examples/example-auto.js: -------------------------------------------------------------------------------- 1 | /* 2 | * example-auto.js: simple example of automatically panicking on crash 3 | */ 4 | 5 | var mod_panic = require('panic'); 6 | 7 | function func1(arg1) 8 | { 9 | /* include func1 arg in debug state */ 10 | panicDbg.set('func1.arg', arg1); 11 | func2(arg1 + 10); 12 | } 13 | 14 | function func2(arg2) 15 | { 16 | /* include func2 arg in debug state */ 17 | panicDbg.set('func2.arg', arg2); 18 | /* 
crash */ 19 | (undefined).nonexistentMethod(); 20 | } 21 | 22 | /* 23 | * Trigger a panic on crash. 24 | */ 25 | mod_panic.enablePanicOnCrash(); 26 | 27 | /* 28 | * The following line of code will cause this Node program to exit after dumping 29 | * debug state to cacore. (including func1's and func2's arguments). 30 | */ 31 | func1(10); 32 | console.error('cannot get here'); 33 | -------------------------------------------------------------------------------- /examples/example-explicit.js: -------------------------------------------------------------------------------- 1 | /* 2 | * example-explicit.js: example of using "panic" directly 3 | */ 4 | 5 | var mod_panic = require('panic'); 6 | 7 | if (process.argv.length >= 3 && process.argv[2] == 'panic') 8 | mod_panic.panic('panicked on command'); 9 | 10 | console.log('usage: %s %s panic', process.argv[0], process.argv[1]); 11 | -------------------------------------------------------------------------------- /examples/example-loop.js: -------------------------------------------------------------------------------- 1 | /* 2 | * example-loop.js: example of using "ncore" tool to generate a node core 3 | */ 4 | 5 | var mod_panic = require('panic'); 6 | 7 | function func() 8 | { 9 | for (var ii = 0; ; ii++) 10 | panicDbg.set('func-iter', ii); 11 | } 12 | 13 | console.log('starting infinite loop; use "ncore" tool to generate core'); 14 | func(); 15 | -------------------------------------------------------------------------------- /lib/panic.js: -------------------------------------------------------------------------------- 1 | /* 2 | * panic.js: postmortem debugging for JavaScript 3 | * 4 | * A postmortem debugging facility is critical for root-causing issues that 5 | * occur in production from the artifacts of a single failure. 
Without such a 6 | * facility, tracking down problems in production becomes a tedious process of 7 | * adding logging, trying to reproduce the problem, and repeating until enough 8 | * information is gathered to root-cause the issue. For reproducible problems, 9 | * this process is merely painful for developers, administrators, and customers 10 | * alike. For unreproducible problems, this is untenable. 11 | * 12 | * Like most dynamic environments, JavaScript under Node/V8 has no built-in 13 | * postmortem debugging facility, so we implement our own here. The basic idea 14 | * is to maintain a global object that references all of the internal state we 15 | * would want for debugging. Then when our application crashes, we dump this 16 | * state to a file, and then exit. 17 | * 18 | * Note that while the program is panicking, we don't invoke any code inside 19 | * other components; modules must register objects *before* the panic in order 20 | * to have them saved during the panic. This is reasonable because we're only 21 | * storing references, so consumers can continue modifying their objects after 22 | * registering them. This is necessary to minimize the amount of code that must 23 | * work correctly during the panic. 24 | */ 25 | 26 | var mod_fs = require('fs'); 27 | var mod_subr = require('./subr'); 28 | 29 | var caPanicSkipDump = false; 30 | var caPanicAbort = false; 31 | 32 | /* 33 | * Configures the current program to dump saved program state before crashing. 34 | * The following options may be specified: 35 | * 36 | * abortOnPanic On panic, uses process.abort() (which is abort(3C)) to 37 | * exit the program. On some systems, this causes the OS 38 | * to save a core file that can be used to read JavaScript- 39 | * level state. If process.abort isn't available, SIGABRT 40 | * will be used instead. 41 | * 42 | * skipDump On panic, skips attempting to dump JavaScript-level 43 | * state in JavaScript. 
This is mostly useful if you've 44 | * also set abortOnPanic, in which case you expect to 45 | * extract JavaScript-level state from the OS core dump and 46 | * don't want node-panic to even try to serialize state 47 | * separately. 48 | */ 49 | function caEnablePanicOnCrash(options) 50 | { 51 | if (options && options.abortOnPanic) 52 | caPanicAbort = true; 53 | 54 | if (options && options.skipDump) 55 | caPanicSkipDump = true; 56 | 57 | process.on('uncaughtException', function (ex) { 58 | caPanic('panic due to uncaught exception', ex); 59 | }); 60 | } 61 | 62 | /* 63 | * caPanic is invoked when the program encounters a fatal error to log the error 64 | * message and optional exception, dump all state previously registered via 65 | * panicDbg to the file "ncore.<pid>", and then exit the program. This function 66 | * is invoked either explicitly by the application or, if caEnablePanicOnCrash 67 | * has been invoked, automatically when an uncaught exception bubbles back to 68 | * the event loop. Since the program has effectively crashed at the point this 69 | * function is called, we must not allow any other code to run, so we perform 70 | * all filesystem operations synchronously and then exit immediately with a 71 | * non-zero exit status. 72 | */ 73 | function caPanic(str, err) 74 | { 75 | var when, filename, msg; 76 | 77 | if (!err) { 78 | err = new Error(str); 79 | str = 'explicit panic'; 80 | } 81 | 82 | try { 83 | when = new Date(); 84 | filename = 'ncore.' + process.pid; 85 | msg = caPanicWriteSafeError('PANIC: ' + str, err); 86 | 87 | panicDbg.set('panic.error', msg); 88 | panicDbg.set('panic.time', when); 89 | panicDbg.set('panic.time-ms', when.getTime()); 90 | panicDbg.set('panic.memusage', process.memoryUsage()); 91 | 92 | /* 93 | * If we had child.execSync(), we could get pfiles. 
:( 94 | */ 95 | 96 | if (!caPanicSkipDump) { 97 | caPanicLog('writing core dump to ' + process.cwd() + 98 | '/' + filename); 99 | caPanicSave(filename); 100 | caPanicLog('finished writing core dump'); 101 | } 102 | } catch (ex) { 103 | caPanicWriteSafeError('error during panic', ex); 104 | } 105 | 106 | if (!caPanicAbort) 107 | process.exit(1); 108 | 109 | if (process.abort) 110 | process.abort(); 111 | 112 | for (;;) 113 | process.kill(process.pid, 'SIGABRT'); 114 | } 115 | 116 | /* 117 | * Log the given message and error without throwing an exception. 118 | */ 119 | function caPanicWriteSafeError(msg, err) 120 | { 121 | var errstr; 122 | 123 | try { 124 | errstr = mod_subr.caSprintf('%r', err); 125 | } catch (ex) { 126 | errstr = (err && err.message && err.stack) ? 127 | err.message + '\n' + err.stack : ''; 128 | } 129 | 130 | caPanicLog(msg + ': ' + errstr); 131 | return (errstr); 132 | } 133 | 134 | /* 135 | * Log the given raw message without throwing an exception. 136 | */ 137 | function caPanicLog(msg) 138 | { 139 | process.stderr.write('[' + mod_subr.caFormatDate(new Date()) + ']' + 140 | ' CRIT ' + msg + '\n'); 141 | } 142 | 143 | /* 144 | * Saves panicDbg state to the named file. 145 | */ 146 | function caPanicSave(filename) 147 | { 148 | var dump = panicDbg.dump(); 149 | mod_fs.writeFileSync(filename, dump); 150 | } 151 | 152 | /* 153 | * Since we want all components to be able to save debugging state without 154 | * having to pass context pointers around everywhere, we supply a global object 155 | * called panicDbg to which program state can be attached via the following 156 | * methods: 157 | * 158 | * set(name, state) Adds a new debugging key called "name" and 159 | * associates "state" with that key. If "name" is 160 | * already being used, the previous association is 161 | * replaced with the new one. This key-value pair 162 | * will be serialized and dumped when the program 163 | * crashes. 
Assuming "state" is a reference type, 164 | * the caller can modify this object later and such 165 | * updates will be reflected in the serialized 166 | * state when the program crashes. 167 | * 168 | * add(name, state) Like set(name, state), but ensures that the new 169 | * key does not conflict with an existing key by 170 | * adding a unique identifier to it. Returns the 171 | * actual key that was used for subsequent use in 172 | * "remove". 173 | * 174 | * remove(name) Removes an existing association. 175 | * 176 | * dump() Returns the serialized debug state. This should 177 | * NOT be used except by the panic code itself and 178 | * test code since it may modify the debug state. 179 | */ 180 | function caDebugState() 181 | { 182 | var now = new Date(); 183 | 184 | this.cds_state = {}; 185 | this.cds_ids = {}; 186 | 187 | this.set('dbg.format-version', '0.1'); 188 | this.set('init.process.argv', process.argv); 189 | this.set('init.process.pid', process.pid); 190 | this.set('init.process.cwd', process.cwd()); 191 | this.set('init.process.env', process.env); 192 | this.set('init.process.version', process.version); 193 | this.set('init.process.platform', process.platform); 194 | this.set('init.time', now); 195 | this.set('init.time-ms', now.getTime()); 196 | } 197 | 198 | caDebugState.prototype.set = function (name, state) 199 | { 200 | this.cds_state[name] = state; 201 | }; 202 | 203 | caDebugState.prototype.add = function (name, state) 204 | { 205 | var ii; 206 | 207 | if (!this.cds_ids[name]) 208 | this.cds_ids[name] = 1; 209 | 210 | for (ii = this.cds_ids[name]; ; ii++) { 211 | if (!((name + ii) in this.cds_state)) 212 | break; 213 | } 214 | 215 | this.cds_ids[name] = ii + 1; 216 | this.set(name + ii, state); 217 | return (name + ii); 218 | }; 219 | 220 | caDebugState.prototype.remove = function (name) 221 | { 222 | delete (this.cds_state[name]); 223 | }; 224 | 225 | caDebugState.prototype.dump = function () 226 | { 227 | /* 228 | * JSON.stringify() does not 
deal with circular structures, so we have 229 | * to explicitly remove such references here. It would be nice if we 230 | * could encode these properly, but we'll need something more 231 | * sophisticated than JSON. We're allowed to stomp on the state 232 | * in-memory here because we're only invoked in the crash path. 233 | */ 234 | mod_subr.caRemoveCircularRefs(this.cds_state); 235 | return (JSON.stringify(this.cds_state)); 236 | }; 237 | 238 | /* 239 | * The public interface for this module is simple: 240 | * 241 | * caPanic(msg, [err]) Dumps all registered debug state and exits the 242 | * program. 243 | * 244 | * caEnablePanicOnCrash() Configures the program to automatically invoke 245 | * caPanic when an uncaught exception bubbles back 246 | * to the event loop. 247 | * 248 | * [global] panicDbg Manages state to be dumped when caPanic is 249 | * invoked. 250 | * 251 | * While global state is traditionally frowned upon in favor of reusable 252 | * components, the global solution makes more sense for this module since 253 | * there can be only one application running at a time and no matter how many 254 | * components it contains there must only be one set of debugging state that 255 | * gets dumped when the program crashes. 256 | */ 257 | if (!global.panicDbg) 258 | global.panicDbg = new caDebugState(); 259 | 260 | /* 261 | * We expose "caPanic" as a global for the "ncore" tool, which uses the debugger 262 | * interface to invoke it. 
263 | */ 264 | global.caPanic = caPanic; 265 | 266 | exports.enablePanicOnCrash = caEnablePanicOnCrash; 267 | exports.panic = caPanic; 268 | exports.caPanicSave = caPanicSave; /* for testing only */ 269 | exports.caDebugState = caDebugState; /* for testing only */ 270 | -------------------------------------------------------------------------------- /lib/subr.js: -------------------------------------------------------------------------------- 1 | /* 2 | * subr.js: common routines 3 | */ 4 | 5 | var mod_assert = require('assert'); 6 | var mod_sys = require('util'); 7 | 8 | /* 9 | * Formats a date using a reasonable format string. 10 | */ 11 | function caFormatDate(now) 12 | { 13 | return (caSprintf('%4d-%02d-%02d %02d:%02d:%02d.%03d UTC', 14 | now.getUTCFullYear(), now.getUTCMonth() + 1, now.getUTCDate(), 15 | now.getUTCHours(), now.getUTCMinutes(), now.getUTCSeconds(), 16 | now.getUTCMilliseconds())); 17 | } 18 | 19 | /* 20 | * Removes circular references from "obj". This modifies the original object. 21 | */ 22 | function caRemoveCircularRefs(obj) 23 | { 24 | var key, marker, circular; 25 | 26 | marker = 'caRemoveCircular'; 27 | circular = ''; 28 | 29 | if (typeof (obj) != typeof ({})) 30 | return; 31 | 32 | if (obj === null) 33 | return; 34 | 35 | mod_assert.ok(!(marker in obj)); 36 | obj[marker] = true; 37 | 38 | /* 39 | * The following works for both arrays and general objects. 40 | */ 41 | for (key in obj) { 42 | if (typeof (obj[key]) == typeof ({}) && 43 | obj[key] !== null && obj[key][marker]) { 44 | obj[key] = circular; 45 | continue; 46 | } 47 | 48 | caRemoveCircularRefs(obj[key]); 49 | } 50 | 51 | delete (obj[marker]); 52 | } 53 | 54 | /* 55 | * caRunStages is given an array "stages" of functions, an initial argument 56 | * "arg", and a callback "callback". Each stage represents some task, 57 | * asynchronous or not, which should be completed before the next stage is 58 | * started. 
Each stage is invoked with the result of the previous stage and can 59 | * abort this process if it encounters an error. When all stages have 60 | * completed, "callback" is invoked with the error and results of the last stage 61 | * that was run. 62 | * 63 | * More precisely: the first function of "stages" may be invoked during 64 | * caRunStages or immediately after (asynchronously). Each stage is invoked as 65 | * stage(arg, callback), where "arg" is the result of the previous stage (or 66 | * the "arg" specified to caRunStages, for the first stage) and "callback" 67 | * should be invoked when the stage is complete. "callback" should be invoked 68 | * as callback(err, result), where "err" is a non-null instance of Error iff an 69 | * error was encountered and null otherwise, and "result" is an arbitrary object 70 | * to be passed to the next stage. The "callback" given to caRunStages is 71 | * invoked after the last stage has been run with the arguments given to that 72 | * stage's completion callback. 73 | */ 74 | function caRunStages(stages, arg, callback) 75 | { 76 | var stage, next; 77 | 78 | next = function (err, result) { 79 | var nextfunc; 80 | 81 | if (err) 82 | return (callback(err, result)); 83 | 84 | nextfunc = stages[stage++]; 85 | if (!nextfunc) 86 | return (callback(null, result)); 87 | 88 | return (nextfunc(result, next)); 89 | }; 90 | 91 | stage = 0; 92 | next(null, arg); 93 | } 94 | 95 | /* 96 | * Stripped down version of s[n]printf(3c). We make a best effort to throw an 97 | * exception when given a format string we don't understand, rather than 98 | * ignoring it, so that we won't break existing programs if/when we go implement 99 | * the rest of this. 100 | * 101 | * This implementation currently supports specifying 102 | * - field alignment ('-' flag), 103 | * - zero-pad ('0' flag) 104 | * - always show numeric sign ('+' flag), 105 | * - field width 106 | * - conversions for strings, decimal integers, and floats (numbers). 
107 | * - argument size specifiers. These are all accepted but ignored, since 108 | * Javascript has no notion of the physical size of an argument. 109 | * 110 | * Everything else is currently unsupported, most notably precision, unsigned 111 | * numbers, non-decimal numbers, and characters. 112 | */ 113 | function caSprintf(fmt) 114 | { 115 | var regex = [ 116 | '([^%]*)', /* non-special */ 117 | '%', /* start of format */ 118 | '([\'\\-+ #0]*?)', /* flags (optional) */ 119 | '([1-9]\\d*)?', /* width (optional) */ 120 | '(\\.([1-9]\\d*))?', /* precision (optional) */ 121 | '[lhjztL]*?', /* length mods (ignored) */ 122 | '([diouxXfFeEgGaAcCsSp%jr])' /* conversion */ 123 | ].join(''); 124 | 125 | var re = new RegExp(regex); 126 | var args = Array.prototype.slice.call(arguments, 1); 127 | var flags, width, precision, conversion; 128 | var left, pad, sign, arg, match; 129 | var ret = ''; 130 | var argn = 1; 131 | 132 | mod_assert.equal('string', typeof (fmt)); 133 | 134 | while ((match = re.exec(fmt)) !== null) { 135 | ret += match[1]; 136 | fmt = fmt.substring(match[0].length); 137 | 138 | flags = match[2] || ''; 139 | width = match[3] || 0; 140 | precision = match[4] || ''; 141 | conversion = match[6]; 142 | left = false; 143 | sign = false; 144 | pad = ' '; 145 | 146 | if (conversion == '%') { 147 | ret += '%'; 148 | continue; 149 | } 150 | 151 | if (args.length === 0) 152 | throw (new Error('too few args to sprintf')); 153 | 154 | arg = args.shift(); 155 | argn++; 156 | 157 | if (flags.match(/[\' #]/)) 158 | throw (new Error( 159 | 'unsupported flags: ' + flags)); 160 | 161 | if (precision.length > 0) 162 | throw (new Error( 163 | 'non-zero precision not supported')); 164 | 165 | if (flags.match(/-/)) 166 | left = true; 167 | 168 | if (flags.match(/0/)) 169 | pad = '0'; 170 | 171 | if (flags.match(/\+/)) 172 | sign = true; 173 | 174 | switch (conversion) { 175 | case 's': 176 | if (arg === undefined || arg === null) 177 | throw (new Error('argument ' + argn + 178 
| ': attempted to print undefined or null ' + 179 | 'as a string')); 180 | ret += doPad(pad, width, left, arg); 181 | break; 182 | 183 | case 'd': 184 | arg = Math.floor(arg); 185 | /*jsl:fallthru*/ 186 | case 'f': 187 | sign = sign && arg > 0 ? '+' : ''; 188 | ret += sign + doPad(pad, width, left, 189 | arg.toString()); 190 | break; 191 | 192 | case 'j': /* non-standard */ 193 | if (width === 0) 194 | width = 10; 195 | ret += mod_sys.inspect(arg, false, width); 196 | break; 197 | 198 | case 'r': /* non-standard */ 199 | ret += dumpException(arg); 200 | break; 201 | 202 | default: 203 | throw (new Error('unsupported conversion: ' + 204 | conversion)); 205 | } 206 | } 207 | 208 | ret += fmt; 209 | return (ret); 210 | } 211 | 212 | function doPad(chr, width, left, str) 213 | { 214 | var ret = str; 215 | 216 | while (ret.length < width) { 217 | if (left) 218 | ret += chr; 219 | else 220 | ret = chr + ret; 221 | } 222 | 223 | return (ret); 224 | } 225 | 226 | function dumpException(ex) 227 | { 228 | var ret; 229 | 230 | if (!(ex instanceof Error)) 231 | throw (new Error(caSprintf('invalid type for %%r: %j', ex))); 232 | 233 | /* 234 | * Note that V8 prepends "ex.stack" with ex.toString(). 235 | */ 236 | ret = 'EXCEPTION: ' + ex.constructor.name + ': ' + ex.stack; 237 | 238 | if (!ex.cause) 239 | return (ret); 240 | 241 | for (ex = ex.cause(); ex; ex = ex.cause ? 
ex.cause() : null) 242 | ret += '\nCaused by: ' + dumpException(ex); 243 | 244 | return (ret); 245 | } 246 | 247 | exports.caFormatDate = caFormatDate; 248 | exports.caRemoveCircularRefs = caRemoveCircularRefs; 249 | exports.caRunStages = caRunStages; 250 | exports.caSprintf = caSprintf; 251 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "panic", 3 | "version": "0.2.1", 4 | "description": "Postmortem debugging facility", 5 | "author": "Joyent (joyent.com)", 6 | "engines": { 7 | "node": "*" 8 | }, 9 | "main": "./lib/panic", 10 | "repository": { 11 | "type": "git", 12 | "url": "http://github.com/joyent/node-panic.git" 13 | }, 14 | "bin": { 15 | "ncore": "cmd/ncore.js" 16 | }, 17 | "license": "MIT" 18 | } 19 | --------------------------------------------------------------------------------