├── .gitignore ├── wscript ├── sample.js ├── hdb.js ├── LICENSE ├── pmd.cc ├── README.md └── heap-dump.js /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | .lock-wscript 3 | *.swp 4 | node_modules 5 | -------------------------------------------------------------------------------- /wscript: -------------------------------------------------------------------------------- 1 | srcdir = '.' 2 | blddir = 'build' 3 | VERSION = '0.0.1' 4 | 5 | def set_options(opt): 6 | opt.tool_options('compiler_cxx') 7 | 8 | def configure(conf): 9 | conf.check_tool('compiler_cxx') 10 | conf.check_tool('node_addon') 11 | 12 | def build(bld): 13 | obj = bld.new_task_gen('cxx', 'shlib', 'node_addon') 14 | obj.target = 'pmd' 15 | obj.source = 'pmd.cc' 16 | -------------------------------------------------------------------------------- /sample.js: -------------------------------------------------------------------------------- 1 | /* 2 | * sample.js: sample program that takes a heap snapshot and sends it to stdout 3 | */ 4 | 5 | var pmd = require('pmd'); 6 | 7 | /* We put this particular object in the heap to try to find it in the dump. 
*/ 8 | stuff = { 9 | somekey: 'someval', 10 | someotherkey: 15, 11 | whoa: { 12 | hello: [ 'compost', 'mortem' ] 13 | } 14 | }; 15 | 16 | junk = new Date(); 17 | 18 | pmd.takeSnapshot(); 19 | -------------------------------------------------------------------------------- /hdb.js: -------------------------------------------------------------------------------- 1 | /* 2 | * hdb.js: uses heap-dump to read in a heap dump and analyze it 3 | */ 4 | 5 | var mod_heap = require('./heap-dump'); 6 | 7 | var subcmds = { 8 | text: dbgText, 9 | html: dbgHtml, 10 | explore: dbgExplore 11 | }; 12 | 13 | function main(argv) 14 | { 15 | 16 | if (argv.length != 2 || !(argv[1] in subcmds)) { 17 | console.error('usage: node hdb.js heapfile command'); 18 | console.error(' subcommands: text, html, or explore'); 19 | process.exit(1); 20 | } 21 | 22 | mod_heap.readFile(argv[0], function (err, dump) { 23 | if (err) 24 | throw (err); 25 | 26 | subcmds[argv[1]](dump); 27 | }); 28 | } 29 | 30 | function dbgText(dump) 31 | { 32 | dump.dbgdumpText(process.stdout); 33 | process.stdout.end(); 34 | } 35 | 36 | function dbgHtml(dump) 37 | { 38 | dump.dbgdumpHtml(process.stdout); 39 | process.stdout.end(); 40 | } 41 | 42 | function dbgExplore(dump) 43 | { 44 | dump.dbgexplore(process.stdout); 45 | } 46 | 47 | main(process.argv.slice(2)); 48 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2011 David Pacheco. All rights reserved. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to 5 | deal in the Software without restriction, including without limitation the 6 | rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 7 | sell copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 19 | IN THE SOFTWARE. 20 | -------------------------------------------------------------------------------- /pmd.cc: -------------------------------------------------------------------------------- 1 | /* 2 | * pmd.cc: node add-on for taking a heap snapshot and serializing it to stdout 3 | */ 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | 13 | #include 14 | 15 | #include 16 | 17 | using namespace v8; 18 | using std::string; 19 | 20 | /* 21 | * Implementation of OutputStream serializer that saves to a stdio stream. 
22 | */ 23 | class FileOutputStream : public OutputStream 24 | { 25 | private: 26 | FILE *out; 27 | 28 | public: 29 | FileOutputStream(FILE *out) 30 | { 31 | this->out = out; 32 | } 33 | 34 | void EndOfStream() 35 | { 36 | fflush(out); 37 | } 38 | 39 | OutputStream::WriteResult WriteAsciiChunk(char *data, int size) 40 | { 41 | int ii, rv; 42 | 43 | /* remove newlines */ 44 | for (ii = 0; ii < size; ii++) 45 | if (data[ii] == '\n') 46 | data[ii] = ' '; 47 | 48 | rv = write(fileno(out), data, size); 49 | 50 | if (rv != size) 51 | fprintf(stderr, "write returned %d\n", size); 52 | 53 | return (OutputStream::kContinue); 54 | } 55 | }; 56 | 57 | Handle take_snapshot(const Arguments& args) 58 | { 59 | HandleScope scope; 60 | FileOutputStream *out; 61 | const HeapSnapshot *hsp; 62 | 63 | FILE *fp = stdout; 64 | 65 | bool customOut = args[0]->IsString(); 66 | 67 | if (customOut) { 68 | String::AsciiValue fname(args[0]); 69 | fp = fopen(*fname, "w"); 70 | } 71 | 72 | out = new FileOutputStream(fp); 73 | hsp = HeapProfiler::TakeSnapshot(String::New("snap")); 74 | hsp->Serialize(out, HeapSnapshot::kJSON); 75 | 76 | if (customOut) { 77 | fclose(fp); 78 | } 79 | 80 | Local rv = Object::New(); 81 | return (rv); 82 | } 83 | 84 | extern "C" void 85 | init (Handle target) 86 | { 87 | HandleScope scope; 88 | Local templ = FunctionTemplate::New(take_snapshot); 89 | 90 | target->Set(String::NewSymbol("takeSnapshot"), templ->GetFunction()); 91 | } 92 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | node-heap-dump 3 | ============== 4 | 5 | Overview 6 | -------- 7 | 8 | This is a collection of programs for creating and viewing Node heap dumps using 9 | V8 snapshots. This was an experiment to see what run-time structures could be 10 | viewed post-mortem using heap snapshots. 
11 | 12 | There are the following components: 13 | 14 | - pmd.cc: add-on that provides one function, `takeSnapshot`, that takes a new 15 | heap snapshot and serializes it as JSON to stdout, or to the file named by an optional string argument (using the built-in V8 16 | mechanism for this). 17 | - sample.js: simple script that uses the "pmd" add-on to generate a heap snapshot. 18 | - heap-dump.js: a class called HeapDump that parses serialized heap snapshots. 19 | This is probably the component most likely to be useful for other projects 20 | because it parses the heap snapshot pretty generically. 21 | - hdb.js: a program that takes a heap dump generated by the "pmd" add-on, 22 | parses it using heap-dump.js, and lets you summarize or explore the results. 23 | 24 | The heap snapshot parser is pretty basic: it resolves references to strings, 25 | type names, etc., but does *not* resolve references to other nodes. This would 26 | be easy to add as a second pass. 27 | 28 | Note that the heap snapshot is a relatively compact representation; the 29 | in-memory representation generated by this parser is substantially larger. 30 | 31 | 32 | Setup: creating a snapshot 33 | -------------------------- 34 | 35 | # install dependencies 36 | npm install sprintf 37 | 38 | # build the add-on 39 | node-waf configure 40 | node-waf build 41 | node-waf install 42 | 43 | # generate a heap snapshot 44 | node sample.js > heapsnapshot 45 | 46 | Example 1: summarize a snapshot 47 | ------------------------------- 48 | 49 | node hdb.js heapsnapshot text > heapsummary.txt 50 | 51 | The result is a text file that resembles the snapshot but with various string 52 | and type references resolved and presented in more human-readable form. 53 | 54 | 55 | Example 2: explore a snapshot 56 | ----------------------------- 57 | 58 | You can use "explore" to explore a snapshot. It opens up a REPL with the 59 | following functions: 60 | 61 | - `node(num)`: return the node with index `num`. 
62 | - `findstr(str)`: return the node corresponding to the string `str`. 63 | - `findrefs(num)`: return nodes referencing the node with index `num`. 64 | - `parents(num)`: returns array of nodes referencing the specified node 65 | - `dump(depth)`: dumps out information from the root 66 | - `tree(num, depth)`: prints a tree of nodes 67 | - `root(num)`: prints the shortest path from the specified node to the root 68 | 69 | For example: 70 | 71 | node hdb.js heapsnapshot explore 72 | > findstr('compost') 73 | { index: 503438, 74 | type: 'string', 75 | name: 'compost', 76 | children: [] } 77 | 78 | > findrefs(503438) 79 | [ { node: '40615', 80 | node_nchildren: 16385, 81 | node_name: '', 82 | type: 'hidden', 83 | name_or_index: 7722, 84 | to_node: 503438 }, 85 | { node: '165335', 86 | node_nchildren: 6, 87 | node_name: 'Array', 88 | type: 'element', 89 | name_or_index: 0, 90 | to_node: 503438 }, 91 | { node: '226953', 92 | node_nchildren: 6, 93 | node_name: 'Array', 94 | type: 'element', 95 | name_or_index: 0, 96 | to_node: 503438 }, 97 | { node: '294910', 98 | node_nchildren: 3, 99 | node_name: '', 100 | type: 'hidden', 101 | name_or_index: 2, 102 | to_node: 503438 } ] 103 | 104 | > node(165335) 105 | { index: 165335, 106 | type: 'object', 107 | name: 'Array', 108 | children: 109 | [ { type: 'element', name_or_index: 0, to_node: 503438 }, 110 | { type: 'element', name_or_index: 1, to_node: 566310 }, 111 | { type: 'property', 112 | name_or_index: '__proto__', 113 | to_node: 40475 }, 114 | { type: 'hidden', name_or_index: 1, to_node: 248392 }, 115 | { type: 'hidden', name_or_index: 2, to_node: 570782 }, 116 | { type: 'hidden', name_or_index: 3, to_node: 294910 } ] } 117 | 118 | Though it's not obvious if you haven't spent time looking at V8 heap dumps, 119 | we've established that node 165335 is an array whose first element is the 120 | string 'compost'. 
121 | 122 | 123 | Resources 124 | --------- 125 | 126 | The snapshot format is documented in: 127 | 128 | node/deps/v8/include/v8-profiler.h 129 | 130 | This discussion contains additional useful information: 131 | 132 | http://groups.google.com/group/google-chrome-developer-tools/browse_thread/thread/a5f86cb20fa1e9eb/?pli=1 133 | -------------------------------------------------------------------------------- /heap-dump.js: -------------------------------------------------------------------------------- 1 | /* 2 | * heap-dump.js: parses serialized JSON representation of a V8 snapshot 3 | */ 4 | 5 | var mod_assert = require('assert'); 6 | var mod_fs = require('fs'); 7 | var mod_repl = require('repl'); 8 | var mod_sys = require('sys'); 9 | 10 | var sprintf = require('sprintf').sprintf; 11 | mod_repl.writer = function (obj) { return (mod_sys.inspect(obj, false, 5, true)); }; 12 | 13 | /* 14 | * Parses the serialized JSON representation of a V8 snapshot. Note that the 15 | * serialized form is relatively compressed, so the resulting parsed 16 | * representation will take substantially more memory. 
17 | */ 18 | function HeapDump(contents) 19 | { 20 | this.load(contents); 21 | } 22 | 23 | HeapDump.prototype.load = function (contents) 24 | { 25 | var last; 26 | 27 | this.hd_snapname = contents['snapshot']['title']; 28 | this.hd_snapid = contents['snapshot']['uid']; 29 | this.hd_fields = contents['nodes'][0]['fields']; 30 | this.hd_field_types = contents['nodes'][0]['types']; 31 | this.hd_type_types = this.hd_field_types[0]; /* XXX */ 32 | this.hd_strings = contents['strings']; 33 | this.hd_nodes = contents['nodes']; 34 | this.hd_nodeidx = 1; 35 | this.hd_graph = {}; 36 | this.hd_nnodes = 0; 37 | 38 | console.error('will read %s entries', this.hd_nodes.length); 39 | 40 | while (this.hd_nodeidx < this.hd_nodes.length) { 41 | this.readNode(); 42 | 43 | if (last === undefined || this.hd_nodeidx - last >= 5000) { 44 | last = this.hd_nodeidx; 45 | process.stderr.write( 46 | sprintf('read %s entries\r', this.hd_nodeidx)); 47 | } 48 | } 49 | 50 | console.error('read %s entries (done)', this.hd_nodeidx); 51 | console.error('read %s nodes', this.hd_nnodes); 52 | mod_assert.equal(this.hd_nodeidx, this.hd_nodes.length); 53 | }; 54 | 55 | HeapDump.prototype.readNode = function () 56 | { 57 | var node; 58 | 59 | node = {}; 60 | node['index'] = this.hd_nodeidx; 61 | 62 | for (ii = 0; ii < this.hd_fields.length; ii++) 63 | node[this.hd_fields[ii]] = this.readRawField(node, 64 | this.hd_fields[ii], this.hd_field_types[ii]); 65 | 66 | this.hd_graph[node['index']] = node; 67 | this.hd_nnodes++; 68 | }; 69 | 70 | HeapDump.prototype.readRawField = function (obj, fieldname, type) 71 | { 72 | var rawval, value, ii; 73 | 74 | if (fieldname == 'children') { 75 | mod_assert.ok('children_count' in obj); 76 | mod_assert.ok(typeof (obj['children_count']) == 'number'); 77 | 78 | value = []; 79 | 80 | for (ii = 0; ii < obj['children_count']; ii++) { 81 | rawval = this.readRawField({}, 'child', type); 82 | value.push(rawval); 83 | } 84 | 85 | return (value); 86 | } 87 | 88 | /* 89 | * The 
actual type of the "name_or_index" field depends on the "type" of 90 | * the object we're reading. 91 | */ 92 | if (fieldname == 'name_or_index') { 93 | mod_assert.equal(type, 'string_or_number'); 94 | 95 | if (obj['type'] == 'element' || obj['type'] == 'hidden') 96 | type = 'number'; 97 | else 98 | type = 'string'; 99 | } 100 | 101 | mod_assert.notEqual(type, 'string_or_number'); 102 | 103 | if (type == 'number' || type == 'node') { 104 | value = this.hd_nodes[this.hd_nodeidx++]; 105 | mod_assert.ok(typeof (value) == 'number'); 106 | return (value); 107 | } 108 | 109 | if (type == 'string') { 110 | rawval = this.hd_nodes[this.hd_nodeidx++]; 111 | mod_assert.ok(typeof (rawval) == 'number'); 112 | mod_assert.ok(rawval >= 0); 113 | mod_assert.ok(rawval < this.hd_strings.length); 114 | value = this.hd_strings[rawval]; 115 | return (value); 116 | } 117 | 118 | if (Array.isArray(type)) { 119 | rawval = this.hd_nodes[this.hd_nodeidx++]; 120 | mod_assert.ok(typeof (rawval) == 'number'); 121 | mod_assert.ok(rawval >= 0); 122 | mod_assert.ok(rawval < type.length); 123 | value = type[rawval]; 124 | return (value); 125 | } 126 | 127 | mod_assert.ok('fields' in type); 128 | mod_assert.ok('types' in type); 129 | 130 | value = {}; 131 | 132 | for (ii = 0; ii < type['fields'].length; ii++) 133 | value[type['fields'][ii]] = this.readRawField( 134 | value, type['fields'][ii], type['types'][ii]); 135 | 136 | return (value); 137 | }; 138 | 139 | /* dumps out a summary of the whole graph. */ 140 | HeapDump.prototype.dbgdumpText = function (out) 141 | { 142 | var id, node; 143 | 144 | console.error('saving text summary ... 
'); 145 | 146 | for (id in this.hd_graph) { 147 | node = this.hd_graph[id]; 148 | out.write(sprintf('NODE %s (%s): %s\n', node['id'], 149 | node['index'], mod_sys.inspect(node))); 150 | } 151 | 152 | console.error('done'); 153 | } 154 | 155 | HeapDump.prototype.dbgdumpHtml = function (out) 156 | { 157 | var id, node, prop, indent, text; 158 | 159 | console.error('saving HTML summary ... '); 160 | 161 | indent = '    '; 162 | out.write('
\n'); 163 | 164 | for (id in this.hd_graph) { 165 | node = this.hd_graph[id]; 166 | text = sprintf('NODE %s
\n', 167 | node['index'], node['index']); 168 | for (prop in node) { 169 | if (prop == 'children') 170 | continue; 171 | 172 | text += sprintf('%s%s: %s
\n', indent, prop, 173 | node[prop]); 174 | } 175 | 176 | text += sprintf('%schildren: [
\n', indent); 177 | node['children'].forEach(function (child) { 178 | text += sprintf('%s%s{ type: "%s", ' + 179 | 'name_or_index: "%s", to_node: "', indent, indent, 180 | child['type'], child['name_or_index']); 181 | text += sprintf('%s" }
\n', 182 | child['to_node'], child['to_node']); 183 | }); 184 | 185 | text += sprintf('%s]
\n', indent); 186 | out.write(text); 187 | } 188 | 189 | out.write('
'); 190 | 191 | console.error('done'); 192 | } 193 | 194 | HeapDump.prototype.dbgexplore = function (out) 195 | { 196 | var heap = this; 197 | var repl = mod_repl.start(); 198 | var nodef; 199 | 200 | heap.computeDepths(this.hd_graph[1], 0); 201 | 202 | repl.context['pnode'] = function (num) { 203 | var node = heap.hd_graph[num]; 204 | console.log([ 205 | 'NODE %s:', 206 | ' type: %s', 207 | ' name: %s', 208 | ' children: %s' 209 | ].join('\n'), num, node['type'], node['name'], 210 | mod_sys.inspect(node['children'])); 211 | }; 212 | 213 | repl.context['node'] = nodef = function (num) { 214 | var node = heap.hd_graph[num]; 215 | return ({ 216 | index: node['index'], 217 | type: node['type'], 218 | name: node['name'], 219 | depth: node['depth'], 220 | children: node['children'] 221 | }); 222 | }; 223 | 224 | repl.context['parents'] = function (num) { 225 | return (heap.hd_graph[num]['parents']); 226 | }; 227 | 228 | repl.context['children'] = function (num) { 229 | var node = heap.hd_graph[num]; 230 | return (node['children']); 231 | }; 232 | 233 | repl.context['findstr'] = function (str) { 234 | var id, node; 235 | 236 | for (id in heap.hd_graph) { 237 | node = heap.hd_graph[id]; 238 | if (node['type'] == 'string' && node['name'] == str) 239 | return (nodef(node['index'])); 240 | } 241 | 242 | return (undefined); 243 | }; 244 | 245 | repl.context['findrefs'] = function (id) { 246 | var id, node, rv, ii; 247 | 248 | rv = []; 249 | 250 | for (nid in heap.hd_graph) { 251 | node = heap.hd_graph[nid]; 252 | node['children'].forEach(function (edge) { 253 | if (edge['to_node'] != id) 254 | return; 255 | 256 | rv.push({ 257 | node: nid, 258 | node_nchildren: node['children_count'], 259 | node_name: node['name'], 260 | type: edge['type'], 261 | name_or_index: edge['name_or_index'], 262 | to_node: edge['to_node'], 263 | }); 264 | }); 265 | } 266 | 267 | return (rv); 268 | }; 269 | 270 | repl.context['tree'] = function (nodeid, depth) { 271 | if (depth === undefined) 272 | 
depth = 3; 273 | 274 | if (nodeid === undefined) 275 | nodeid = 1; 276 | 277 | heap.dumpTree('', nodeid, out, depth, 0); 278 | }; 279 | 280 | repl.context['dump'] = function (depth) { 281 | var rootcld; 282 | 283 | if (depth === undefined) 284 | depth = 2; 285 | 286 | rootcld = heap.hd_graph[1]['children'].map(function (elt) { 287 | return (elt['to_node']); 288 | }); 289 | 290 | out.write('Global scope 1\n'); 291 | heap.dumpTree('', rootcld[0], out, depth, 0); 292 | out.write('\nGlobal scope 2\n'); 293 | heap.dumpTree('', rootcld[1], out, depth, 0); 294 | out.write(sprintf('\n%s\n', heap.hd_graph[rootcld[2]]['name'])); 295 | heap.dumpTree('', rootcld[2], out, depth, 0); 296 | }; 297 | 298 | repl.context['root'] = function (nodeid) { 299 | var path, node, func, bestnext; 300 | 301 | node = heap.hd_graph[nodeid]; 302 | 303 | if (nodeid == 1) 304 | return ([ { 305 | node: nodeid, 306 | node_name: node['name'], 307 | node_nchildren: node['children_count'], 308 | } ]); 309 | 310 | func = arguments.callee; 311 | 312 | for (ii = 0; ii < node['parents'].length; ii++) { 313 | if (bestnext === undefined || 314 | bestnext['depth'] > 315 | heap.hd_graph[node['parents'][ii]]['depth']) 316 | bestnext = heap.hd_graph[node['parents'][ii]]; 317 | } 318 | 319 | mod_assert.ok(bestnext !== undefined); 320 | mod_assert.ok(bestnext['depth'] < node['depth']); 321 | path = func(bestnext['index']); 322 | path.push({ 323 | node: nodeid, 324 | node_name: node['name'], 325 | node_nchildren: node['children_count'], 326 | }); 327 | return (path); 328 | }; 329 | }; 330 | 331 | HeapDump.prototype.computeDepths = function (node, depth) 332 | { 333 | var heap = this; 334 | 335 | if (!('depth' in node)) { 336 | node['depth'] = depth; 337 | node['parents'] = []; 338 | } 339 | 340 | node['children'].forEach(function (edge) { 341 | var child = heap.hd_graph[edge['to_node']]; 342 | 343 | if ('depth' in child && child['depth'] <= depth + 1) { 344 | child['parents'].push(node['index']); 345 | return; 346 
| } 347 | 348 | child['parents'] = [ node['index'] ]; 349 | child['depth'] = depth + 1; 350 | heap.computeDepths(child, depth + 1); 351 | }); 352 | }; 353 | 354 | HeapDump.prototype.dumpTree = function (label, nodeid, out, depth, indent) 355 | { 356 | var heap, node, name, indstr, ii; 357 | 358 | indstr = ''; 359 | for (ii = 0; ii < indent; ii++) 360 | indstr += ' '; 361 | 362 | heap = this; 363 | node = this.hd_graph[nodeid]; 364 | name = node['name']; 365 | if (name.length > 15) 366 | name = sprintf('"%s" ... ', name.substr(0, 15)); 367 | 368 | out.write(sprintf('%s%s%s "%s" (length %s, node %s)\n', indstr, label, 369 | node['type'], name, node['name'].length, node['index'])); 370 | 371 | if (indent >= depth) 372 | return; 373 | 374 | node['children'].forEach(function (child) { 375 | if (child['type'] == 'hidden') 376 | return; 377 | 378 | heap.dumpTree(sprintf('%s %s: ', child['type'], 379 | child['name_or_index']), child['to_node'], out, depth, 380 | indent + 1); 381 | }); 382 | }; 383 | 384 | exports.readFile = function (filename, callback) 385 | { 386 | mod_fs.readFile(filename, function (err, contents) { 387 | var json, dump; 388 | 389 | if (err) 390 | return (callback(err)); 391 | 392 | try { 393 | json = JSON.parse(contents); 394 | dump = new HeapDump(json); 395 | } catch (ex) { 396 | return (callback(ex)); 397 | } 398 | 399 | return (callback(null, dump)); 400 | }); 401 | } 402 | --------------------------------------------------------------------------------