├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── config.js.sample ├── couchdb-sample.js └── log-parser.js /.gitignore: -------------------------------------------------------------------------------- 1 | !*.gitignore 2 | tmp 3 | .DS_Store 4 | .*.swp 5 | .*.swo 6 | *.pid 7 | config.js 8 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "lib/node-couchdb-min"] 2 | path = lib/node-couchdb-min 3 | url = git://github.com/chewbranca/node-couchdb-min.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010 Russell Branca 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining 4 | a copy of this software and associated documentation files (the 5 | "Software"), to deal in the Software without restriction, including 6 | without limitation the rights to use, copy, modify, merge, publish, 7 | distribute, sublicense, and/or sell copies of the Software, and to 8 | permit persons to whom the Software is furnished to do so, subject to 9 | the following conditions: 10 | 11 | The above copyright notice and this permission notice shall be 12 | included in all copies or substantial portions of the Software. 13 | 14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Node Log Parser 2 | =============== 3 | 4 | Node Log Parser is a Node.js module to parse logs of various formats. Right now 5 | it has support for Rails production logs, but I will soon be adding support for 6 | other formats, such as Couch DB, nginx, Redis, and anything else that comes up. 7 | 8 | It is primarily geared towards taking log files and converting them into JSON 9 | format to pass along to Couch DB for storage and processing. However, it is 10 | modular so other database storage options are welcome. 11 | 12 | Why a Javascript log parser? 13 | -------- 14 | 15 | A few reasons: 16 | 1. I wanted to experiment more with Node.js and Couch DB, and I also have plenty of 17 | log data to work with to make things interesting. 18 | 2. I am not satisfied with current log parsing systems so I decided to roll my 19 | own. This is the processing piece of the puzzle; I will be building a UI on top 20 | of this as a separate component to keep the functionality isolated. 21 | 3. Machine readable log formats are incredibly cool, especially when it's JSON 22 | and you can throw that data into Couch DB to run map/reduce on. Unfortunately, 23 | I have tons of log files that are not stored as JSON, hence the creation of this 24 | project. 25 | 26 | Installation 27 | ------------ 28 | 29 | Install by cloning this repo, initializing submodules, and then including this 30 | library in your Node.js program. 31 | 32 | $ git clone git://github.com/chewbranca/node-log-parser.git 33 | $ git submodule init 34 | $ git submodule update 35 | 36 | $ cp config.js.sample config.js 37 | $ vim config.js 38 | 39 | The submodule dependency is 40 | [rsms/node-couchdb-min](http://github.com/rsms/node-couchdb-min). I have not 41 | modified this fork yet; however, I expect to shortly as this project progresses. 
42 | 43 | Example Storing to Couch DB 44 | --------------------------- 45 | var couchdb = require('./couchdb'), parser = require('./log-parser'); 46 | var cdb = new couchdb.Db('logs'); 47 | 48 | parser.process_logs('/home/chewbranca/src/rails_app/log/production.log', 49 | parser.railsLogParser, 50 | function(logs) { 51 | for (var i = 0, l = logs.length; i < l; i++) { 52 | var log = logs[i]; 53 | cdb.put(uuid, log, function(err, result) { 54 | }); 55 | } 56 | } 57 | ); 58 | 59 | A working example can be found in couchdb-sample.js. Edit the two settings in 60 | config.js, and then from the command line run ./couchdb-sample.js. 61 | 62 | Example Couch DB map/reduce 63 | --------------------------- 64 | 65 | Now that you've got your data into Couch DB, it's time to have some fun. 66 | Here is a quick example calculating the average elapsed time for each page. 67 | 68 | **Map** 69 | 70 | function(doc) { 71 | if (doc.success) { 72 | emit([doc.processing.controller, doc.processing.action], 73 | doc.success.elapsed_time); 74 | } 75 | } 76 | 77 | **Reduce** 78 | 79 | function(keys, values) { 80 | var sum = 0; 81 | values.forEach(function(value) { 82 | sum += parseInt(value); 83 | }); 84 | var avg = sum / values.length; 85 | return avg + "ms"; 86 | } 87 | 88 | 89 | Status 90 | ------ 91 | 92 | This is an initial release and has a lot of rough edges, but it gets some rails 93 | log data from your log files into Couch DB to play around with. A couple of issues 94 | to be aware of: 95 | 96 | * This currently loads the entire log file into memory to process, so it is not 97 | yet recommended for large log files. 98 | * Rails log files are far from consistent; as such, this works with a subset of 99 | rails log messages. This will increase over time as more log messages are 100 | parsed and I extend the rails parser for the various formats. 
101 | 102 | 103 | 104 | 105 | Conventions 106 | ----------- 107 | 108 | Right now there are only two real conventions to keep in mind if you add a new 109 | parser. 110 | 111 | 1. All log messages should either have a success or error property set. 112 | 2. Set the base date property to be the result of `exports.dataToArray`. This 113 | is so we can do some fun sorting on the date fields with Couch DB. 114 | 115 | Todo/Misc Thoughts 116 | ------------------ 117 | 118 | * Switched to buffered line reader 119 | * Add more log parsers 120 | * Expand Rails parsers 121 | * Add more Couch DB views 122 | * Command line options parsing 123 | * Lots more 124 | 125 | * The rails log format is a mess, which is why I'm using regex, however, for 126 | more reasonable single log standardized log messages, I will most likely 127 | inline sed & awk scripts to parse though so we can expedite getting data in. 128 | 129 | 130 | Links 131 | ----- 132 | 133 | * [node-log-parser](http://github.com/chewbranca/node-log-parser) 134 | -------------------------------------------------------------------------------- /config.js.sample: -------------------------------------------------------------------------------- 1 | exports.config = { 2 | "file" : "/home/chewbranca/src/rails_app/log/production.log", 3 | "couch_db_name" : "logs" 4 | }; 5 | -------------------------------------------------------------------------------- /couchdb-sample.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | /* 4 | * Settings 5 | */ 6 | var config = require('./config').config; 7 | 8 | var sys = require('sys'), couchdb = require('./lib/node-couchdb-min/couchdb'), parser = require('./log-parser'); 9 | var logsDB = new couchdb.Db(config.couch_db_name); 10 | var debug = false; 11 | if (debug) parser.setDebug(debug); 12 | 13 | var couchdb_save_func = function(logs) { 14 | if (debug) 15 | sys.puts("Processing "+logs.length+" logs"); 16 | 17 | 
logsDB.get('/_uuids?count='+logs.length, function(err, result) { 18 | if (err) return sys.error(err.stack); 19 | var uuids = result.uuids; 20 | 21 | for (var i = 0, l = logs.length; i < l; i++) { 22 | var uuid = uuids[i]; 23 | var log = logs[i]; 24 | 25 | logsDB.put(uuid, log, function(err, result) { 26 | if (err) return sys.error(err.stack); 27 | if (debug) 28 | sys.log('Created doc at '+uuid+' with --> '+sys.inspect(result)); 29 | }); 30 | } 31 | }); 32 | }; 33 | 34 | parser.process_logs(config.file, parser.railsLogParser, couchdb_save_func); 35 | -------------------------------------------------------------------------------- /log-parser.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | 3 | var fs = require('fs'), sys = require('sys'); 4 | var debug = false; 5 | 6 | /* 7 | * Parsers 8 | */ 9 | exports.railsLogParser = function(msg, index) { 10 | var request = {}; 11 | // DEBUG:: Comment this out to ease debugging as it can get noisy 12 | request.full_log = msg; 13 | var lines = msg.split(/\n/), line = null; 14 | 15 | while ((line = lines.shift()) !== undefined) { 16 | if (line === '') continue; 17 | // Controller, Action, Format, IP, Date, Method 18 | if ((tmp = line.match(/^Processing (.*?)Controller#([^\s]+?)(?: to ([^\s]+?))? \(for ([0-9\.]+?) at ([^)]+?)\) \[([^\]]+)\]$/))) { 19 | request["date"] = exports.dateToArray(tmp[5]); 20 | request.processing = { 21 | "controller" : tmp[1], 22 | "action" : tmp[2], 23 | "format" : tmp[3], 24 | "ip" : tmp[4], 25 | "date" : tmp[5], 26 | "method" : tmp[6] 27 | }; 28 | // Params Ruby hash 29 | } else if ((tmp = line.match(/^\s+Parameters: ([{].+[}])$/))) { 30 | request.params = JSON.parse(tmp[1].replace(/(['"])\s?=>\s?(['"{])/g, '$1 : $2')); 31 | } else if (line.match(/^Rendering/)) { 32 | if ( ! 
request.render) { 33 | request.render = []; 34 | } 35 | if ((tmp = line.match(/^Rendering template within (.*)/))) { 36 | request.render.push({"type" : "layout", "file" : tmp[1]}); 37 | } else if ((tmp = line.match(/^Rendering ([^\s]+)$/))) { 38 | request.render.push({"type" : "view", "file" : tmp[1]}); 39 | } else if ((tmp = line.match(/^Rendering ([^\s]+) \(([^\)]+)\)$/))) { 40 | request.render.push({"type" : "view", "file" : tmp[1], "error" : tmp[2]}); 41 | } else { 42 | if (exports.isDebug()) { 43 | sys.puts("***ERROR:: Could not parse Render line***"); 44 | sys.puts("\t"+line); 45 | } 46 | } 47 | } else if ((tmp = line.match(/^Redirected to ([^\s]+)$/))) { 48 | request.redirect = tmp[1]; 49 | } else if ((tmp = line.match(/^Filter chain halted as \[([^\]]+)\]/))) { 50 | request.chain_filter = tmp[1]; 51 | // TimeTaken, ViewTime, DBTime, HTTP Status, URL 52 | } else if ((tmp = line.match(/^Completed in ([^\s]+) \((?:View: ([0-9]+))?(?:, )?(?:DB: ([0-9]+))\) \| ([0-9]{3} .+?) \[([^\]]+)\]$/))) { 53 | request.success = { 54 | "elapsed_time" : tmp[1], 55 | "view_time" : tmp[2], 56 | "db_time" : tmp[3], 57 | "http_status" : tmp[4], 58 | "request_url" : tmp[5] 59 | }; 60 | } else if ((tmp = line.match(/^(?:([^:\s]+)::)?([^\s]+?Error|UnknownAction) \((.+?)\)?:$/))) { 61 | var error = { 62 | "klass" : tmp[1], 63 | "error_type" : tmp[2], 64 | "error_msg" : tmp[3], 65 | "stack_trace" : [] 66 | }; 67 | if (error.error_type == 'RoutingError' && (tmp_error = error.error_msg.match(/^No route matches "([^"]+)" with ([{].+[}])$/))) { 68 | error.route = tmp_error[1]; 69 | error.params = JSON.parse(tmp_error[2].replace(/:([^\s=,]+)/g, '"$1"').replace(/(['"])\s?=>\s?(['"{])/g, '$1 : $2').replace(/"=>/g, '" :')); 70 | } else if (error.error_type == 'TemplateError' && (tmp_error = error.error_msg.match(/^wrong number of arguments \((([0-9]+) for ([0-9]+))\)\) on line #([0-9]+) of (.+)$/))) { 71 | error.arguments_msg = tmp_error[1]; 72 | error.arguments_one = tmp_error[2]; 73 | 
error.arguments_two = tmp_error[3]; 74 | error.line_number = tmp_error[4]; 75 | error.file = tmp_error[5]; 76 | error.view_trace = []; 77 | while ((line = lines.shift()) !== undefined && line !== '') { 78 | error.view_trace.push(line.trim()); 79 | } 80 | } else { 81 | if (exports.isDebug()) 82 | sys.puts("Count not match error("+error.error_type+"): " + error.error_msg); 83 | } 84 | while ((line = lines.shift()) !== undefined && line !== '') { 85 | error.stack_trace.push(line.trim()); 86 | } 87 | request.error = error; 88 | } else { 89 | if (exports.isDebug()) 90 | sys.puts("Could not process line: "+line); 91 | } 92 | } 93 | if (request.processing) { 94 | if (exports.isDebug()) { 95 | sys.puts(sys.inspect(request)); 96 | sys.puts("\n"); 97 | } 98 | return request; 99 | } else { 100 | if (exports.isDebug()) { 101 | sys.puts("\nERROR:: Could not process log message:\n\n"); 102 | sys.puts(sys.inspect(request.full_log)); 103 | sys.puts("\n"); 104 | } 105 | return false; 106 | } 107 | }; 108 | 109 | /* 110 | * Parser Dispatcher 111 | */ 112 | // Takes file path, log process function and save log function 113 | exports.process_logs = function(file, process_func, save_func) { 114 | fs.readFile(file, 'utf8', function(read_error, content) { 115 | var logs = []; 116 | if (read_error) return sys.error(read_error); 117 | 118 | // TODO:: Remove these hardcode rails filters 119 | // switch filters and split functions to a nested helper in process func 120 | content.replace(/^Starting the New Relic Agent[^\n]+/,'').replace(/^\*\*\s+vote_fu[^\n]+/, '').split(/\n{3,}/).forEach(function(msg, index) { 121 | if ((tmp = process_func(msg, index))) 122 | logs.push(tmp); 123 | }); 124 | save_func(logs); 125 | }); 126 | }; 127 | 128 | /* 129 | * Utility Functions 130 | */ 131 | exports.setDebug = function(val) { 132 | debug = !! 
val; 133 | }; 134 | 135 | exports.isDebug = function() { 136 | return debug; 137 | }; 138 | 139 | exports.dateToArray = function(dateObj) { 140 | if (typeof dateObj !== 'object') 141 | dateObj = new Date(dateObj); 142 | return [ 143 | dateObj.getFullYear(), 144 | dateObj.getMonth() + 1, 145 | dateObj.getDate(), 146 | dateObj.getHours(), 147 | dateObj.getMinutes(), 148 | dateObj.getSeconds() 149 | ]; 150 | }; 151 | --------------------------------------------------------------------------------