├── README.md └── SlowQueryLogParser.rb /README.md: -------------------------------------------------------------------------------- 1 | The MySQL slow query log parser converts your slow query logs into a more usable format. It also adds some interesting information like median and average times. 2 | 3 | Here is an example of what the parsed log looks like: 4 | 5 | ``` 6 | 1 Queries 7 | Taking 4 seconds to complete 8 | Locking for 0 seconds 9 | Average time: 4, Median time 4 10 | Average lock: 0, Median lock 0 11 | 12 | DELETE FROM blah WHERE blah1 >= XXX AND blah2<= XXX; 13 | ################################################################################ 14 | 15 | 22 Queries 16 | Taking 3 3 seconds to complete 17 | Locking for 0 0 seconds 18 | Average time: 3, Median time 3 19 | Average lock: 0, Median lock 0 20 | 21 | select * from table1 WHERE table1.something = table.something and table1.x = XXX; 22 | ################################################################################ 23 | ``` 24 | 25 | This parser was inspired by the Perl mysql_slow_log_parser written by Nathanial Hendler. -------------------------------------------------------------------------------- /SlowQueryLogParser.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/ruby -w 2 | 3 | # 4 | # -------------------------------------------------------------------------------- 5 | # MYSQL SLOW QUERY LOG PARSER 6 | # -------------------------------------------------------------------------------- 7 | # 8 | # http://code.google.com/p/mysql-slow-query-log-parser 9 | # 10 | # Inspired by the Perl MySQL slow query log parser written by 11 | # Nathanial Hendler (http://retards.org/) 12 | # 13 | # Any suggestions or fixes are more than welcome. 
14 | # lee (at) kumkee (dot) com 15 | # 16 | # -------------------------------------------------------------------------------- 17 | # USAGE 18 | # -------------------------------------------------------------------------------- 19 | # 20 | # ruby SlowQueryLogParser.rb [Path to log] [Order By] 21 | # 22 | # eg. 23 | # ruby SlowQueryLogParser.rb query.log lock 24 | # 25 | # Order By Options: 26 | # - lock 27 | # - time 28 | # - number 29 | # 30 | # -------------------------------------------------------------------------------- 31 | # TODO 32 | # -------------------------------------------------------------------------------- 33 | # 34 | # - Parse server info at the top of the log 35 | # - Add date / time selection 36 | # - XML Output 37 | # - Totals information / stats 38 | # 39 | # -------------------------------------------------------------------------------- 40 | # UPDATE LOG 41 | # -------------------------------------------------------------------------------- 42 | # 43 | # 2007-06-06 - Version 0.1 Alpha 44 | # First version with basic parsing of log file & basic sorting 45 | # 2009-02-20 46 | # Support for multiline queries by Jacob Kjeldahl 47 | # 2011-06-27 48 | # Fix for bug where minimum time and lock always compute to 0 by Benoit Soenen 49 | # 50 | # -------------------------------------------------------------------------------- 51 | # 52 | # MySQL Slow Query Log Parser. 53 | # 54 | # Copyright 2007-2011 Lee Kemp 55 | # 56 | # Licensed under the Apache License, Version 2.0 (the "License"); 57 | # you may not use this file except in compliance with the License. 58 | # You may obtain a copy of the License at 59 | # 60 | # http://www.apache.org/licenses/LICENSE-2.0 61 | # 62 | # Unless required by applicable law or agreed to in writing, software 63 | # distributed under the License is distributed on an "AS IS" BASIS, 64 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and
# limitations under the License.
#

require 'date'

# Vars
logPath = ARGV[0]
orderBy = ARGV[1]

version = "0.1 Alpha"
spacer = "#" * 80

# Print page header
puts spacer
puts
puts "MySQL Slow Query Log Parser v #{version}"
puts
puts Time.now
puts "Output for #{logPath} ordered by #{orderBy}"
puts
puts spacer
puts

# This array holds all the query objects after they have been read from the text file
queries = []

# This hash holds the QueryTotals using the normalized SQL query as the key
queryTotals = {}

# A single entry read from the slow query log: the raw SQL plus the values
# taken from its header lines (timestamp, query time, lock time, row counts
# and the user/host fields).
class Query

  # sql   - raw SQL text of the entry (may span several lines)
  # date  - timestamp text taken from the "# Time:" header line
  # time, lock, rows, sent - numeric header fields; converted with to_i, so
  #                          any fractional part is truncated
  # user, url, ip - whitespace-separated fields of the line following "# Time:"
  def initialize(sql, date, time, lock, rows, sent, user, url, ip)
    @sql = sql
    @date = date
    @time = time.to_i
    @lock = lock.to_i
    @rows = rows.to_i
    @sent = sent.to_i
    @user = user
    @url = url
    @ip = ip

    # Normalize sql query using RegExp from perl parser, so that queries which
    # differ only in their literal values end up with the same key.
    @normalized_query = @sql.gsub(/\d+/, "XXX") # Replace numbers
    @normalized_query = @normalized_query.gsub(/([\'\"]).+?([\'\"])/, "XXX") # Replace strings
    #@normalized_query = @normalized_query.gsub(/\/\*[A-Za-z0-9\W\S]*/, "") # Remove comments '/* blah */
  end

  # The SQL with numbers and quoted strings replaced by "XXX".
  def getNormalizedQuery
    @normalized_query
  end

  def getUser
    @user
  end

  def getUrl
    @url
  end

  def getIp
    @ip
  end

  # Query time as an Integer.
  def getTime
    @time
  end

  # Lock time as an Integer.
  def getLock
    @lock
  end

  def to_s
    "Date: #{@date}, Time #{@time}, Lock #{@lock}, Sent #{@sent}, Rows #{@rows} \n #{@sql}"
  end
end

# Collects every Query that shares one normalized SQL string and reports
# min / max / average / median query and lock times for the group.
class QueryTotal

  # sql - the normalized SQL string that identifies this group
  def initialize(sql)
    @sql = sql
    @queries = []
    @max_time = 0
    @max_lock = 0
    # -1 means "no query added yet"
    @min_time = -1
    @min_lock = -1
  end

  # Adds a Query to the group and updates the running minimum / maximum values.
  def addQuery(query)
    @queries.push(query)

    @max_time = query.getTime if @max_time < query.getTime
    @max_lock = query.getLock if @max_lock < query.getLock
    @min_time = query.getTime if @min_time == -1 || @min_time > query.getTime
    @min_lock = query.getLock if @min_lock == -1 || @min_lock > query.getLock
  end

  # User of the most recently added query (nil when the group is empty).
  def getUser
    newest = @queries.last
    newest && newest.getUser
  end

  # Url field of the most recently added query (nil when the group is empty).
  def getUrl
    newest = @queries.last
    newest && newest.getUrl
  end

  # Ip field of the most recently added query (nil when the group is empty).
  def getIp
    newest = @queries.last
    newest && newest.getIp
  end

  def getMax_time
    @max_time
  end

  def getMax_lock
    @max_lock
  end

  # Returns -1 until at least one query has been added.
  def getMin_time
    @min_time
  end

  # Returns -1 until at least one query has been added.
  def getMin_lock
    @min_lock
  end

  def getNumberQueries
    @queries.length
  end

  # Middle element of the sorted query times (the upper of the two middle
  # values when the count is even). Requires at least one query.
  def getMedianTime
    @queries.map { |q| q.getTime }.sort[@queries.length / 2]
  end

  # Middle element of the sorted lock times (the upper of the two middle
  # values when the count is even). Requires at least one query.
  def getMedianLock
    @queries.map { |q| q.getLock }.sort[@queries.length / 2]
  end

  # Integer (truncating) mean of the query times. Requires at least one query.
  def getAverageTime
    @queries.inject(0) { |total, q| total + q.getTime } / @queries.length
  end

  # Integer (truncating) mean of the lock times. Requires at least one query.
  def getAverageLock
    @queries.inject(0) { |total, q| total + q.getLock } / @queries.length
  end

  def to_s
    "Max time: #{@max_time}, Max lock #{@max_lock}, Number of queries #{@queries.length} \n #{@sql}"
  end

  # Prints the report section for this group to standard output.
  # Groups with fewer than 10 queries list every individual time and lock
  # value in ascending order; larger groups print only the min-to-max range.
  def display
    puts "#{@queries.length} Queries"
    puts "user: #{getUser}"
    puts "url: #{getUrl}"
    puts "ip: #{getIp}"
    if @queries.length < 10
      print "Taking "
      @queries.map { |q| q.getTime }.sort.each { |seconds| print "#{seconds} " }
      puts "seconds to complete"

      # The lock values are sorted on their own; previously the result of a
      # non-destructive sort was discarded, so they were printed in time order.
      print "Locking for "
      @queries.map { |q| q.getLock }.sort.each { |seconds| print "#{seconds} " }
      puts "seconds"
    else
      puts "Taking #{@min_time} to #{@max_time} seconds to complete"
      puts "Locking for #{@min_lock} to #{@max_lock} seconds"
    end

    puts "Average time: #{getAverageTime}, Median time #{getMedianTime}"
    puts "Average lock: #{getAverageLock}, Median lock #{getMedianLock}"

    puts
    puts "#{@sql}"
  end

end

# Converts the text following "# Time:" into a Date.
# Understands the classic "YYMMDD hh:mm:ss" stamp and the ISO-8601
# "YYYY-MM-DDThh:mm:ss..." stamp; only the date part is used.
# Returns nil for anything else (including non-String values), so that one
# malformed header makes the caller skip that entry instead of aborting.
def parse_log_date(stamp)
  return nil unless stamp.is_a?(String)

  text = stamp.strip
  if (short = text[/\A\d{6}/])
    Date.strptime(short, '%y%m%d')
  elsif (iso = text[/\A\d{4}-\d{2}-\d{2}/])
    Date.strptime(iso, '%Y-%m-%d')
  end
rescue ArgumentError
  # Right shape but not a real calendar date (e.g. month 13).
  nil
end

#
# Starts Here
#

begin
  # Entries older than this are ignored (today minus 8 days).
  cutoff = Date.today - 8

  # The block form closes the file even when parsing raises.
  File.open(logPath, "r") do |file|
    while (line = file.gets)
      # Every entry starts with a header line beginning with '#'.
      # Anything else normally means the query's header is messed up; skip it.
      next unless line[0,1] == '#'

      # First line in the query header is the time in which the query happened
      if line[0,7] == "# Time:"
        date = line.sub(/\A# Time:/, "").strip

        # The line after the timestamp carries the user / host fields
        line = file.gets
        break unless line # log ends in the middle of a header
        sl = line.split(" ")
        user = sl[2]
        url = sl[4]
        ip = sl[5]
      else
        # Header without a "# Time:" line: no date, so the entry is dropped below.
        date = nil
      end

      # This line (3rd) has all the important info. Time, Lock etc.
      line = file.gets
      break unless line
      sl = line.split(" ")
      time = sl[2]
      lock = sl[4]
      sent = sl[6]
      rows = sl[8]

      # The next line is the sql query
      sql = file.gets
      break unless sql
      if sql[0,3] == 'use'
        # When a use statement has been passed as a part of the query the next line is the actual query
        sql = file.gets
        break unless sql
      end

      # Some queries span multiple lines: keep appending until the next header
      # line, then rewind so the outer loop reads that header again.
      position = file.pos
      while (next_line = file.gets) && next_line[0,1] != '#'
        position = file.pos
        sql << next_line
      end
      file.pos = position

      # Create and store query object
      # Entries without a valid timestamp, or older than the cutoff, are ignored
      logged_on = parse_log_date(date)
      if logged_on && logged_on >= cutoff
        queries.push(Query.new(sql, date, time, lock, rows, sent, user, url, ip))
      end
    end
  end

  #
  # Go over all the query objects and group them in the appropriate QueryTotals object based on the SQL
  #
  queries.each do |query|
    key = query.getNormalizedQuery
    queryTotals[key] ||= QueryTotal.new(key)
    queryTotals[key].addQuery(query)
  end

  #
  # Sort the query totals by the requested column (ascending; the number of
  # queries is used when no known column is given) and display the output
  #
  queryTotalsArray = queryTotals.values

  case orderBy
  when "lock"
    queryTotalsArray.sort! { |a,b| a.getMax_lock <=> b.getMax_lock }
  when "time"
    queryTotalsArray.sort! { |a,b| a.getMax_time <=> b.getMax_time }
  else
    queryTotalsArray.sort! { |a,b| a.getNumberQueries <=> b.getNumberQueries }
  end

  puts
  queryTotalsArray.each do |queryTotal|
    puts spacer
    queryTotal.display
  end

rescue => err
  puts "Exception: #{err}"
end