├── .github └── dependabot.yml ├── .gitignore ├── LICENSE ├── README.md ├── bin ├── mysql_binlog_dump └── mysql_binlog_summary ├── lib ├── mysql_binlog.rb └── mysql_binlog │ ├── binlog.rb │ ├── binlog_event_parser.rb │ ├── binlog_field_parser.rb │ ├── mysql_character_set.rb │ ├── reader │ ├── binlog_file_reader.rb │ ├── binlog_stream_reader.rb │ └── debugging_reader.rb │ └── version.rb └── mysql_binlog.gemspec /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: bundler 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | time: "13:00" 8 | open-pull-requests-limit: 10 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | mysql-bin.?????? 2 | binlog.?????? 3 | *.gem 4 | *.*history 5 | .vscode -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This software is licensed under the Revised (3-clause) BSD license as follows: 2 | 3 | Copyright (c) 2013, Twitter, Inc. 4 | Copyright (c) 2013, Jeremy Cole 5 | Copyright (c) 2013, Davi Arnaut 6 | 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions are met: 11 | * Redistributions of source code must retain the above copyright 12 | notice, this list of conditions and the following disclaimer. 13 | * Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | * Neither the name of the nor the 17 | names of its contributors may be used to endorse or promote products 18 | derived from this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 21 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 22 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL BE LIABLE FOR ANY 24 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 25 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 26 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 27 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Library for parsing MySQL binary logs in Ruby # 2 | 3 | This library parses a MySQL binary log in pure Ruby and produces hashes as output, much like the following `Query` event: 4 | 5 | ``` 6 | {:type=>:query_event, 7 | :position=>107, 8 | :filename=>"mysql-bin.000001", 9 | :header=> 10 | {:event_type=>2, 11 | :server_id=>1, 12 | :flags=>[], 13 | :event_length=>117, 14 | :timestamp=>1340414127, 15 | :next_position=>224}, 16 | :event=> 17 | {:thread_id=>1, 18 | :query=>"create table a (id int, a char(100), primary key (id))", 19 | :status=> 20 | {:sql_mode=>0, 21 | :charset=> 22 | {:character_set_client=> 23 | {:character_set=>:utf8, :collation=>:utf8_general_ci}, 24 | :collation_connection=> 25 | {:character_set=>:utf8, :collation=>:utf8_general_ci}, 26 | :collation_server=> 27 | {:character_set=>:latin1, :collation=>:latin1_swedish_ci}}, 28 | :flags2=>[], 29 | :catalog=>"std"}, 30 | :elapsed_time=>0, 31 | :error_code=>0, 32 | :db=>"test"}} 33 | ``` 34 | 35 | # Status # 36 | 37 | All event types can be read, but may not be parsed, as not all event types are currently fully supported. Over time this will improve. The current status of event support is documented below. 38 | 39 | ## Event Types ## 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 |
| ID | Event Type | Status |
|----|------------|--------|
| 1 | start_event_v3 | Unsupported (deprecated). |
| 2 | query_event | Fully supported with all fields parsed. |
| 3 | stop_event | Fully supported with all fields parsed. |
| 4 | rotate_event | Fully supported with all fields parsed. |
| 5 | intvar_event | Fully supported with all fields parsed. |
| 6 | load_event | Unsupported (deprecated). |
| 7 | slave_event | Unsupported (deprecated). |
| 8 | create_file_event | Unsupported (deprecated). |
| 9 | append_block_event | Unsupported. |
| 10 | exec_load_event | Unsupported (deprecated). |
| 11 | delete_file_event | Unsupported. |
| 12 | new_load_event | Unsupported (deprecated). |
| 13 | rand_event | Fully supported with all fields parsed. |
| 14 | user_var_event | Fully supported with all fields parsed. |
| 15 | format_description_event | Fully supported with all fields parsed. |
| 16 | xid_event | Fully supported with all fields parsed. |
| 17 | begin_load_query_event | Unsupported. |
| 18 | execute_load_query_event | Unsupported. |
| 19 | table_map_event | Fully supported with all fields parsed. |
| 20 | pre_ga_write_rows_event | Unsupported (deprecated). |
| 21 | pre_ga_update_rows_event | Unsupported (deprecated). |
| 22 | pre_ga_delete_rows_event | Unsupported (deprecated). |
| 23 | write_rows_event_v1 | Fully supported with all fields parsed. |
| 24 | update_rows_event_v1 | Fully supported with all fields parsed. |
| 25 | delete_rows_event_v1 | Fully supported with all fields parsed. |
| 26 | incident_event | Unsupported. |
| 27 | heartbeat_log_event | Unsupported. |
| 28 | ignorable_log_event | Unsupported. |
| 29 | rows_query_log_event | Fully supported with all fields parsed. |
| 30 | write_rows_event_v2 | Fully supported with all fields parsed. |
| 31 | update_rows_event_v2 | Fully supported with all fields parsed. |
| 32 | delete_rows_event_v2 | Fully supported with all fields parsed. |
| 33 | gtid_log_event | Fully supported with all fields parsed. |
| 34 | anonymous_gtid_log_event | Fully supported with all fields parsed. |
| 35 | previous_gtids_log_event | Fully supported with all fields parsed. |
| 36 | transaction_context_event | Unsupported. |
| 37 | view_change_event | Unsupported. |
| 38 | xa_prepare_log_event | Unsupported. |
| 50 | table_metadata_event | Specific to Twitter MySQL 5.5.24.t7+. Fully supported with all fields parsed. |
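
To see which of these events appear in a particular log, the library can be driven as shown in the `Binlog` class documentation. A minimal sketch (the filename is illustrative, and `checksum = :crc32` applies only to logs written with CRC32 checksums):

```
require 'mysql_binlog'
include MysqlBinlog

# Open a binary log file on disk and parse events from it.
binlog = Binlog.new(BinlogFileReader.new("mysql-bin.000001"))
binlog.checksum = :crc32

# Print the type of each event, e.g. :query_event or :write_rows_event_v2.
binlog.each_event do |event|
  puts event[:type]
end
```

The `mysql_binlog_dump` and `mysql_binlog_summary` scripts in `bin/` are fuller examples of the same pattern.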
## Data Types Supported in Row Events ##
| ID | SQL Data Type | MySQL Internal Type | Status |
|----|---------------|---------------------|--------|
| **Numeric Types** | | | |
| 1 | TINYINT | MYSQL_TYPE_TINY | Fully supported. |
| 2 | SMALLINT | MYSQL_TYPE_SHORT | Fully supported. |
| 9 | MEDIUMINT | MYSQL_TYPE_INT24 | Fully supported. |
| 3 | INT | MYSQL_TYPE_LONG | Fully supported. |
| 8 | BIGINT | MYSQL_TYPE_LONGLONG | Fully supported. |
| 4 | FLOAT | MYSQL_TYPE_FLOAT | Fully supported. |
| 5 | DOUBLE | MYSQL_TYPE_DOUBLE | Fully supported. |
| 246 | DECIMAL | MYSQL_TYPE_NEWDECIMAL | Fully supported using BigDecimal. |
| **Temporal Types** | | | |
| 7 | TIMESTAMP | MYSQL_TYPE_TIMESTAMP | Fully supported. |
| 17 | TIMESTAMP(n) | MYSQL_TYPE_TIMESTAMP2 | Fully supported. |
| 12 | DATETIME | MYSQL_TYPE_DATETIME | Fully supported. |
| 18 | DATETIME(n) | MYSQL_TYPE_DATETIME2 | Fully supported. |
| 10 | DATE | MYSQL_TYPE_DATE | Fully supported. |
| 14 | DATE | MYSQL_TYPE_NEWDATE | Unsupported. |
| 11 | TIME | MYSQL_TYPE_TIME | Fully supported. |
| 19 | TIME(n) | MYSQL_TYPE_TIME2 | Unsupported. |
| 13 | YEAR | MYSQL_TYPE_YEAR | Fully supported. |
| **String Types** | | | |
| 15, 253, 254 | CHAR, VARCHAR | MYSQL_TYPE_STRING | Fully supported. |
| 249, 252, 250, 251 | TINYBLOB, BLOB, MEDIUMBLOB, LONGBLOB | MYSQL_TYPE_BLOB | Fully supported. |
| **Other Types** | | | |
| 247 | ENUM | MYSQL_TYPE_STRING | Supported, but values returned are internal representations. |
| 248 | SET | MYSQL_TYPE_STRING | Supported, but values returned are internal representations. |
| 16 | BIT | MYSQL_TYPE_BIT | Supported, treated as integer of appropriate size. |
| 255 | GEOMETRY | MYSQL_TYPE_GEOMETRY | Supported, treated as BLOB. |
| 245 | JSON | MYSQL_TYPE_JSON | Supported, treated as BLOB. |
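
Row events decode column values according to the table above. A sketch of reading the decoded row images (the filename is illustrative; the hashes follow the structure produced by this library, e.g. `:row_image`, `:before`, `:after`, and the matching `table_map_event` is parsed from the same stream before its row events):

```
require 'mysql_binlog'
include MysqlBinlog

binlog = Binlog.new(BinlogFileReader.new("mysql-bin.000001"))

binlog.each_event do |event|
  # Row events share a common structure; ROW_EVENT_TYPES lists them all.
  next unless ROW_EVENT_TYPES.include?(event[:type])

  table = event[:event][:table]
  event[:event][:row_image].each do |row_image|
    # write_rows events carry only an :after image, delete_rows only a
    # :before image, and update_rows both plus a :diff of changed columns.
    image = row_image[:after] || row_image[:before]
    puts "#{table[:db]}.#{table[:table]}: #{image[:image].inspect}"
  end
end
```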
410 | -------------------------------------------------------------------------------- /bin/mysql_binlog_dump: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'mysql_binlog' 4 | require 'bigdecimal' 5 | require 'getoptlong' 6 | require 'ostruct' 7 | require 'pp' 8 | 9 | def usage(exit_code, message = nil) 10 | print "Error: #{message}\n\n" unless message.nil? 11 | 12 | print <<'END_OF_USAGE' 13 | 14 | Usage: 15 | To read from a binary log file on disk: 16 | mysql_binlog_dump [options] 17 | 18 | --help, -? 19 | Show this help. 20 | 21 | --file, -f 22 | Read from a binary log file on disk (deprecated). 23 | 24 | --checksum, -c 25 | Enable CRC32 checksums. 26 | 27 | --position, -j 28 | Start the first file at a particular position. 29 | 30 | --debug, -d 31 | Debug reading from the binary log, showing calls into the reader and the 32 | data bytes read. This is useful for debugging the mysql_binlog library 33 | as well as debugging problems with binary logs. 34 | 35 | --tail, -t 36 | When reading from a file, follow the end of the binary log file instead 37 | of exiting when reaching the end. Exit with Control-C. 38 | 39 | --rotate, -r 40 | When reading from a file, follow the rotate events which may be at the 41 | end of a file (due to log rotation) so that the stream can be followed 42 | through multiple files. This is especially useful with --tail. 43 | 44 | END_OF_USAGE 45 | 46 | exit exit_code 47 | end 48 | 49 | 50 | @options = OpenStruct.new 51 | @options.file = nil 52 | @options.checksum = nil 53 | @options.position = nil 54 | @options.debug = false 55 | @options.tail = false 56 | @options.rotate = false 57 | @options.filenames = [] 58 | 59 | getopt_options = [ 60 | [ "--help", "-?", GetoptLong::NO_ARGUMENT ], 61 | [ "--file", "-f", GetoptLong::REQUIRED_ARGUMENT ], 62 | [ "--checksum", "-c", GetoptLong::NO_ARGUMENT ], 63 | [ "--position", "-j", GetoptLong::REQUIRED_ARGUMENT ], 64 | [ "--debug", "-d", GetoptLong::NO_ARGUMENT ], 65 | [ "--tail", "-t", GetoptLong::NO_ARGUMENT ], 66 | [ "--rotate", "-r", GetoptLong::NO_ARGUMENT ], 67 | ] 68 | 69 | getopt = GetoptLong.new(*getopt_options) 70 | 71 | getopt.each do |opt, arg| 72 | case opt 73 | when "--help" 74 | usage 0 75 | when "--file" 76 | @options.filenames << arg 77 | when "--checksum" 78 | @options.checksum = :crc32 79 | when "--position" 80 | @options.position = arg.to_i 81 | when "--debug" 82 | @options.debug = true 83 | when "--tail" 84 | @options.tail = true 85 | when "--rotate" 86 | @options.rotate = true 87 | end 88 | end 89 | 90 | @options.filenames.concat(ARGV) 91 | 92 | if @options.filenames.empty? 93 | usage 1, "One or more filenames must be provided" 94 | end 95 | 96 | @options.filenames.each_with_index do |filename, i| 97 | reader = MysqlBinlog::BinlogFileReader.new(filename) 98 | if @options.debug 99 | reader = MysqlBinlog::DebuggingReader.new(reader, :data => true, :calls => true) 100 | end 101 | binlog = MysqlBinlog::Binlog.new(reader) 102 | binlog.checksum = @options.checksum 103 | 104 | reader.tail = @options.tail 105 | binlog.ignore_rotate = !@options.rotate 106 | 107 | binlog.seek(@options.position) if @options.position && i.zero? 
108 | 109 | binlog.each_event do |event| 110 | pp event 111 | puts 112 | end 113 | end -------------------------------------------------------------------------------- /bin/mysql_binlog_summary: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'mysql_binlog' 4 | require 'getoptlong' 5 | require 'ostruct' 6 | 7 | def usage(exit_code, message = nil) 8 | print "Error: #{message}\n\n" unless message.nil? 9 | 10 | print <<'END_OF_USAGE' 11 | 12 | Usage: 13 | To read from a binary log file on disk: 14 | mysql_binlog_summary [options] 15 | 16 | --help, -? 17 | Show this help. 18 | 19 | --file, -f 20 | Read from a binary log file on disk (deprecated). 21 | 22 | --checksum, -c 23 | Enable CRC32 checksums. 24 | 25 | --tail, -t 26 | When reading from a file, follow the end of the binary log file instead 27 | of exiting when reaching the end. Exit with Control-C. 28 | 29 | --rotate, -r 30 | When reading from a file, follow the rotate events which may be at the 31 | end of a file (due to log rotation) so that the stream can be followed 32 | through multiple files. This is especially useful with --tail. 33 | 34 | END_OF_USAGE 35 | 36 | exit exit_code 37 | end 38 | 39 | @options = OpenStruct.new 40 | @options.tail = false 41 | @options.rotate = false 42 | @options.checksum = nil 43 | @options.filenames = [] 44 | 45 | getopt_options = [ 46 | [ "--help", "-?", GetoptLong::NO_ARGUMENT ], 47 | [ "--file", "-f", GetoptLong::REQUIRED_ARGUMENT ], 48 | [ "--tail", "-t", GetoptLong::NO_ARGUMENT ], 49 | [ "--rotate", "-r", GetoptLong::NO_ARGUMENT ], 50 | [ "--checksum", "-c", GetoptLong::NO_ARGUMENT ], 51 | ] 52 | 53 | getopt = GetoptLong.new(*getopt_options) 54 | 55 | getopt.each do |opt, arg| 56 | case opt 57 | when "--help" 58 | usage 0 59 | when "--file" 60 | @options.filenames << arg 61 | when "--tail" 62 | @options.tail = true 63 | when "--rotate" 64 | @options.rotate = true 65 | when "--checksum" 66 | @options.checksum = :crc32 67 | end 68 | end 69 | 70 | @options.filenames.concat(ARGV) 71 | 72 | if @options.filenames.empty? 73 | usage 1, "A file must be provided" 74 | end 75 | 76 | files = {} 77 | min_timestamp = nil 78 | max_timestamp = nil 79 | events = [] 80 | events_processed = 0 81 | 82 | @options.filenames.each do |filename| 83 | reader = MysqlBinlog::BinlogFileReader.new(filename) 84 | binlog = MysqlBinlog::Binlog.new(reader) 85 | reader.tail = @options.tail 86 | binlog.ignore_rotate = !@options.rotate 87 | binlog.checksum = @options.checksum 88 | 89 | file_min_timestamp = nil 90 | file_max_timestamp = nil 91 | file_events_processed = 0 92 | 93 | #binlog.filter_event_types = [:query_event] 94 | #binlog.filter_flags = [0] 95 | query_pattern = /^(INSERT|UPDATE|DELETE)\s+(?:(?:INTO|FROM)\s+)?[`]?(\S+?)[`]?\s+/i 96 | 97 | binlog.each_event do |event| 98 | verb = nil 99 | table = nil 100 | 101 | if event[:type] == :query_event 102 | if match_query = event[:event][:query].match(query_pattern) 103 | verb = match_query[1].downcase 104 | table = match_query[2] 105 | end 106 | end 107 | 108 | if MysqlBinlog::ROW_EVENT_TYPES.include? 
event[:type] 109 | verb = event[:type].to_s.sub(/_event_v[12]/, '') 110 | table = event[:event][:table][:table] 111 | end 112 | 113 | timestamp = event[:header][:timestamp] 114 | 115 | file_min_timestamp = [file_min_timestamp || timestamp, timestamp].min 116 | file_max_timestamp = [file_max_timestamp || timestamp, timestamp].max 117 | 118 | net_change = 0 119 | event[:event][:row_image]&.each do |row_image| 120 | case verb 121 | when "delete_rows" 122 | net_change -= row_image[:before][:size] 123 | when "update_rows" 124 | net_change += row_image[:after][:size] - row_image[:before][:size] 125 | when "write_rows" 126 | net_change += row_image[:after][:size] 127 | end 128 | end 129 | 130 | events << { 131 | timestamp: timestamp, 132 | size: event[:header][:payload_length], 133 | type: event[:type], 134 | verb: verb, 135 | table: table, 136 | net_change: net_change, 137 | } 138 | 139 | file_events_processed += 1 140 | events_processed += 1 141 | 142 | if (file_events_processed % 1000) == 0 143 | puts "%-32s %6d MiB %10d %10d" % [ 144 | filename, event[:position]/(1024**2), file_events_processed, events_processed 145 | ] 146 | end 147 | end 148 | 149 | files[filename] = { 150 | filename: filename, 151 | events: file_events_processed, 152 | min_timestamp: file_min_timestamp, 153 | max_timestamp: file_max_timestamp, 154 | } 155 | 156 | min_timestamp = [min_timestamp || file_min_timestamp, file_min_timestamp].min 157 | max_timestamp = [max_timestamp || file_max_timestamp, file_max_timestamp].max 158 | end 159 | puts "Done." 160 | puts 161 | 162 | duration = max_timestamp - min_timestamp 163 | 164 | puts "File summary:" 165 | files.each do |filename, file| 166 | puts " %-32s%10s%26s%26s" % [ 167 | File.basename(filename), 168 | file[:events], 169 | Time.at(file[:min_timestamp]).utc, 170 | Time.at(file[:max_timestamp]).utc, 171 | ] 172 | end 173 | puts 174 | 175 | puts "Summary:" 176 | puts " Files: %d" % [files.size] 177 | puts " Events: %d" % [events_processed] 178 | puts " Min Time: %s" % [Time.at(min_timestamp).utc] 179 | puts " Max Time: %s" % [Time.at(max_timestamp).utc] 180 | puts " Duration: %ds" % [duration] 181 | puts " Event Rate: %0.2f/s" % [events_processed.to_f / duration.to_f] 182 | puts 183 | 184 | events_by_type = Hash.new(0) 185 | events_by_verb_and_table = {} 186 | size_by_verb_and_table = {} 187 | size_by_table = Hash.new(0) 188 | net_change_by_verb_and_table = {} 189 | net_change_by_table = Hash.new(0) 190 | events.each do |event| 191 | events_by_type[event[:type]] += 1 192 | if event[:verb] 193 | events_by_verb_and_table[event[:verb]] ||= Hash.new(0) 194 | events_by_verb_and_table[event[:verb]][event[:table]] += 1 195 | size_by_verb_and_table[event[:verb]] ||= Hash.new(0) 196 | size_by_verb_and_table[event[:verb]][event[:table]] += event[:size] 197 | size_by_table[event[:table]] += event[:size] 198 | net_change_by_verb_and_table[event[:verb]] ||= Hash.new(0) 199 | net_change_by_verb_and_table[event[:verb]][event[:table]] += event[:net_change] 200 | net_change_by_table[event[:table]] += event[:net_change] 201 | end 202 | end 203 | 204 | puts "Events by type:" 205 | events_by_type.sort { |a, b| b[1] <=> a[1] }.each do |type, count| 206 | puts " %-50s%10d%10.2f/s" % [type, count, count.to_f / duration.to_f] 207 | end 208 | puts 209 | 210 | puts "Events by verb and table:" 211 | events_by_verb_and_table.sort.each do |verb, table_and_count| 212 | puts "%s\n" % [verb] 213 | puts " %-50s%10s%14s%14s%14s" % [ 214 | "", "Count", "Rate/s", "Net (KiB/s)", "Size (KiB/s)" 215 | ] 216 | 
table_and_count.sort { |a, b| b[1] <=> a[1] }.each do |table, count| 217 | puts " %-50s%10d%14s%+14.2f%14.2f" % [ 218 | table, count, "%10.2f/s" % [count.to_f / duration.to_f], 219 | net_change_by_verb_and_table[verb][table] / 1024.0 / duration.to_f, 220 | size_by_verb_and_table[verb][table] / 1024.0 / duration.to_f, 221 | ] 222 | end 223 | puts 224 | end 225 | 226 | puts "Event payload by table (top 10):" 227 | size_by_table.sort { |a, b| b[1].abs <=> a[1].abs }.first(10).each do |table, size| 228 | puts " %-50s%+10.2f KiB/s" % [ 229 | table, size.to_f / 1024.0 / duration.to_f 230 | ] 231 | end 232 | puts 233 | 234 | puts "Net change by table (top 10):" 235 | net_change_by_table.sort { |a, b| b[1].abs <=> a[1].abs }.first(10).each do |table, net_change| 236 | puts " %-50s%+10.2f KiB/s" % [ 237 | table, net_change.to_f / 1024.0 / duration.to_f 238 | ] 239 | end 240 | puts 241 | 242 | -------------------------------------------------------------------------------- /lib/mysql_binlog.rb: -------------------------------------------------------------------------------- 1 | require 'mysql_binlog/version' 2 | require 'mysql_binlog/mysql_character_set' 3 | require 'mysql_binlog/binlog' 4 | require 'mysql_binlog/binlog_field_parser' 5 | require 'mysql_binlog/binlog_event_parser' 6 | require 'mysql_binlog/reader/debugging_reader' 7 | require 'mysql_binlog/reader/binlog_file_reader' 8 | require 'mysql_binlog/reader/binlog_stream_reader' 9 | 10 | # The MysqlBinlog module contains a series of classes for reading and 11 | # parsing binary log events from MySQL binary logs. 12 | module MysqlBinlog 13 | end 14 | -------------------------------------------------------------------------------- /lib/mysql_binlog/binlog.rb: -------------------------------------------------------------------------------- 1 | module MysqlBinlog 2 | # This version of the binary log format is not supported by this library. 3 | class UnsupportedVersionException < Exception; end 4 | 5 | # This field type is not supported by this library. 6 | class UnsupportedTypeException < Exception; end 7 | 8 | # An error was encountered when trying to read the log, which was likely 9 | # due to garbage data in the log. Continuing is likely impossible. 10 | class MalformedBinlogException < Exception; end 11 | 12 | # When attempting a read, no data was returned. 13 | class ZeroReadException < Exception; end 14 | 15 | # When attempting a read, fewer bytes of data were returned than were 16 | # requested by the reader, likely indicating a truncated file or corrupted 17 | # event. 18 | class ShortReadException < Exception; end 19 | 20 | # After an event or other structure was fully read, the log position exceeded 21 | # the end of the structure being read. This would indicate a bug in parsing 22 | # the fields in the structure. For example, reading garbage data for a length 23 | # field may cause a string read based on that length to read data well past 24 | # the end of the event or structure. This is essentially always fatal. 25 | class OverReadException < Exception; end 26 | 27 | # Read a binary log, parsing and returning events. 28 | # 29 | # == Examples 30 | # 31 | # A basic example of using the Binlog class: 32 | # 33 | # require 'mysql_binlog' 34 | # include MysqlBinlog 35 | # 36 | # # Open a binary log from a file on disk. 37 | # binlog = Binlog.new(BinlogFileReader.new("mysql-bin.000001")) 38 | # 39 | # # Iterate over all events from the log, printing the event type (such 40 | # # as :query_event, :write_rows_event, etc.) 
41 | # binlog.each_event do |event| 42 | # puts event[:type] 43 | # end 44 | # 45 | class Binlog 46 | attr_reader :fde 47 | attr_accessor :reader 48 | attr_accessor :field_parser 49 | attr_accessor :event_parser 50 | attr_accessor :filter_event_types 51 | attr_accessor :filter_flags 52 | attr_accessor :ignore_rotate 53 | attr_accessor :max_query_length 54 | attr_accessor :checksum 55 | 56 | def initialize(reader) 57 | @reader = reader 58 | @field_parser = BinlogFieldParser.new(self) 59 | @event_parser = BinlogEventParser.new(self) 60 | @fde = nil 61 | @filter_event_types = nil 62 | @filter_flags = nil 63 | @ignore_rotate = false 64 | @max_query_length = 1048576 65 | @checksum = :nil 66 | end 67 | 68 | # Rewind to the beginning of the log, if supported by the reader. The 69 | # reader may throw an exception if rewinding is not supported (e.g. for 70 | # a stream-based reader). 71 | def rewind 72 | reader.rewind 73 | end 74 | 75 | def seek(position) 76 | # Try to find and consume the format description event which is necessary for understanding 77 | # the subsequent event format; can't seek arbitrarily until we have it. 78 | read_event until @fde 79 | reader.seek(position) 80 | end 81 | 82 | # Skip the remainder of this event. This can be used to skip an entire 83 | # event or merely the parts of the event this library does not understand. 84 | def skip_event(header) 85 | reader.skip(header) 86 | end 87 | private :skip_event 88 | 89 | # Read the content of the event, which follows the header. 90 | def read_event_fields(header) 91 | # Delegate the parsing of the event content to a method of the same name 92 | # in BinlogEventParser. 93 | if event_parser.methods.map(&:to_sym).include? header[:event_type] 94 | fields = event_parser.send(header[:event_type], header) 95 | end 96 | 97 | unless fields 98 | fields = { 99 | payload: reader.read(header[:payload_length]), 100 | } 101 | end 102 | 103 | # Check if we've read past the end of the event. This is normally because 104 | # of an unsupported substructure in the event causing field misalignment 105 | # or a bug in the event reader method in BinlogEventParser. This may also 106 | # be due to user error in providing an initial start position or later 107 | # seeking to a position which is not a valid event start position. 108 | if reader.position > header[:next_position] 109 | raise OverReadException.new("Read past end of event; corrupted event, bad start position, or bug in mysql_binlog?") 110 | end 111 | 112 | # Anything left unread at this point is skipped based on the event length 113 | # provided in the header. In this way, it is possible to skip over events 114 | # that are not able to be parsed completely by this library. 115 | skip_event(header) 116 | 117 | fields 118 | end 119 | private :read_event_fields 120 | 121 | def checksum_length 122 | case @checksum 123 | when :crc32 124 | 4 125 | else 126 | 0 127 | end 128 | end 129 | 130 | def payload_length(header) 131 | @fde ? (header[:event_length] - @fde[:header_length] - checksum_length) : 0 132 | end 133 | 134 | # Scan events until finding one that isn't rejected by the filter rules. 135 | # If there are no filter rules, this will return the next event provided 136 | # by the reader. 137 | def read_event 138 | while true 139 | skip_this_event = false 140 | return nil if reader.end? 141 | 142 | filename = reader.filename 143 | position = reader.position 144 | 145 | # Read the common header for an event. Every event has a header. 
146 | unless header = event_parser.event_header 147 | return nil 148 | end 149 | 150 | # Skip the remaining part of the header which might not have been 151 | # parsed. 152 | if @fde 153 | reader.seek(position + @fde[:header_length]) 154 | header[:payload_length] = payload_length(header) 155 | header[:payload_end] = position + @fde[:header_length] + payload_length(header) 156 | else 157 | header[:payload_length] = 0 158 | header[:payload_end] = header[:next_position] 159 | end 160 | 161 | 162 | if @filter_event_types 163 | unless @filter_event_types.include? header[:event_type] 164 | skip_this_event = true 165 | end 166 | end 167 | 168 | if @filter_flags 169 | unless @filter_flags.include? header[:flags] 170 | skip_this_event = true 171 | end 172 | end 173 | 174 | # Never skip over rotate_event or format_description_event as they 175 | # are critical to understanding the format of this event stream. 176 | if skip_this_event 177 | unless [:rotate_event, :format_description_event].include? header[:event_type] 178 | skip_event(header) 179 | next 180 | end 181 | end 182 | 183 | fields = read_event_fields(header) 184 | 185 | case header[:event_type] 186 | when :rotate_event 187 | unless ignore_rotate 188 | reader.rotate(fields[:name], fields[:pos]) 189 | end 190 | when :format_description_event 191 | process_fde(fields) 192 | end 193 | 194 | break 195 | end 196 | 197 | { 198 | :type => header[:event_type], 199 | :filename => filename, 200 | :position => position, 201 | :header => header, 202 | :event => fields, 203 | } 204 | end 205 | 206 | # Process a format description event, which describes the version of this 207 | # file, and the format of events which will appear in this file. This also 208 | # provides the version of the MySQL server which generated this file. 209 | def process_fde(fde) 210 | if (version = fde[:binlog_version]) != 4 211 | raise UnsupportedVersionException.new("Binlog version #{version} is not supported") 212 | end 213 | 214 | # Save the interesting fields from an FDE so that this information is 215 | # available at any time later. 216 | @fde = { 217 | :header_length => fde[:header_length], 218 | :binlog_version => fde[:binlog_version], 219 | :server_version => fde[:server_version], 220 | } 221 | end 222 | private :process_fde 223 | 224 | # Iterate through all events. 225 | def each_event 226 | unless block_given? 227 | return Enumerable::Enumerator.new(self, :each_event) 228 | end 229 | 230 | while event = read_event 231 | yield event 232 | end 233 | end 234 | end 235 | end 236 | -------------------------------------------------------------------------------- /lib/mysql_binlog/binlog_event_parser.rb: -------------------------------------------------------------------------------- 1 | module MysqlBinlog 2 | # A hash of all possible event type IDs. 
3 | # 4 | # Enumerated in sql/log_event.h line ~539 as Log_event_type 5 | EVENT_TYPES_HASH = { 6 | :unknown_event => 0, # 7 | :start_event_v3 => 1, # (deprecated) 8 | :query_event => 2, # 9 | :stop_event => 3, # 10 | :rotate_event => 4, # 11 | :intvar_event => 5, # 12 | :load_event => 6, # (deprecated) 13 | :slave_event => 7, # (deprecated) 14 | :create_file_event => 8, # (deprecated) 15 | :append_block_event => 9, # 16 | :exec_load_event => 10, # (deprecated) 17 | :delete_file_event => 11, # 18 | :new_load_event => 12, # (deprecated) 19 | :rand_event => 13, # 20 | :user_var_event => 14, # 21 | :format_description_event => 15, # 22 | :xid_event => 16, # 23 | :begin_load_query_event => 17, # 24 | :execute_load_query_event => 18, # 25 | :table_map_event => 19, # 26 | :pre_ga_write_rows_event => 20, # (deprecated) 27 | :pre_ga_update_rows_event => 21, # (deprecated) 28 | :pre_ga_delete_rows_event => 22, # (deprecated) 29 | :write_rows_event_v1 => 23, # 30 | :update_rows_event_v1 => 24, # 31 | :delete_rows_event_v1 => 25, # 32 | :incident_event => 26, # 33 | :heartbeat_log_event => 27, # 34 | :ignorable_log_event => 28, # 35 | :rows_query_log_event => 29, # 36 | :write_rows_event_v2 => 30, # 37 | :update_rows_event_v2 => 31, # 38 | :delete_rows_event_v2 => 32, # 39 | :gtid_log_event => 33, # 40 | :anonymous_gtid_log_event => 34, # 41 | :previous_gtids_log_event => 35, # 42 | :transaction_context_event => 36, # 43 | :view_change_event => 37, # 44 | :xa_prepare_log_event => 38, # 45 | 46 | :table_metadata_event => 50, # Only in Twitter MySQL 47 | } 48 | 49 | # A lookup array to map an integer event type ID to its symbol. 50 | EVENT_TYPES = EVENT_TYPES_HASH.inject(Array.new(256)) do |type_array, item| 51 | type_array[item[1]] = item[0] 52 | type_array 53 | end 54 | 55 | # A list of supported row-based replication event types. Since these all 56 | # have an identical structure, this list can be used by other programs to 57 | # know which events can be treated as row events. 58 | ROW_EVENT_TYPES = [ 59 | :write_rows_event_v1, 60 | :update_rows_event_v1, 61 | :delete_rows_event_v1, 62 | :write_rows_event_v2, 63 | :update_rows_event_v2, 64 | :delete_rows_event_v2, 65 | ] 66 | 67 | # Values for the +flags+ field that may appear in binary logs. There are 68 | # several other values that never appear in a file but may be used 69 | # in events in memory. 70 | # 71 | # Defined in sql/log_event.h line ~448 72 | EVENT_HEADER_FLAGS = { 73 | :binlog_in_use => 0x0001, # LOG_EVENT_BINLOG_IN_USE_F 74 | :thread_specific => 0x0004, # LOG_EVENT_THREAD_SPECIFIC_F 75 | :suppress_use => 0x0008, # LOG_EVENT_SUPPRESS_USE_F 76 | :artificial => 0x0020, # LOG_EVENT_ARTIFICIAL_F 77 | :relay_log => 0x0040, # LOG_EVENT_RELAY_LOG_F 78 | :ignorable => 0x0080, # LOG_EVENT_IGNORABLE_F 79 | :no_filter => 0x0100, # LOG_EVENT_NO_FILTER_F 80 | :mts_isolate => 0x0200, # LOG_EVENT_MTS_ISOLATE_F 81 | } 82 | 83 | # A mapping array for all values that may appear in the +status+ field of 84 | # a query_event. 
85 | # 86 | # Defined in sql/log_event.h line ~316 87 | QUERY_EVENT_STATUS_TYPES = [ 88 | :flags2, # 0 (Q_FLAGS2_CODE) 89 | :sql_mode, # 1 (Q_SQL_MODE_CODE) 90 | :catalog_deprecated, # 2 (Q_CATALOG_CODE) 91 | :auto_increment, # 3 (Q_AUTO_INCREMENT) 92 | :charset, # 4 (Q_CHARSET_CODE) 93 | :time_zone, # 5 (Q_TIME_ZONE_CODE) 94 | :catalog, # 6 (Q_CATALOG_NZ_CODE) 95 | :lc_time_names, # 7 (Q_LC_TIME_NAMES_CODE) 96 | :charset_database, # 8 (Q_CHARSET_DATABASE_CODE) 97 | :table_map_for_update, # 9 (Q_TABLE_MAP_FOR_UPDATE_CODE) 98 | :master_data_written, # 10 (Q_MASTER_DATA_WRITTEN_CODE) 99 | :invoker, # 11 (Q_INVOKER) 100 | :updated_db_names, # 12 (Q_UPDATED_DB_NAMES) 101 | :microseconds, # 13 (Q_MICROSECONDS) 102 | :commit_ts, # 14 (Q_COMMIT_TS) 103 | :commit_ts2, # 15 104 | :explicit_defaults_for_timestamp, # 16 (Q_EXPLICIT_DEFAULTS_FOR_TIMESTAMP) 105 | ] 106 | 107 | QUERY_EVENT_OVER_MAX_DBS_IN_EVENT_MTS = 254 108 | 109 | # A mapping hash for all values that may appear in the +flags2+ field of 110 | # a query_event. 111 | # 112 | # Defined in sql/log_event.h line ~521 in OPTIONS_WRITTEN_TO_BIN_LOG 113 | # 114 | # Defined in sql/sql_priv.h line ~84 115 | QUERY_EVENT_FLAGS2 = { 116 | :auto_is_null => 1 << 14, # OPTION_AUTO_IS_NULL 117 | :not_autocommit => 1 << 19, # OPTION_NOT_AUTOCOMMIT 118 | :no_foreign_key_checks => 1 << 26, # OPTION_NO_FOREIGN_KEY_CHECKS 119 | :relaxed_unique_checks => 1 << 27, # OPTION_RELAXED_UNIQUE_CHECKS 120 | } 121 | 122 | # A mapping array for all values that may appear in the +Intvar_type+ field 123 | # of an intvar_event. 124 | # 125 | # Enumerated in sql/log_event.h line ~613 as Int_event_type 126 | INTVAR_EVENT_INTVAR_TYPES = [ 127 | nil, # INVALID_INT_EVENT 128 | :last_insert_id, # LAST_INSERT_ID_EVENT 129 | :insert_id, # INSERT_ID_EVENT 130 | ] 131 | 132 | # A mapping array for all values that may appear in the +flags+ field of a 133 | # table_map_event. 134 | # 135 | # Enumerated in sql/log_event.h line ~3413 within Table_map_log_event 136 | TABLE_MAP_EVENT_FLAGS = { 137 | :bit_len_exact => 1 << 0, # TM_BIT_LEN_EXACT_F 138 | } 139 | 140 | # A mapping array for all values that may appear in the +flags+ field of a 141 | # table_metadata_event. 142 | # 143 | # There are none of these at the moment. 144 | TABLE_METADATA_EVENT_FLAGS = { 145 | } 146 | 147 | # A mapping hash for all values that may appear in the +flags+ field of 148 | # a column descriptor for a table_metadata_event. 149 | # 150 | # Defined in include/mysql_com.h line ~92 151 | TABLE_METADATA_EVENT_COLUMN_FLAGS = { 152 | :not_null => 1, # NOT_NULL_FLAG 153 | :primary_key => 2, # PRI_KEY_FLAG 154 | :unique_key => 4, # UNIQUE_KEY_FLAG 155 | :multiple_key => 8, # MULTIPLE_KEY_FLAG 156 | :blob => 16, # BLOB_FLAG 157 | :unsigned => 32, # UNSIGNED_FLAG 158 | :zerofill => 64, # ZEROFILL_FLAG 159 | :binary => 128, # BINARY_FLAG 160 | :enum => 256, # ENUM_FLAG 161 | :auto_increment => 512, # AUTO_INCREMENT_FLAG 162 | :timestamp => 1024, # TIMESTAMP_FLAG 163 | :set => 2048, # SET_FLAG 164 | :no_default_value => 4096, # NO_DEFAULT_VALUE_FLAG 165 | :on_update_now => 8192, # ON_UPDATE_NOW_FLAG 166 | :part_key => 16384, # PART_KEY_FLAG 167 | } 168 | 169 | # A mapping array for all values that may appear in the +flags+ field of a 170 | # write_rows_event, update_rows_event, or delete_rows_event. 
171 | # 172 | # Enumerated in sql/log_event.h line ~3533 within Rows_log_event 173 | GENERIC_ROWS_EVENT_FLAGS = { 174 | :stmt_end => 1 << 0, # STMT_END_F 175 | :no_foreign_key_checks => 1 << 1, # NO_FOREIGN_KEY_CHECKS_F 176 | :relaxed_unique_checks => 1 << 2, # RELAXED_UNIQUE_CHECKS_F 177 | :complete_rows => 1 << 3, # COMPLETE_ROWS_F 178 | } 179 | 180 | GENERIC_ROWS_EVENT_VH_FIELD_TYPES = [ 181 | :extra_rows_info, # ROWS_V_EXTRAINFO_TAG 182 | ] 183 | 184 | # Parse binary log events from a provided binary log. Must be driven 185 | # externally, but handles all the details of parsing an event header 186 | # and the content of the various event types. 187 | class BinlogEventParser 188 | # The binary log object this event parser will parse events from. 189 | attr_accessor :binlog 190 | 191 | # The binary log reader extracted from the binlog object for convenience. 192 | attr_accessor :reader 193 | 194 | # The binary log field parser extracted from the binlog object for 195 | # convenience. 196 | attr_accessor :parser 197 | 198 | def initialize(binlog_instance) 199 | @binlog = binlog_instance 200 | @reader = binlog_instance.reader 201 | @parser = binlog_instance.field_parser 202 | @table_map = {} 203 | end 204 | 205 | # Parse an event header, which is consistent for all event types. 206 | # 207 | # Documented in sql/log_event.h line ~749 as "Common-Header" 208 | # 209 | # Implemented in sql/log_event.cc line ~936 in Log_event::write_header 210 | def event_header 211 | header = {} 212 | header[:timestamp] = parser.read_uint32 213 | event_type = parser.read_uint8 214 | header[:event_type] = EVENT_TYPES[event_type] || "unknown_#{event_type}".to_sym 215 | header[:server_id] = parser.read_uint32 216 | header[:event_length] = parser.read_uint32 217 | header[:next_position] = parser.read_uint32 218 | header[:flags] = parser.read_uint_bitmap_by_size_and_name(2, EVENT_HEADER_FLAGS) 219 | 220 | header 221 | end 222 | 223 | # Parse fields for a +Format_description+ event. 224 | # 225 | # Implemented in sql/log_event.cc line ~4123 in Format_description_log_event::write 226 | def format_description_event(header) 227 | fields = {} 228 | fields[:binlog_version] = parser.read_uint16 229 | fields[:server_version] = parser.read_nstringz(50).force_encoding("UTF-8") 230 | fields[:create_timestamp] = parser.read_uint32 231 | fields[:header_length] = parser.read_uint8 232 | fields 233 | end 234 | 235 | # Parse fields for a +Rotate+ event. 236 | # 237 | # Implemented in sql/log_event.cc line ~5157 in Rotate_log_event::write 238 | def rotate_event(header) 239 | fields = {} 240 | fields[:pos] = parser.read_uint64 241 | name_length = reader.remaining(header) 242 | fields[:name] = parser.read_nstring(name_length).force_encoding("UTF-8") 243 | fields 244 | end 245 | 246 | def _query_event_status_updated_db_names 247 | db_count = parser.read_uint8 248 | return nil if db_count == QUERY_EVENT_OVER_MAX_DBS_IN_EVENT_MTS 249 | 250 | db_names = [] 251 | db_count.times do |n| 252 | db_name = "" 253 | loop do 254 | c = reader.read(1) 255 | break if c == "\0" 256 | db_name << c 257 | end 258 | db_names << db_name.force_encoding("UTF-8") 259 | end 260 | 261 | db_names 262 | end 263 | private :_query_event_status_updated_db_names 264 | 265 | # Parse a dynamic +status+ structure within a query_event, which consists 266 | # of a status_length (uint16) followed by a number of status variables 267 | # (determined by the +status_length+) each of which consist of: 268 | # * A type code (+uint8+), one of +QUERY_EVENT_STATUS_TYPES+. 
269 | # * The content itself, determined by the type. Additional processing is 270 | # required based on the type. 271 | def _query_event_status(header, fields) 272 | status = {} 273 | status_length = parser.read_uint16 274 | end_position = reader.position + status_length 275 | while reader.position < end_position 276 | status_type_id = parser.read_uint8 277 | status_type = QUERY_EVENT_STATUS_TYPES[status_type_id] 278 | status[status_type] = case status_type 279 | when :flags2 280 | parser.read_uint_bitmap_by_size_and_name(4, QUERY_EVENT_FLAGS2) 281 | when :sql_mode 282 | parser.read_uint64 283 | when :catalog_deprecated 284 | parser.read_lpstringz.force_encoding("UTF-8") 285 | when :auto_increment 286 | { 287 | :increment => parser.read_uint16, 288 | :offset => parser.read_uint16, 289 | } 290 | when :charset 291 | { 292 | :character_set_client => COLLATION[parser.read_uint16], 293 | :collation_connection => COLLATION[parser.read_uint16], 294 | :collation_server => COLLATION[parser.read_uint16], 295 | } 296 | when :time_zone 297 | parser.read_lpstring.force_encoding("UTF-8") 298 | when :catalog 299 | parser.read_lpstring.force_encoding("UTF-8") 300 | when :lc_time_names 301 | parser.read_uint16 302 | when :charset_database 303 | parser.read_uint16 304 | when :table_map_for_update 305 | parser.read_uint64 306 | when :master_data_written 307 | parser.read_uint32 308 | when :invoker 309 | { 310 | :user => parser.read_lpstring.force_encoding("UTF-8"), 311 | :host => parser.read_lpstring.force_encoding("UTF-8"), 312 | } 313 | when :updated_db_names 314 | _query_event_status_updated_db_names 315 | when :commit_ts 316 | parser.read_uint64 317 | when :microseconds 318 | parser.read_uint24 319 | when :explicit_defaults_for_timestamp 320 | parser.read_uint8 321 | else 322 | raise "Unknown status type #{status_type_id}" 323 | end 324 | end 325 | 326 | # We may have read too much due to an invalid string read especially. 327 | # Raise a more specific exception here instead of the generic 328 | # OverReadException from the entire event. 329 | if reader.position > end_position 330 | raise OverReadException.new("Read past end of Query event status field") 331 | end 332 | 333 | status 334 | end 335 | private :_query_event_status 336 | 337 | # Parse fields for a +Query+ event. 338 | # 339 | # Implemented in sql/log_event.cc line ~2214 in Query_log_event::write 340 | def query_event(header) 341 | fields = {} 342 | fields[:thread_id] = parser.read_uint32 343 | fields[:elapsed_time] = parser.read_uint32 344 | db_length = parser.read_uint8 345 | fields[:error_code] = parser.read_uint16 346 | fields[:status] = _query_event_status(header, fields) 347 | fields[:db] = parser.read_nstringz(db_length + 1).force_encoding("UTF-8") 348 | query_length = reader.remaining(header) 349 | 350 | character_set = fields[:status][:charset][:character_set_client][:character_set] 351 | query = reader.read([query_length, binlog.max_query_length].min) 352 | case character_set 353 | when :utf8mb4, :utf8 354 | fields[:query] = query.force_encoding("UTF-8") 355 | when :latin1 # Note: Used for internally-generated query events such as "BEGIN". 356 | fields[:query] = query.force_encoding("ISO-8859-1") 357 | else 358 | raise "Unsupported character set #{character_set} for query event in #{reader.filename} at offset #{reader.position}" 359 | end 360 | 361 | fields 362 | end 363 | 364 | # Parse fields for an +Intvar+ event. 
365 | # 366 | # Implemented in sql/log_event.cc line ~5326 in Intvar_log_event::write 367 | def intvar_event(header) 368 | fields = {} 369 | 370 | fields[:intvar_type] = parser.read_uint8 371 | fields[:intvar_name] = INTVAR_EVENT_INTVAR_TYPES[fields[:intvar_type]] 372 | fields[:intvar_value] = parser.read_uint64 373 | 374 | fields 375 | end 376 | 377 | # Parse fields for an +Xid+ event. 378 | # 379 | # Implemented in sql/log_event.cc line ~5559 in Xid_log_event::write 380 | def xid_event(header) 381 | fields = {} 382 | fields[:xid] = parser.read_uint64 383 | fields 384 | end 385 | 386 | # Parse fields for an +Rand+ event. 387 | # 388 | # Implemented in sql/log_event.cc line ~5454 in Rand_log_event::write 389 | def rand_event(header) 390 | fields = {} 391 | fields[:seed1] = parser.read_uint64 392 | fields[:seed2] = parser.read_uint64 393 | fields 394 | end 395 | 396 | # Parse a number of bytes from the metadata section of a +Table_map+ event 397 | # representing various fields based on the column type of the column 398 | # being processed. 399 | def _table_map_event_column_metadata_read(column_type) 400 | case column_type 401 | when :float, :double 402 | { :size => parser.read_uint8 } 403 | when :varchar 404 | { :max_length => parser.read_uint16 } 405 | when :bit 406 | bits = parser.read_uint8 407 | bytes = parser.read_uint8 408 | { 409 | :bits => (bytes * 8) + bits 410 | } 411 | when :newdecimal 412 | { 413 | :precision => parser.read_uint8, 414 | :decimals => parser.read_uint8, 415 | } 416 | when :blob, :geometry, :json 417 | { :length_size => parser.read_uint8 } 418 | when :string, :var_string 419 | # The :string type sets a :real_type field to indicate the actual type 420 | # which is fundamentally incompatible with :string parsing. Setting 421 | # a :type key in this hash will cause table_map_event to override the 422 | # main field :type with the provided type here. 423 | # See Field_string::do_save_field_metadata for reference. 424 | metadata = (parser.read_uint8 << 8) + parser.read_uint8 425 | real_type = MYSQL_TYPES[metadata >> 8] 426 | case real_type 427 | when :enum, :set 428 | { :type => real_type, :size => metadata & 0x00ff } 429 | else 430 | { :max_length => (((metadata >> 4) & 0x300) ^ 0x300) + (metadata & 0x00ff) } 431 | end 432 | when :timestamp2, :datetime2, :time2 433 | { 434 | :decimals => parser.read_uint8, 435 | } 436 | end 437 | end 438 | private :_table_map_event_column_metadata_read 439 | 440 | # Parse column metadata within a +Table_map+ event. 441 | def _table_map_event_column_metadata(columns_type) 442 | length = parser.read_varint 443 | columns_type.map do |column| 444 | _table_map_event_column_metadata_read(column) 445 | end 446 | end 447 | private :_table_map_event_column_metadata 448 | 449 | # Parse fields for a +Table_map+ event. 
450 | # 451 | # Implemented in sql/log_event.cc line ~8638 452 | # in Table_map_log_event::write_data_header 453 | # and Table_map_log_event::write_data_body 454 | def table_map_event(header) 455 | fields = {} 456 | fields[:table_id] = parser.read_uint48 457 | fields[:flags] = parser.read_uint_bitmap_by_size_and_name(2, TABLE_MAP_EVENT_FLAGS) 458 | map_entry = @table_map[fields[:table_id]] = {} 459 | map_entry[:db] = parser.read_lpstringz.force_encoding("UTF-8") 460 | map_entry[:table] = parser.read_lpstringz.force_encoding("UTF-8") 461 | columns = parser.read_varint 462 | columns_type = parser.read_uint8_array(columns).map { |c| MYSQL_TYPES[c] || "unknown_#{c}".to_sym } 463 | columns_metadata = _table_map_event_column_metadata(columns_type) 464 | columns_nullable = parser.read_bit_array(columns) 465 | 466 | # Remap overloaded types before we piece together the entire event. 467 | columns.times do |c| 468 | if columns_metadata[c] and columns_metadata[c][:type] 469 | columns_type[c] = columns_metadata[c][:type] 470 | columns_metadata[c].delete :type 471 | end 472 | end 473 | 474 | map_entry[:columns] = columns.times.map do |c| 475 | { 476 | :type => columns_type[c], 477 | :nullable => columns_nullable[c], 478 | :metadata => columns_metadata[c], 479 | } 480 | end 481 | 482 | fields[:map_entry] = map_entry 483 | fields 484 | end 485 | 486 | # Parse fields for a +Table_metadata+ event, which is specific to 487 | # Twitter MySQL releases at the moment. 488 | # 489 | # Implemented in sql/log_event.cc line ~8772 (in Twitter MySQL) 490 | # in Table_metadata_log_event::write_data_header 491 | # and Table_metadata_log_event::write_data_body 492 | def table_metadata_event(header) 493 | fields = {} 494 | table_id = parser.read_uint48 495 | columns = parser.read_uint16 496 | 497 | fields[:table] = @table_map[table_id] 498 | fields[:flags] = parser.read_uint16 499 | fields[:columns] = columns.times.map do |c| 500 | descriptor_length = parser.read_uint32 501 | column_type = parser.read_uint8 502 | @table_map[table_id][:columns][c][:description] = { 503 | :type => MYSQL_TYPES[column_type] || "unknown_#{column_type}".to_sym, 504 | :length => parser.read_uint32, 505 | :scale => parser.read_uint8, 506 | :character_set => COLLATION[parser.read_uint16], 507 | :flags => parser.read_uint_bitmap_by_size_and_name(2, 508 | TABLE_METADATA_EVENT_COLUMN_FLAGS), 509 | :name => parser.read_varstring.force_encoding("UTF-8"), 510 | :type_name => parser.read_varstring.force_encoding("UTF-8"), 511 | :comment => parser.read_varstring.force_encoding("UTF-8"), 512 | } 513 | end 514 | fields 515 | end 516 | 517 | # Parse a single row image, which is comprised of a series of columns. Not 518 | # all columns are present in the row image, the columns_used array of true 519 | # and false values identifies which columns are present. 
520 | def _generic_rows_event_row_image(header, fields, columns_used) 521 | row_image = [] 522 | start_position = reader.position 523 | columns_null = parser.read_bit_array(fields[:table][:columns].size) 524 | fields[:table][:columns].each_with_index do |column, column_index| 525 | #puts "column #{column_index} #{column}: used=#{columns_used[column_index]}, null=#{columns_null[column_index]}" 526 | if !columns_used[column_index] 527 | row_image << nil 528 | elsif columns_null[column_index] 529 | row_image << { column_index => nil } 530 | else 531 | value = parser.read_mysql_type(column[:type], column[:metadata]) 532 | row_image << { 533 | column_index => value, 534 | } 535 | end 536 | end 537 | end_position = reader.position 538 | 539 | { 540 | image: row_image, 541 | size: end_position-start_position 542 | } 543 | end 544 | private :_generic_rows_event_row_image 545 | 546 | def diff_row_images(before, after) 547 | diff = {} 548 | before.each_with_index do |before_column, index| 549 | after_column = after[index] 550 | before_value = before_column.first[1] 551 | after_value = after_column.first[1] 552 | if before_value != after_value 553 | diff[index] = { before: before_value, after: after_value } 554 | end 555 | end 556 | diff 557 | end 558 | 559 | # Parse the row images present in a row-based replication row event. This 560 | # is rather incomplete right now due missing support for many MySQL types, 561 | # but can parse some basic events. 562 | def _generic_rows_event_row_images(header, fields, columns_used) 563 | row_images = [] 564 | end_position = reader.position + reader.remaining(header) 565 | while reader.position < end_position 566 | row_image = {} 567 | case header[:event_type] 568 | when :write_rows_event_v1, :write_rows_event_v2 569 | row_image[:after] = _generic_rows_event_row_image(header, fields, columns_used[:after]) 570 | when :delete_rows_event_v1, :delete_rows_event_v2 571 | row_image[:before] = _generic_rows_event_row_image(header, fields, columns_used[:before]) 572 | when :update_rows_event_v1, :update_rows_event_v2 573 | row_image[:before] = _generic_rows_event_row_image(header, fields, columns_used[:before]) 574 | row_image[:after] = _generic_rows_event_row_image(header, fields, columns_used[:after]) 575 | row_image[:diff] = diff_row_images(row_image[:before][:image], row_image[:after][:image]) 576 | end 577 | row_images << row_image 578 | end 579 | 580 | # We may have read too much, especially if any of the fields in the row 581 | # image were misunderstood. Raise a more specific exception here instead 582 | # of the generic OverReadException from the entire event. 583 | if reader.position > end_position 584 | raise OverReadException.new("Read past end of row image") 585 | end 586 | 587 | row_images 588 | end 589 | private :_generic_rows_event_row_images 590 | 591 | # Parse the variable header from a v2 rows event. This is only used for 592 | # ROWS_V_EXTRAINFO_TAG which is used by NDB. Ensure it can be skipped 593 | # properly but don't bother parsing it. 594 | def _generic_rows_event_vh 595 | vh_payload_len = parser.read_uint16 - 2 596 | return unless vh_payload_len > 0 597 | 598 | reader.read(vh_payload_len) 599 | end 600 | 601 | # Parse fields for any of the row-based replication row events: 602 | # * +Write_rows+ which is used for +INSERT+. 603 | # * +Update_rows+ which is used for +UPDATE+. 604 | # * +Delete_rows+ which is used for +DELETE+. 
605 | # 606 | # Implemented in sql/log_event.cc line ~8039 607 | # in Rows_log_event::write_data_header 608 | # and Rows_log_event::write_data_body 609 | def _generic_rows_event(header, contains_vh: false) 610 | fields = {} 611 | table_id = parser.read_uint48 612 | fields[:table] = @table_map[table_id] 613 | fields[:flags] = parser.read_uint_bitmap_by_size_and_name(2, GENERIC_ROWS_EVENT_FLAGS) 614 | 615 | # Rows_log_event v2 events contain a variable-sized header. Only NDB 616 | # uses it right now, so let's just make sure it's skipped properly. 617 | _generic_rows_event_vh if contains_vh 618 | 619 | columns = parser.read_varint 620 | columns_used = {} 621 | case header[:event_type] 622 | when :write_rows_event_v1, :write_rows_event_v2 623 | columns_used[:after] = parser.read_bit_array(columns) 624 | when :delete_rows_event_v1, :delete_rows_event_v2 625 | columns_used[:before] = parser.read_bit_array(columns) 626 | when :update_rows_event_v1, :update_rows_event_v2 627 | columns_used[:before] = parser.read_bit_array(columns) 628 | columns_used[:after] = parser.read_bit_array(columns) 629 | end 630 | fields[:row_image] = _generic_rows_event_row_images(header, fields, columns_used) 631 | fields 632 | end 633 | private :_generic_rows_event 634 | 635 | def generic_rows_event_v1(header) 636 | _generic_rows_event(header, contains_vh: false) 637 | end 638 | 639 | alias :write_rows_event_v1 :generic_rows_event_v1 640 | alias :update_rows_event_v1 :generic_rows_event_v1 641 | alias :delete_rows_event_v1 :generic_rows_event_v1 642 | 643 | def generic_rows_event_v2(header) 644 | _generic_rows_event(header, contains_vh: true) 645 | end 646 | 647 | alias :write_rows_event_v2 :generic_rows_event_v2 648 | alias :update_rows_event_v2 :generic_rows_event_v2 649 | alias :delete_rows_event_v2 :generic_rows_event_v2 650 | 651 | def rows_query_log_event(header) 652 | reader.read(1) # skip useless byte length which is unused 653 | # TODO: What character set? 
654 | { query: reader.read(header[:payload_length]-1) } 655 | end 656 | 657 | def in_hex(bytes) 658 | bytes.each_byte.map { |c| "%02x" % c.ord }.join 659 | end 660 | 661 | def format_gtid_sid(sid) 662 | [0..3, 4..5, 6..7, 8..9, 10..15].map { |r| in_hex(sid[r]) }.join("-") 663 | end 664 | 665 | # 6d9190a2-cca6-11e8-aa8c-42010aef0019:551845019 666 | def format_gtid(sid, gno_or_ivs) 667 | "#{format_gtid_sid(sid)}:#{gno_or_ivs}" 668 | end 669 | 670 | def previous_gtids_log_event(header) 671 | n_sids = parser.read_uint64 672 | 673 | gtids = [] 674 | n_sids.times do 675 | sid = parser.read_nstring(16) 676 | n_ivs = parser.read_uint64 677 | ivs = [] 678 | n_ivs.times do 679 | iv_start = parser.read_uint64 680 | iv_end = parser.read_uint64 681 | ivs << "#{iv_start}-#{iv_end}" 682 | end 683 | gtids << format_gtid(sid, ivs.join(":")) 684 | end 685 | 686 | { 687 | previous_gtids: gtids 688 | } 689 | end 690 | 691 | def gtid_log_event(header) 692 | flags = parser.read_uint8 693 | sid = parser.read_nstring(16).force_encoding("UTF-8") 694 | gno = parser.read_uint64 695 | lts_type = parser.read_uint8 696 | lts_last_committed = parser.read_uint64 697 | lts_sequence_number = parser.read_uint64 698 | 699 | { 700 | flags: flags, 701 | gtid: format_gtid(sid, gno), 702 | lts: { 703 | type: lts_type, 704 | last_committed: lts_last_committed, 705 | sequence_number: lts_sequence_number, 706 | }, 707 | } 708 | end 709 | end 710 | end 711 | -------------------------------------------------------------------------------- /lib/mysql_binlog/binlog_field_parser.rb: -------------------------------------------------------------------------------- 1 | require 'bigdecimal' 2 | 3 | module MysqlBinlog 4 | # All MySQL types mapping to their integer values. 5 | MYSQL_TYPES_HASH = { 6 | :decimal => 0, 7 | :tiny => 1, 8 | :short => 2, 9 | :long => 3, 10 | :float => 4, 11 | :double => 5, 12 | :null => 6, 13 | :timestamp => 7, 14 | :longlong => 8, 15 | :int24 => 9, 16 | :date => 10, 17 | :time => 11, 18 | :datetime => 12, 19 | :year => 13, 20 | :newdate => 14, 21 | :varchar => 15, 22 | :bit => 16, 23 | :timestamp2 => 17, 24 | :datetime2 => 18, 25 | :time2 => 19, 26 | :json => 245, 27 | :newdecimal => 246, 28 | :enum => 247, 29 | :set => 248, 30 | :tiny_blob => 249, 31 | :medium_blob => 250, 32 | :long_blob => 251, 33 | :blob => 252, 34 | :var_string => 253, 35 | :string => 254, 36 | :geometry => 255, 37 | } 38 | 39 | # All MySQL types in a simple lookup array to map an integer to its symbol. 40 | MYSQL_TYPES = MYSQL_TYPES_HASH.inject(Array.new(256)) do |type_array, item| 41 | type_array[item[1]] = item[0] 42 | type_array 43 | end 44 | 45 | # Parse various types of standard and non-standard data types from a 46 | # provided binary log using its reader to read data. 47 | class BinlogFieldParser 48 | attr_accessor :binlog 49 | attr_accessor :reader 50 | 51 | def initialize(binlog_instance) 52 | @format_cache = {} 53 | @binlog = binlog_instance 54 | @reader = binlog_instance.reader 55 | end 56 | 57 | # Read an unsigned 8-bit (1-byte) integer. 58 | def read_uint8 59 | reader.read(1).unpack("C").first 60 | end 61 | 62 | # Read an unsigned 16-bit (2-byte) integer. 63 | def read_uint16 64 | reader.read(2).unpack("v").first 65 | end 66 | 67 | # Read an unsigned 16-bit (2-byte) big-endian integer. 68 | def read_uint16_be 69 | reader.read(2).unpack("n").first 70 | end 71 | 72 | # Read an unsigned 24-bit (3-byte) integer. 
73 | def read_uint24 74 | a, b, c = reader.read(3).unpack("CCC") 75 | a + (b << 8) + (c << 16) 76 | end 77 | 78 | # Read an unsigned 24-bit (3-byte) big-endian integer. 79 | def read_uint24_be 80 | a, b = reader.read(3).unpack("nC") 81 | (a << 8) + b 82 | end 83 | 84 | # Read an unsigned 32-bit (4-byte) big-endian integer. 85 | def read_uint32_be 86 | reader.read(4).unpack("N").first 87 | end 88 | 89 | # Read an unsigned 32-bit (4-byte) integer. 90 | def read_uint32 91 | reader.read(4).unpack("V").first 92 | end 93 | 94 | # Read an unsigned 40-bit (5-byte) integer. 95 | def read_uint40 96 | a, b = reader.read(5).unpack("CV") 97 | a + (b << 8) 98 | end 99 | 100 | # Read an unsigned 40-bit (5-byte) big-endian integer. 101 | def read_uint40_be 102 | a, b = reader.read(5).unpack("NC") 103 | (a << 8) + b 104 | end 105 | 106 | # Read an unsigned 48-bit (6-byte) integer. 107 | def read_uint48 108 | a, b, c = reader.read(6).unpack("vvv") 109 | a + (b << 16) + (c << 32) 110 | end 111 | 112 | # Read an unsigned 56-bit (7-byte) integer. 113 | def read_uint56 114 | a, b, c = reader.read(7).unpack("CvV") 115 | a + (b << 8) + (c << 24) 116 | end 117 | 118 | # Read an unsigned 64-bit (8-byte) integer. 119 | def read_uint64 120 | reader.read(8).unpack("Q<").first 121 | end 122 | 123 | # Read an unsigned 64-bit (8-byte) big-endian integer. 124 | def read_uint64_be 125 | reader.read(8).unpack("Q>").first 126 | end 127 | 128 | # Read a signed 8-bit (1-byte) integer. 129 | def read_int8 130 | reader.read(1).unpack("c").first 131 | end 132 | 133 | # Read a signed 16-bit (2-byte) big-endian integer. 134 | def read_int16_be 135 | reader.read(2).unpack('s>').first 136 | end 137 | 138 | # Read a signed 24-bit (3-byte) big-endian integer. 139 | def read_int24_be 140 | a, b, c = reader.read(3).unpack('CCC') 141 | if (a & 128) == 0 142 | (a << 16) | (b << 8) | c 143 | else 144 | (-1 << 24) | (a << 16) | (b << 8) | c 145 | end 146 | end 147 | 148 | # Read a signed 32-bit (4-byte) big-endian integer. 149 | def read_int32_be 150 | reader.read(4).unpack('l>').first 151 | end 152 | 153 | def read_uint_by_size(size) 154 | case size 155 | when 1 156 | read_uint8 157 | when 2 158 | read_uint16 159 | when 3 160 | read_uint24 161 | when 4 162 | read_uint32 163 | when 5 164 | read_uint40 165 | when 6 166 | read_uint48 167 | when 7 168 | read_uint56 169 | when 8 170 | read_uint64 171 | end 172 | end 173 | 174 | def read_int_be_by_size(size) 175 | case size 176 | when 1 177 | read_int8 178 | when 2 179 | read_int16_be 180 | when 3 181 | read_int24_be 182 | when 4 183 | read_int32_be 184 | else 185 | raise "read_int#{size*8}_be not implemented" 186 | end 187 | end 188 | 189 | # Read a single-precision (4-byte) floating point number. 190 | def read_float 191 | reader.read(4).unpack("e").first 192 | end 193 | 194 | # Read a double-precision (8-byte) floating point number. 195 | def read_double 196 | reader.read(8).unpack("E").first 197 | end 198 | 199 | # Read a variable-length "Length Coded Binary" integer. This is derived 200 | # from the MySQL protocol, and re-used in the binary log format. This 201 | # format uses the first byte either to store the actual value directly, 202 | # for integer values <= 250, or to encode the number of following bytes 203 | # used to store the actual value, which can be 2, 3, or 8. It also 204 | # includes support for SQL NULL as a special case.
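#
# The first byte is interpreted as follows:
#   0..250  the value itself
#   251     SQL NULL (returned as nil)
#   252     value stored in the following 2 bytes
#   253     value stored in the following 3 bytes
#   254     value stored in the following 8 bytes
#   255     invalid; an exception is raised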
205 | # 206 | # See: http://forge.mysql.com/wiki/MySQL_Internals_ClientServer_Protocol#Elements 207 | def read_varint 208 | first_byte = read_uint8 209 | 210 | case 211 | when first_byte <= 250 212 | first_byte 213 | when first_byte == 251 214 | nil 215 | when first_byte == 252 216 | read_uint16 217 | when first_byte == 253 218 | read_uint24 219 | when first_byte == 254 220 | read_uint64 221 | when first_byte == 255 222 | raise "Invalid variable-length integer" 223 | end 224 | end 225 | 226 | # Read a non-terminated string, provided its length. 227 | def read_nstring(length) 228 | reader.read(length) 229 | end 230 | 231 | # Read a null-terminated string, provided its length (with the null). 232 | def read_nstringz(length) 233 | reader.read(length).unpack("A*").first 234 | end 235 | 236 | # Read a (Pascal-style) length-prefixed string. The length is stored as an 237 | # 8-bit (1-byte) to 32-bit (4-byte) unsigned integer, depending on the 238 | # optional size parameter (default 1), followed by the string itself with 239 | # no termination character. 240 | def read_lpstring(size=1) 241 | length = read_uint_by_size(size) 242 | read_nstring(length) 243 | end 244 | 245 | # Read an lpstring (as above) which is also terminated with a null byte. 246 | def read_lpstringz(size=1) 247 | string = read_lpstring(size) 248 | reader.read(1) # null 249 | string 250 | end 251 | 252 | # Read a MySQL-style varint length-prefixed string. The length is stored 253 | # as a variable-length "Length Coded Binary" value (see read_varint) which 254 | # is followed by the string content itself. No termination is included. 255 | def read_varstring 256 | length = read_varint 257 | read_nstring(length) 258 | end 259 | 260 | # Read a (new) decimal value. The value is stored as a sequence of signed 261 | # big-endian integers, each representing up to 9 digits of the integral 262 | # and fractional parts. The first integer of the integral part and/or the 263 | # last integer of the fractional part might be compressed (or packed) and 264 | # are of variable length. The remaining integers (if any) are 265 | # uncompressed and 32 bits wide. 266 | def read_newdecimal(precision, scale) 267 | digits_per_integer = 9 268 | compressed_bytes = [0, 1, 1, 2, 2, 3, 3, 4, 4, 4] 269 | integral = (precision - scale) 270 | uncomp_integral = integral / digits_per_integer 271 | uncomp_fractional = scale / digits_per_integer 272 | comp_integral = integral - (uncomp_integral * digits_per_integer) 273 | comp_fractional = scale - (uncomp_fractional * digits_per_integer) 274 | 275 | # The sign is encoded in the high bit of the first byte/digit. The byte 276 | # might be part of a larger integer, so apply the optional bit-flipper 277 | # and push back the byte into the input stream. 278 | value = read_uint8 279 | str, mask = (value & 0x80 != 0) ? ["", 0] : ["-", -1] 280 | reader.unget(value ^ 0x80) 281 | 282 | size = compressed_bytes[comp_integral] 283 | 284 | if size > 0 285 | value = read_int_be_by_size(size) ^ mask 286 | str << value.to_s 287 | end 288 | 289 | (1..uncomp_integral).each do 290 | value = read_int32_be ^ mask 291 | str << value.to_s 292 | end 293 | 294 | str << "." 295 | 296 | (1..uncomp_fractional).each do 297 | value = read_int32_be ^ mask 298 | str << value.to_s 299 | end 300 | 301 | size = compressed_bytes[comp_fractional] 302 | 303 | if size > 0 304 | value = read_int_be_by_size(size) ^ mask 305 | str << value.to_s 306 | end 307 | 308 | BigDecimal(str) 309 | end 310 | 311 | # Read an array of unsigned 8-bit (1-byte) integers.
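# For example, reading 3 bytes "\x01\x02\xff" returns [1, 2, 255].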
312 | def read_uint8_array(length) 313 | reader.read(length).bytes.to_a 314 | end 315 | 316 | # Read an arbitrary-length bitmap, provided its length. Returns an array 317 | # of true/false values. This is used both for internal bitmaps in RBR 318 | # events and for the BIT type. 319 | def read_bit_array(length) 320 | data = reader.read((length+7)/8) 321 | data.unpack("b*").first. # Unpack into a string of "10101" 322 | split("").map { |c| c == "1" }.shift(length) # Return true/false array 323 | end 324 | 325 | # Read a uint value using the provided size, and convert it to an array 326 | # of symbols derived from the provided mapping of names to bit values. 327 | def read_uint_bitmap_by_size_and_name(size, bit_names) 328 | value = read_uint_by_size(size) 329 | named_bits = [] 330 | 331 | # Do an efficient scan for the named bits we know about using the hash 332 | # provided. 333 | bit_names.each do |(name, bit_value)| 334 | if (value & bit_value) != 0 335 | value -= bit_value 336 | named_bits << name 337 | end 338 | end 339 | 340 | # If anything is left over in +value+, add "unknown" names to the result 341 | # so that they can be identified and corrected. 342 | if value > 0 343 | 0.upto(size * 8).map { |n| 1 << n }.each do |bit_value| 344 | if (value & bit_value) != 0 345 | named_bits << "unknown_#{bit_value}".to_sym 346 | end 347 | end 348 | end 349 | 350 | named_bits 351 | end 352 | 353 | # Extract a number of sequential bits at a given offset within an integer. 354 | # This is used to unpack bit-packed fields. 355 | def extract_bits(value, bits, offset) 356 | (value & ((1 << bits) - 1) << offset) >> offset 357 | end 358 | 359 | # Convert a packed +DATE+ from a uint24 into a string representing 360 | # the date. 361 | def convert_mysql_type_date(value) 362 | "%04i-%02i-%02i" % [ 363 | extract_bits(value, 15, 9), 364 | extract_bits(value, 4, 5), 365 | extract_bits(value, 5, 0), 366 | ] 367 | end 368 | 369 | # Convert a packed +TIME+ from a uint24 into a string representing 370 | # the time. 371 | def convert_mysql_type_time(value) 372 | "%02i:%02i:%02i" % [ 373 | value / 10000, 374 | (value % 10000) / 100, 375 | value % 100, 376 | ] 377 | end 378 | 379 | # Convert a packed +DATETIME+ from a uint64 into a string representing 380 | # the date and time.
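# For example, the packed value 20120722120000 converts to
# "2012-07-22 12:00:00".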
381 | def convert_mysql_type_datetime(value) 382 | date = value / 1000000 383 | time = value % 1000000 384 | 385 | "%04i-%02i-%02i %02i:%02i:%02i" % [ 386 | date / 10000, 387 | (date % 10000) / 100, 388 | date % 100, 389 | time / 10000, 390 | (time % 10000) / 100, 391 | time % 100, 392 | ] 393 | end 394 | 395 | def convert_mysql_type_datetimef(int_part, frac_part) 396 | year_month = extract_bits(int_part, 17, 22) 397 | year = year_month / 13 398 | month = year_month % 13 399 | day = extract_bits(int_part, 5, 17) 400 | hour = extract_bits(int_part, 5, 12) 401 | minute = extract_bits(int_part, 6, 6) 402 | second = extract_bits(int_part, 6, 0) 403 | 404 | "%04i-%02i-%02i %02i:%02i:%02i.%06i" % [ 405 | year, 406 | month, 407 | day, 408 | hour, 409 | minute, 410 | second, 411 | frac_part, 412 | ] 413 | end 414 | 415 | def read_frac_part(decimals) 416 | case decimals 417 | when 0 418 | 0 419 | when 1, 2 420 | read_uint8 * 10000 421 | when 3, 4 422 | read_uint16_be * 100 423 | when 5, 6 424 | read_uint24_be 425 | end 426 | end 427 | 428 | def read_datetimef(decimals) 429 | convert_mysql_type_datetimef(read_uint40_be, read_frac_part(decimals)) 430 | end 431 | 432 | def read_timestamp2(decimals) 433 | read_uint32_be + (read_frac_part(decimals) / 1000000) 434 | end 435 | 436 | # Read a single field, provided the MySQL column type as a symbol. Not all 437 | # types are currently supported. 438 | def read_mysql_type(type, metadata=nil) 439 | case type 440 | when :tiny 441 | read_uint8 442 | when :short 443 | read_uint16 444 | when :int24 445 | read_uint24 446 | when :long 447 | read_uint32 448 | when :longlong 449 | read_uint64 450 | when :float 451 | read_float 452 | when :double 453 | read_double 454 | when :var_string 455 | read_varstring.force_encoding('UTF-8') 456 | when :varchar, :string 457 | prefix_size = (metadata[:max_length] > 255) ? 2 : 1 458 | read_lpstring(prefix_size).force_encoding('UTF-8') 459 | when :blob, :geometry 460 | read_lpstring(metadata[:length_size]).force_encoding('binary') 461 | when :json 462 | read_lpstring(metadata[:length_size]).force_encoding('UTF-8') 463 | when :timestamp 464 | read_uint32 465 | when :timestamp2 466 | read_timestamp2(metadata[:decimals]) 467 | when :year 468 | read_uint8 + 1900 469 | when :date 470 | convert_mysql_type_date(read_uint24) 471 | when :time 472 | convert_mysql_type_time(read_uint24) 473 | when :datetime 474 | convert_mysql_type_datetime(read_uint64) 475 | when :datetime2 476 | read_datetimef(metadata[:decimals]) 477 | when :enum, :set 478 | read_uint_by_size(metadata[:size]) 479 | when :bit 480 | byte_length = (metadata[:bits]+7)/8 481 | read_uint_by_size(byte_length) 482 | when :newdecimal 483 | precision = metadata[:precision] 484 | scale = metadata[:decimals] 485 | read_newdecimal(precision, scale) 486 | else 487 | raise UnsupportedTypeException.new("Type #{type} is not supported.") 488 | end 489 | end 490 | end 491 | end 492 | -------------------------------------------------------------------------------- /lib/mysql_binlog/mysql_character_set.rb: -------------------------------------------------------------------------------- 1 | module MysqlBinlog 2 | # A hash to map MySQL collation name to ID and character set name. 
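# For example: COLLATION_HASH[:utf8_general_ci] => { :id => 33, :character_set => :utf8 }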
3 | # 4 | # This hash is produced by the following query: 5 | # SELECT concat( 6 | # " :", 7 | # rpad(collation_name, 24, " "), 8 | # " => { :id => ", 9 | # lpad(id, 3, " "), 10 | # ", :character_set => :", 11 | # rpad(character_set_name, 8, " "), 12 | # " }," 13 | # ) AS ruby_code 14 | # FROM information_schema.collations 15 | # ORDER BY collation_name 16 | # 17 | COLLATION_HASH = { 18 | :armscii8_bin => { :id => 64, :character_set => :armscii8 }, 19 | :armscii8_general_ci => { :id => 32, :character_set => :armscii8 }, 20 | :ascii_bin => { :id => 65, :character_set => :ascii }, 21 | :ascii_general_ci => { :id => 11, :character_set => :ascii }, 22 | :big5_bin => { :id => 84, :character_set => :big5 }, 23 | :big5_chinese_ci => { :id => 1, :character_set => :big5 }, 24 | :binary => { :id => 63, :character_set => :binary }, 25 | :cp1250_bin => { :id => 66, :character_set => :cp1250 }, 26 | :cp1250_croatian_ci => { :id => 44, :character_set => :cp1250 }, 27 | :cp1250_czech_cs => { :id => 34, :character_set => :cp1250 }, 28 | :cp1250_general_ci => { :id => 26, :character_set => :cp1250 }, 29 | :cp1250_polish_ci => { :id => 99, :character_set => :cp1250 }, 30 | :cp1251_bin => { :id => 50, :character_set => :cp1251 }, 31 | :cp1251_bulgarian_ci => { :id => 14, :character_set => :cp1251 }, 32 | :cp1251_general_ci => { :id => 51, :character_set => :cp1251 }, 33 | :cp1251_general_cs => { :id => 52, :character_set => :cp1251 }, 34 | :cp1251_ukrainian_ci => { :id => 23, :character_set => :cp1251 }, 35 | :cp1256_bin => { :id => 67, :character_set => :cp1256 }, 36 | :cp1256_general_ci => { :id => 57, :character_set => :cp1256 }, 37 | :cp1257_bin => { :id => 58, :character_set => :cp1257 }, 38 | :cp1257_general_ci => { :id => 59, :character_set => :cp1257 }, 39 | :cp1257_lithuanian_ci => { :id => 29, :character_set => :cp1257 }, 40 | :cp850_bin => { :id => 80, :character_set => :cp850 }, 41 | :cp850_general_ci => { :id => 4, :character_set => :cp850 }, 42 | :cp852_bin => { :id => 81, :character_set => :cp852 }, 43 | :cp852_general_ci => { :id => 40, :character_set => :cp852 }, 44 | :cp866_bin => { :id => 68, :character_set => :cp866 }, 45 | :cp866_general_ci => { :id => 36, :character_set => :cp866 }, 46 | :cp932_bin => { :id => 96, :character_set => :cp932 }, 47 | :cp932_japanese_ci => { :id => 95, :character_set => :cp932 }, 48 | :dec8_bin => { :id => 69, :character_set => :dec8 }, 49 | :dec8_swedish_ci => { :id => 3, :character_set => :dec8 }, 50 | :eucjpms_bin => { :id => 98, :character_set => :eucjpms }, 51 | :eucjpms_japanese_ci => { :id => 97, :character_set => :eucjpms }, 52 | :euckr_bin => { :id => 85, :character_set => :euckr }, 53 | :euckr_korean_ci => { :id => 19, :character_set => :euckr }, 54 | :gb2312_bin => { :id => 86, :character_set => :gb2312 }, 55 | :gb2312_chinese_ci => { :id => 24, :character_set => :gb2312 }, 56 | :gbk_bin => { :id => 87, :character_set => :gbk }, 57 | :gbk_chinese_ci => { :id => 28, :character_set => :gbk }, 58 | :geostd8_bin => { :id => 93, :character_set => :geostd8 }, 59 | :geostd8_general_ci => { :id => 92, :character_set => :geostd8 }, 60 | :greek_bin => { :id => 70, :character_set => :greek }, 61 | :greek_general_ci => { :id => 25, :character_set => :greek }, 62 | :hebrew_bin => { :id => 71, :character_set => :hebrew }, 63 | :hebrew_general_ci => { :id => 16, :character_set => :hebrew }, 64 | :hp8_bin => { :id => 72, :character_set => :hp8 }, 65 | :hp8_english_ci => { :id => 6, :character_set => :hp8 }, 66 | :keybcs2_bin => { :id => 73, :character_set => 
:keybcs2 }, 67 | :keybcs2_general_ci => { :id => 37, :character_set => :keybcs2 }, 68 | :koi8r_bin => { :id => 74, :character_set => :koi8r }, 69 | :koi8r_general_ci => { :id => 7, :character_set => :koi8r }, 70 | :koi8u_bin => { :id => 75, :character_set => :koi8u }, 71 | :koi8u_general_ci => { :id => 22, :character_set => :koi8u }, 72 | :latin1_bin => { :id => 47, :character_set => :latin1 }, 73 | :latin1_danish_ci => { :id => 15, :character_set => :latin1 }, 74 | :latin1_general_ci => { :id => 48, :character_set => :latin1 }, 75 | :latin1_general_cs => { :id => 49, :character_set => :latin1 }, 76 | :latin1_german1_ci => { :id => 5, :character_set => :latin1 }, 77 | :latin1_german2_ci => { :id => 31, :character_set => :latin1 }, 78 | :latin1_spanish_ci => { :id => 94, :character_set => :latin1 }, 79 | :latin1_swedish_ci => { :id => 8, :character_set => :latin1 }, 80 | :latin2_bin => { :id => 77, :character_set => :latin2 }, 81 | :latin2_croatian_ci => { :id => 27, :character_set => :latin2 }, 82 | :latin2_czech_cs => { :id => 2, :character_set => :latin2 }, 83 | :latin2_general_ci => { :id => 9, :character_set => :latin2 }, 84 | :latin2_hungarian_ci => { :id => 21, :character_set => :latin2 }, 85 | :latin5_bin => { :id => 78, :character_set => :latin5 }, 86 | :latin5_turkish_ci => { :id => 30, :character_set => :latin5 }, 87 | :latin7_bin => { :id => 79, :character_set => :latin7 }, 88 | :latin7_estonian_cs => { :id => 20, :character_set => :latin7 }, 89 | :latin7_general_ci => { :id => 41, :character_set => :latin7 }, 90 | :latin7_general_cs => { :id => 42, :character_set => :latin7 }, 91 | :macce_bin => { :id => 43, :character_set => :macce }, 92 | :macce_general_ci => { :id => 38, :character_set => :macce }, 93 | :macroman_bin => { :id => 53, :character_set => :macroman }, 94 | :macroman_general_ci => { :id => 39, :character_set => :macroman }, 95 | :sjis_bin => { :id => 88, :character_set => :sjis }, 96 | :sjis_japanese_ci => { :id => 13, :character_set => :sjis }, 97 | :swe7_bin => { :id => 82, :character_set => :swe7 }, 98 | :swe7_swedish_ci => { :id => 10, :character_set => :swe7 }, 99 | :tis620_bin => { :id => 89, :character_set => :tis620 }, 100 | :tis620_thai_ci => { :id => 18, :character_set => :tis620 }, 101 | :ucs2_bin => { :id => 90, :character_set => :ucs2 }, 102 | :ucs2_czech_ci => { :id => 138, :character_set => :ucs2 }, 103 | :ucs2_danish_ci => { :id => 139, :character_set => :ucs2 }, 104 | :ucs2_esperanto_ci => { :id => 145, :character_set => :ucs2 }, 105 | :ucs2_estonian_ci => { :id => 134, :character_set => :ucs2 }, 106 | :ucs2_general_ci => { :id => 35, :character_set => :ucs2 }, 107 | :ucs2_general_mysql500_ci => { :id => 159, :character_set => :ucs2 }, 108 | :ucs2_hungarian_ci => { :id => 146, :character_set => :ucs2 }, 109 | :ucs2_icelandic_ci => { :id => 129, :character_set => :ucs2 }, 110 | :ucs2_latvian_ci => { :id => 130, :character_set => :ucs2 }, 111 | :ucs2_lithuanian_ci => { :id => 140, :character_set => :ucs2 }, 112 | :ucs2_persian_ci => { :id => 144, :character_set => :ucs2 }, 113 | :ucs2_polish_ci => { :id => 133, :character_set => :ucs2 }, 114 | :ucs2_romanian_ci => { :id => 131, :character_set => :ucs2 }, 115 | :ucs2_roman_ci => { :id => 143, :character_set => :ucs2 }, 116 | :ucs2_sinhala_ci => { :id => 147, :character_set => :ucs2 }, 117 | :ucs2_slovak_ci => { :id => 141, :character_set => :ucs2 }, 118 | :ucs2_slovenian_ci => { :id => 132, :character_set => :ucs2 }, 119 | :ucs2_spanish2_ci => { :id => 142, :character_set => :ucs2 }, 120 | 
:ucs2_spanish_ci => { :id => 135, :character_set => :ucs2 }, 121 | :ucs2_swedish_ci => { :id => 136, :character_set => :ucs2 }, 122 | :ucs2_turkish_ci => { :id => 137, :character_set => :ucs2 }, 123 | :ucs2_unicode_ci => { :id => 128, :character_set => :ucs2 }, 124 | :ujis_bin => { :id => 91, :character_set => :ujis }, 125 | :ujis_japanese_ci => { :id => 12, :character_set => :ujis }, 126 | :utf16_bin => { :id => 55, :character_set => :utf16 }, 127 | :utf16_czech_ci => { :id => 111, :character_set => :utf16 }, 128 | :utf16_danish_ci => { :id => 112, :character_set => :utf16 }, 129 | :utf16_esperanto_ci => { :id => 118, :character_set => :utf16 }, 130 | :utf16_estonian_ci => { :id => 107, :character_set => :utf16 }, 131 | :utf16_general_ci => { :id => 54, :character_set => :utf16 }, 132 | :utf16_hungarian_ci => { :id => 119, :character_set => :utf16 }, 133 | :utf16_icelandic_ci => { :id => 102, :character_set => :utf16 }, 134 | :utf16_latvian_ci => { :id => 103, :character_set => :utf16 }, 135 | :utf16_lithuanian_ci => { :id => 113, :character_set => :utf16 }, 136 | :utf16_persian_ci => { :id => 117, :character_set => :utf16 }, 137 | :utf16_polish_ci => { :id => 106, :character_set => :utf16 }, 138 | :utf16_romanian_ci => { :id => 104, :character_set => :utf16 }, 139 | :utf16_roman_ci => { :id => 116, :character_set => :utf16 }, 140 | :utf16_sinhala_ci => { :id => 120, :character_set => :utf16 }, 141 | :utf16_slovak_ci => { :id => 114, :character_set => :utf16 }, 142 | :utf16_slovenian_ci => { :id => 105, :character_set => :utf16 }, 143 | :utf16_spanish2_ci => { :id => 115, :character_set => :utf16 }, 144 | :utf16_spanish_ci => { :id => 108, :character_set => :utf16 }, 145 | :utf16_swedish_ci => { :id => 109, :character_set => :utf16 }, 146 | :utf16_turkish_ci => { :id => 110, :character_set => :utf16 }, 147 | :utf16_unicode_ci => { :id => 101, :character_set => :utf16 }, 148 | :utf32_bin => { :id => 61, :character_set => :utf32 }, 149 | :utf32_czech_ci => { :id => 170, :character_set => :utf32 }, 150 | :utf32_danish_ci => { :id => 171, :character_set => :utf32 }, 151 | :utf32_esperanto_ci => { :id => 177, :character_set => :utf32 }, 152 | :utf32_estonian_ci => { :id => 166, :character_set => :utf32 }, 153 | :utf32_general_ci => { :id => 60, :character_set => :utf32 }, 154 | :utf32_hungarian_ci => { :id => 178, :character_set => :utf32 }, 155 | :utf32_icelandic_ci => { :id => 161, :character_set => :utf32 }, 156 | :utf32_latvian_ci => { :id => 162, :character_set => :utf32 }, 157 | :utf32_lithuanian_ci => { :id => 172, :character_set => :utf32 }, 158 | :utf32_persian_ci => { :id => 176, :character_set => :utf32 }, 159 | :utf32_polish_ci => { :id => 165, :character_set => :utf32 }, 160 | :utf32_romanian_ci => { :id => 163, :character_set => :utf32 }, 161 | :utf32_roman_ci => { :id => 175, :character_set => :utf32 }, 162 | :utf32_sinhala_ci => { :id => 179, :character_set => :utf32 }, 163 | :utf32_slovak_ci => { :id => 173, :character_set => :utf32 }, 164 | :utf32_slovenian_ci => { :id => 164, :character_set => :utf32 }, 165 | :utf32_spanish2_ci => { :id => 174, :character_set => :utf32 }, 166 | :utf32_spanish_ci => { :id => 167, :character_set => :utf32 }, 167 | :utf32_swedish_ci => { :id => 168, :character_set => :utf32 }, 168 | :utf32_turkish_ci => { :id => 169, :character_set => :utf32 }, 169 | :utf32_unicode_ci => { :id => 160, :character_set => :utf32 }, 170 | :utf8mb4_bin => { :id => 46, :character_set => :utf8mb4 }, 171 | :utf8mb4_czech_ci => { :id => 234, :character_set => :utf8mb4 
}, 172 | :utf8mb4_danish_ci => { :id => 235, :character_set => :utf8mb4 }, 173 | :utf8mb4_esperanto_ci => { :id => 241, :character_set => :utf8mb4 }, 174 | :utf8mb4_estonian_ci => { :id => 230, :character_set => :utf8mb4 }, 175 | :utf8mb4_general_ci => { :id => 45, :character_set => :utf8mb4 }, 176 | :utf8mb4_hungarian_ci => { :id => 242, :character_set => :utf8mb4 }, 177 | :utf8mb4_icelandic_ci => { :id => 225, :character_set => :utf8mb4 }, 178 | :utf8mb4_latvian_ci => { :id => 226, :character_set => :utf8mb4 }, 179 | :utf8mb4_lithuanian_ci => { :id => 236, :character_set => :utf8mb4 }, 180 | :utf8mb4_persian_ci => { :id => 240, :character_set => :utf8mb4 }, 181 | :utf8mb4_polish_ci => { :id => 229, :character_set => :utf8mb4 }, 182 | :utf8mb4_romanian_ci => { :id => 227, :character_set => :utf8mb4 }, 183 | :utf8mb4_roman_ci => { :id => 239, :character_set => :utf8mb4 }, 184 | :utf8mb4_sinhala_ci => { :id => 243, :character_set => :utf8mb4 }, 185 | :utf8mb4_slovak_ci => { :id => 237, :character_set => :utf8mb4 }, 186 | :utf8mb4_slovenian_ci => { :id => 228, :character_set => :utf8mb4 }, 187 | :utf8mb4_spanish2_ci => { :id => 238, :character_set => :utf8mb4 }, 188 | :utf8mb4_spanish_ci => { :id => 231, :character_set => :utf8mb4 }, 189 | :utf8mb4_swedish_ci => { :id => 232, :character_set => :utf8mb4 }, 190 | :utf8mb4_turkish_ci => { :id => 233, :character_set => :utf8mb4 }, 191 | :utf8mb4_unicode_ci => { :id => 224, :character_set => :utf8mb4 }, 192 | :utf8_bin => { :id => 83, :character_set => :utf8 }, 193 | :utf8_czech_ci => { :id => 202, :character_set => :utf8 }, 194 | :utf8_danish_ci => { :id => 203, :character_set => :utf8 }, 195 | :utf8_esperanto_ci => { :id => 209, :character_set => :utf8 }, 196 | :utf8_estonian_ci => { :id => 198, :character_set => :utf8 }, 197 | :utf8_general_ci => { :id => 33, :character_set => :utf8 }, 198 | :utf8_general_mysql500_ci => { :id => 223, :character_set => :utf8 }, 199 | :utf8_hungarian_ci => { :id => 210, :character_set => :utf8 }, 200 | :utf8_icelandic_ci => { :id => 193, :character_set => :utf8 }, 201 | :utf8_latvian_ci => { :id => 194, :character_set => :utf8 }, 202 | :utf8_lithuanian_ci => { :id => 204, :character_set => :utf8 }, 203 | :utf8_persian_ci => { :id => 208, :character_set => :utf8 }, 204 | :utf8_polish_ci => { :id => 197, :character_set => :utf8 }, 205 | :utf8_romanian_ci => { :id => 195, :character_set => :utf8 }, 206 | :utf8_roman_ci => { :id => 207, :character_set => :utf8 }, 207 | :utf8_sinhala_ci => { :id => 211, :character_set => :utf8 }, 208 | :utf8_slovak_ci => { :id => 205, :character_set => :utf8 }, 209 | :utf8_slovenian_ci => { :id => 196, :character_set => :utf8 }, 210 | :utf8_spanish2_ci => { :id => 206, :character_set => :utf8 }, 211 | :utf8_spanish_ci => { :id => 199, :character_set => :utf8 }, 212 | :utf8_swedish_ci => { :id => 200, :character_set => :utf8 }, 213 | :utf8_turkish_ci => { :id => 201, :character_set => :utf8 }, 214 | :utf8_unicode_ci => { :id => 192, :character_set => :utf8 }, 215 | } 216 | 217 | # An array of collation IDs to collation and character set name for 218 | # efficient lookup by ID. 
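# For example: COLLATION[33] => { :character_set => :utf8, :collation => :utf8_general_ci }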
219 | COLLATION = COLLATION_HASH.inject(Array.new) do |collation_array, item| 220 | collation_array[item[1][:id]] = { 221 | :character_set => item[1][:character_set], 222 | :collation => item[0], 223 | } 224 | collation_array 225 | end 226 | end 227 | -------------------------------------------------------------------------------- /lib/mysql_binlog/reader/binlog_file_reader.rb: -------------------------------------------------------------------------------- 1 | module MysqlBinlog 2 | # Read a binary log from a file on disk. 3 | class BinlogFileReader 4 | MAGIC_SIZE = 4 5 | MAGIC_VALUE = 1852400382 6 | 7 | attr_accessor :tail 8 | 9 | def initialize(filename) 10 | @tail = false 11 | open_file(filename) 12 | end 13 | 14 | def verify_magic 15 | if (magic = read(MAGIC_SIZE).unpack("V").first) != MAGIC_VALUE 16 | raise MalformedBinlogException.new("Magic number #{magic} is incorrect") 17 | end 18 | end 19 | 20 | def open_file(filename) 21 | @dirname = File.dirname(filename) 22 | @filename = File.basename(filename) 23 | @binlog = File.open(filename, "r:BINARY") 24 | 25 | verify_magic 26 | end 27 | 28 | def rotate(filename, position) 29 | retries = 10 30 | begin 31 | open_file(@dirname + "/" + filename) 32 | seek(position) 33 | rescue Errno::ENOENT 34 | # A rotate event will be seen in the previous log file before the 35 | # new file exists. Retry a few times with a little sleep to give 36 | # the server a chance to create the new file. 37 | if (retries -= 1) > 0 38 | sleep 0.01 39 | retry 40 | else 41 | raise 42 | end 43 | end 44 | end 45 | 46 | def filename 47 | @filename 48 | end 49 | 50 | def position 51 | @binlog.tell 52 | end 53 | 54 | def rewind 55 | seek(MAGIC_SIZE) 56 | end 57 | 58 | def seek(pos) 59 | @binlog.seek(pos) 60 | end 61 | 62 | def unget(char) 63 | @binlog.ungetc(char) 64 | end 65 | 66 | def end? 67 | return false if tail 68 | @binlog.eof? 69 | end 70 | 71 | def remaining(header) 72 | header[:payload_end] - @binlog.tell 73 | end 74 | 75 | def skip(header) 76 | seek(header[:next_position]) 77 | end 78 | 79 | def read(length) 80 | if tail 81 | needed_position = position + length 82 | while @binlog.stat.size < needed_position 83 | sleep 0.02 84 | end 85 | end 86 | return "" if length == 0 87 | data = @binlog.read(length) 88 | if !data 89 | raise MalformedBinlogException.new 90 | elsif data.length == 0 91 | raise ZeroReadException.new 92 | elsif data.length < length 93 | raise ShortReadException.new 94 | end 95 | data 96 | end 97 | end 98 | end 99 | -------------------------------------------------------------------------------- /lib/mysql_binlog/reader/binlog_stream_reader.rb: -------------------------------------------------------------------------------- 1 | module MysqlBinlog 2 | # Read a binary log from a stream dumped using the +MysqlBinlogDump+ 3 | # library to request a +COM_BINLOG_DUMP+ from a MySQL server via the 4 | # +Mysql+ library. 5 | class BinlogStreamReader 6 | def initialize(connection, filename, position) 7 | require 'mysql_binlog_dump' 8 | @filename = nil 9 | @position = nil 10 | @packet_data = nil 11 | @packet_pos = nil 12 | @connection = connection 13 | MysqlBinlogDump.binlog_dump(connection, filename, position) 14 | end 15 | 16 | def rotate(filename, position) 17 | @filename = filename 18 | @position = position 19 | end 20 | 21 | def filename 22 | @filename 23 | end 24 | 25 | def position 26 | @position 27 | end 28 | 29 | def rewind 30 | false 31 | end 32 | 33 | def tell 34 | @packet_pos 35 | end 36 | 37 | def end? 
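# A dumped binlog stream has no natural end-of-file, so never report that
# the end has been reached.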
38 | false 39 | end 40 | 41 | def remaining(header) 42 | @packet_data.length - @packet_pos 43 | end 44 | 45 | def skip(header) 46 | @packet_data = nil 47 | @packet_pos = nil 48 | end 49 | 50 | def read_packet 51 | @packet_data = MysqlBinlogDump.next_packet(@connection) 52 | @packet_pos = 0 53 | end 54 | 55 | def read(length) 56 | unless @packet_data 57 | read_packet 58 | return nil unless @packet_data 59 | end 60 | pos = @packet_pos 61 | @position += length if @position 62 | @packet_pos += length 63 | @packet_data[pos...(pos+length)] 64 | end 65 | end 66 | end -------------------------------------------------------------------------------- /lib/mysql_binlog/reader/debugging_reader.rb: -------------------------------------------------------------------------------- 1 | module MysqlBinlog 2 | # A simple method to format a string as a hex dump, one byte at a time, 3 | # with no more than 24 bytes per line and spaces between each byte. 4 | # There is probably a better way to do this, but I don't know it. 5 | def hexdump(data) 6 | data.bytes.each_slice(24).inject("") do |string, slice| 7 | string << " " + slice.map { |b| "%02x" % b }.join(" ") + "\n" 8 | string 9 | end 10 | end 11 | 12 | # Wrap another Reader class, passing through all method calls, but optionally 13 | # printing the contents of data read, and the method calls themselves. This 14 | # is very useful for debugging the library itself, or if exceptions are 15 | # getting thrown when reading a possibly unsupported log. 16 | class DebuggingReader 17 | def initialize(wrapped, options={}) 18 | @wrapped = wrapped 19 | @options = options 20 | end 21 | 22 | # Pass through all method calls to the reader class we're delegating to. 23 | # If various options are enabled, print debugging information. 24 | def method_missing(method, *args) 25 | if @options[:calls] 26 | puts "#{@wrapped.class}.#{method}" 27 | end 28 | 29 | return_value = @wrapped.send(method, *args) 30 | 31 | # Print the returned data from :read in a nice hex dump format.
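# The caller information names the parser method that requested the read,
# which helps match the dumped bytes to the code that consumed them.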
32 | if method == :read and @options[:data] 33 | puts "Read #{args[0]} bytes #{caller.first.split(":")[2]}:" 34 | puts hexdump(return_value) 35 | end 36 | 37 | return_value 38 | end 39 | end 40 | end -------------------------------------------------------------------------------- /lib/mysql_binlog/version.rb: -------------------------------------------------------------------------------- 1 | module MysqlBinlog 2 | VERSION = "0.3.7" 3 | end 4 | -------------------------------------------------------------------------------- /mysql_binlog.gemspec: -------------------------------------------------------------------------------- 1 | lib = File.expand_path('../lib/', __FILE__) 2 | $:.unshift lib unless $:.include?(lib) 3 | require "mysql_binlog/version" 4 | 5 | Gem::Specification.new do |s| 6 | s.name = 'mysql_binlog' 7 | s.version = MysqlBinlog::VERSION 8 | s.date = Time.now.strftime("%Y-%m-%d") 9 | s.summary = 'MySQL Binary Log Parser' 10 | s.license = 'BSD-3-Clause' 11 | s.description = 'Library for parsing MySQL binary logs in Ruby' 12 | s.authors = [ 'Jeremy Cole' ] 13 | s.email = 'jeremy@jcole.us' 14 | s.homepage = 'http://jcole.us/' 15 | s.files = [ 16 | 'lib/mysql_binlog.rb', 17 | 'lib/mysql_binlog/binlog.rb', 18 | 'lib/mysql_binlog/binlog_event_parser.rb', 19 | 'lib/mysql_binlog/binlog_field_parser.rb', 20 | 'lib/mysql_binlog/mysql_character_set.rb', 21 | 'lib/mysql_binlog/reader/binlog_file_reader.rb', 22 | 'lib/mysql_binlog/reader/binlog_stream_reader.rb', 23 | 'lib/mysql_binlog/reader/debugging_reader.rb', 24 | 'lib/mysql_binlog/version.rb', 25 | ] 26 | s.executables = [ 27 | 'mysql_binlog_dump', 28 | 'mysql_binlog_summary', 29 | ] 30 | end 31 | --------------------------------------------------------------------------------