├── tests ├── __init__.py ├── parser_test.py └── ybinlogp_test.py ├── ybinlogp.pth ├── .gitignore ├── src ├── ybinlogp │ ├── version.py │ ├── __init__.py │ └── parser.py ├── debugs.h ├── ybinlogp.c ├── ybinlogp-private.h ├── ybinlogp.h └── libybinlogp.c ├── testing ├── data │ ├── mysql-bin.default-path │ └── mysql-bin.delayed-event ├── gen_test_data.sql ├── print_events.py └── gen_test_data.sh ├── MANIFEST.in ├── Makefile ├── license.txt ├── setup.py ├── ybinlogp.spec ├── README.md └── NEWS.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ybinlogp.pth: -------------------------------------------------------------------------------- 1 | src/ 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .*.swp 3 | *.egg-info 4 | dist/ 5 | build/ 6 | logs/ 7 | -------------------------------------------------------------------------------- /src/ybinlogp/version.py: -------------------------------------------------------------------------------- 1 | version_info = 0, 6 2 | 3 | __version__ = '.'.join(map(str, version_info)) 4 | -------------------------------------------------------------------------------- /testing/data/mysql-bin.default-path: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/ybinlogp/HEAD/testing/data/mysql-bin.default-path -------------------------------------------------------------------------------- /testing/data/mysql-bin.delayed-event: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/ybinlogp/HEAD/testing/data/mysql-bin.delayed-event -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include build/Makefile 2 | include license.txt 3 | include Makefile 4 | include NEWS.md 5 | include README.md 6 | include src/*.c 7 | include src/*.h 8 | -------------------------------------------------------------------------------- /src/ybinlogp/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | __author__ = 'James Brown ' 3 | 4 | from ybinlogp.parser import NextEventError 5 | from ybinlogp.parser import NoEventsAfterTime 6 | from ybinlogp.parser import NoEventsAfterOffset 7 | from ybinlogp.parser import EmptyEventError 8 | from ybinlogp.parser import YBinlogP 9 | from ybinlogp.parser import EventType 10 | from ybinlogp.version import __version__ 11 | from ybinlogp.version import version_info 12 | -------------------------------------------------------------------------------- /tests/parser_test.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | 3 | import mock 4 | from testify import TestCase, setup, assert_equal 5 | 6 | from ybinlogp.parser import YBinlogP, EventType 7 | 8 | 9 | class YBinlogPTestCase(TestCase): 10 | 11 | @setup 12 | def setup_parser(self): 13 | self.binlog_file = tempfile.NamedTemporaryFile() 14 | self.parser = YBinlogP(self.binlog_file.name) 15 | 16 | 17 | # TODO: empty event at start 18 | 19 | # TODO: empty event midway 20 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | .PHONY: flakes tests clean docs build 4 | 5 | 6 | all: build 7 | 8 | build: 9 | make -C build all 10 | 11 | debug: 12 | make -C build debug 13 | 14 | flakes: 15 | find -name "*.py" -print0 | xargs -0 pyflakes 16 | 17 | test: tests 18 | 19 | tests: all 20 | LD_LIBRARY_PATH=build \ 21 | PYTHONPATH=src \ 22 | testify --summary --exclude-suite=disabled --verbose tests 23 | 24 | clean: 25 | make -C build clean 26 | find . -iname '*.pyc' -delete 27 | 28 | -------------------------------------------------------------------------------- /license.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010-2011, Yelp, Inc. 2 | 3 | Permission to use, copy, modify, and/or distribute this software for any 4 | purpose with or without fee is hereby granted, provided that the above 5 | copyright notice and this permission notice appear in all copies. 6 | 7 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 | ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 | OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 | -------------------------------------------------------------------------------- /testing/gen_test_data.sql: -------------------------------------------------------------------------------- 1 | -- Used by testing/gen_test_data.sh to create testing data 2 | 3 | CREATE DATABASE ybinlogp; 4 | USE ybinlogp; 5 | CREATE TABLE test1(x INT AUTO_INCREMENT PRIMARY KEY) ENGINE=InnoDB; 6 | INSERT INTO test1 VALUES(1); 7 | INSERT INTO test1 VALUES(2); 8 | INSERT INTO test1 VALUES(3); 9 | INSERT INTO test1 VALUES(4); 10 | INSERT INTO test1 VALUES(5); 11 | INSERT INTO test1 VALUES(6); 12 | INSERT INTO test1 VALUES(7); 13 | 14 | CREATE DATABASE foobar; 15 | USE foobar; 16 | CREATE TABLE test2(id INT AUTO_INCREMENT PRIMARY KEY, x VARCHAR(255)) ENGINE=InnoDB; 17 | INSERT INTO test2(x) VALUES("This is the winter of our discontent"); 18 | INSERT INTO test2(x) VALUES("Tomorrow and tomorrow and tomorrow"); 19 | INSERT INTO test2(x) VALUES("Bananas r good"); 20 | 21 | FLUSH LOGS; 22 | 23 | USE ybinlogp; 24 | 25 | BEGIN; 26 | INSERT INTO test1 VALUES(8); 27 | ROLLBACK; 28 | BEGIN; 29 | INSERT INTO test1 VALUES(8); 30 | COMMIT; 31 | INSERT INTO test1 VALUES(9); 32 | 33 | FLUSH LOGS; 34 | -------------------------------------------------------------------------------- /testing/print_events.py: -------------------------------------------------------------------------------- 1 | """ 2 | Print events from a mysql binlog. 3 | """ 4 | import logging 5 | import os.path 6 | import sys 7 | 8 | from ybinlogp import YBinlogP 9 | 10 | 11 | def get_filename(): 12 | if len(sys.argv) != 2: 13 | raise SystemExit("Usage: %s " % sys.argv[0]) 14 | return sys.argv[1] 15 | 16 | 17 | def main(filename): 18 | dirname = os.path.dirname(filename) 19 | parser = YBinlogP(filename, always_update=True) 20 | while True: 21 | for i, event in enumerate(parser): 22 | print event 23 | if event.event_type == "ROTATE_EVENT": 24 | next_file = os.path.join(dirname, event.data.file_name) 25 | parser.close() 26 | parser = YBinlogP(next_file, always_update=True) 27 | else: 28 | print "Got to end at %r" % (parser.tell(),) 29 | break 30 | parser.close() 31 | 32 | 33 | if __name__ == "__main__": 34 | logging.basicConfig() 35 | filename = get_filename() 36 | main(filename) 37 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | import subprocess 3 | 4 | from setuptools import setup 5 | from distutils.command.build import build 6 | 7 | class YBinlogPBuild(build): 8 | def initialize_options(self): 9 | build.initialize_options(self) 10 | self.build_base = os.path.join(self.build_base, 'python') 11 | 12 | def run(self): 13 | subprocess.call(['make', 'build']) 14 | build.run(self) 15 | 16 | about = {} 17 | with open(os.path.join(os.path.dirname(__file__), 'src/ybinlogp/version.py')) as f: 18 | exec f.read() in about 19 | 20 | setup( 21 | author='Yelp', 22 | author_email='yelplabs@yelp.com', 23 | cmdclass={'build': YBinlogPBuild}, 24 | data_files=[('lib', ['build/libybinlogp.so', 'build/libybinlogp.so.1']), 25 | ('include', ['src/ybinlogp.h'])], 26 | description='Library, program, and python bindings for parsing MySQL binlogs', 27 | license='BSD', 28 | name='YBinlogP', 29 | package_dir={'': 'src'}, 30 | packages=['ybinlogp'], 31 | scripts=['build/ybinlogp'], 32 | url='http://github.com/Yelp/ybinlogp', 33 | version=about['__version__'] 34 | ) 35 | -------------------------------------------------------------------------------- /src/debugs.h: -------------------------------------------------------------------------------- 1 | #ifndef DEBUGS_H 2 | #define DEBUGS_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define Jperror(lbl, format) { fprintf(stdout, format " [%s:%d] : %s\n", __FILE__, __LINE__, strerror(errno)) ; goto lbl; } 9 | #define Jperrori(lbl, format, ...) { fprintf(stdout, format " [%s:%d] : %s\n", ##__VA_ARGS__, __FILE__, __LINE__, strerror(errno)) ; goto lbl; } 10 | #if DEBUG 11 | #define Dprintf(...) fprintf(stderr, __VA_ARGS__) 12 | #define Dperror(format) fprintf(stdout, format " [%s:%d] : %s\n", __FILE__, __LINE__, strerror(errno)) 13 | #define Dperrori(format, ...) fprintf(stdout, format " [%s:%d] : %s\n", ##__VA_ARGS__, __FILE__, __LINE__, strerror(errno)) 14 | #define DJperror(lbl, format) { fprintf(stdout, format " [%s:%d] : %s\n", __FILE__, __LINE__, strerror(errno)) ; goto lbl; } 15 | #define DJperrori(lbl, format, ...) { fprintf(stdout, format " [%s:%d] : %s\n", ##__VA_ARGS__, __FILE__, __LINE__, strerror(errno)) ; goto lbl; } 16 | #else 17 | #define Dprintf(...) (void) 0 18 | #define Dperror(...) (void) 0 19 | #define Dperrori(...) (void) 0 20 | #define DJperror(lbl, ...) goto lbl 21 | #define DJperrori(lbl, ...) goto lbl 22 | #endif /* DEBUG */ 23 | 24 | #endif /* DEBUGS_H */ 25 | -------------------------------------------------------------------------------- /ybinlogp.spec: -------------------------------------------------------------------------------- 1 | %define name ybinlogp 2 | %define version 0.6 3 | %define release 1 4 | 5 | Summary: Library, program, and python bindings for parsing MySQL binlogs 6 | Name: %{name} 7 | Version: %{version} 8 | Release: %{release} 9 | Source0: %{name}-%{version}.tar.gz 10 | License: BSD 11 | Group: Development/Libraries 12 | BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-buildroot 13 | Prefix: %{_prefix} 14 | BuildArch: x86_64 15 | Vendor: Yelp 16 | 17 | %description 18 | Library, program, and python bindings for parsing MySQL binlogs 19 | 20 | %prep 21 | %setup 22 | 23 | %build 24 | make 25 | 26 | %install 27 | install -D -m 444 src/ybinlogp.h $RPM_BUILD_ROOT/usr/include/ybinlogp.h 28 | install -D -m 755 build/ybinlogp $RPM_BUILD_ROOT/usr/sbin/ybinlogp 29 | install -D -m 555 build/libybinlogp.so.1 $RPM_BUILD_ROOT/usr/lib64/libybinlogp.so.1 30 | install -D -m 555 build/libybinlogp.so $RPM_BUILD_ROOT/usr/lib64/libybinlogp.so 31 | install -D -d src/ybinlogp $RPM_BUILD_ROOT/usr/lib64/python2.6/site-packages/ybinlogp 32 | 33 | %clean 34 | rm -rf $RPM_BUILD_ROOT 35 | 36 | %files 37 | /usr/include/ybinlogp.h 38 | /usr/sbin/ybinlogp 39 | /usr/lib64/libybinlogp.so.1 40 | /usr/lib64/libybinlogp.so 41 | /usr/lib64/python2.6/site-packages/ybinlogp 42 | %defattr(-,root,root) 43 | -------------------------------------------------------------------------------- /tests/ybinlogp_test.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | 3 | from testify import TestCase, setup, assert_equal 4 | 5 | from ybinlogp import YBinlogP, EventType 6 | 7 | 8 | class YBinlogPAcceptanceTestCase(TestCase): 9 | 10 | _suites = ['acceptance'] 11 | 12 | def test_default_path(self): 13 | filename = 'testing/data/mysql-bin.default-path' 14 | parser = YBinlogP(filename) 15 | events = list(parser) 16 | assert_equal(len(events), 38) 17 | 18 | last_event = events[-1] 19 | assert_equal(last_event.event_type, EventType.rotate) 20 | assert_equal(last_event.data.file_name, 'mysql-bin.000008') 21 | assert_equal(last_event.data.next_position, 4) 22 | 23 | last_commit = events[-2] 24 | assert_equal(last_commit.event_type, EventType.xid) 25 | 26 | last_query = events[-3] 27 | assert_equal(last_query.event_type, EventType.query) 28 | assert_equal(last_query.data.db_name, 'foobar') 29 | assert_equal(last_query.data.statement, 30 | 'INSERT INTO test2(x) VALUES("Bananas r good")') 31 | 32 | def test_with_delayed_statement(self): 33 | filename = 'testing/data/mysql-bin.delayed-event' 34 | parser = YBinlogP(filename) 35 | events = list(parser) 36 | assert_equal(len(events), 38) 37 | # Event has a timestamp way in the past relative to FDE 38 | assert_equal(events[30].time, datetime.datetime(2013, 07, 30, 10, 2, 37)) 39 | 40 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ybinlogp - a fast mysql binlog parsing utility 2 | ============================================== 3 | **ybinlogp** is a mysql utility for analyzing mysql binlogs. It provides a library, 4 | libybinlogp, which has a really terrible build system, a little tool documented 5 | below which uses this library, and a python-ctypes wrapper that exposes some 6 | critical functionality (namely, opening a binlog, reading from it, and handling 7 | query, xid, and rotate events). 8 | 9 | Usage 10 | ----- 11 | ybinlogp [options] binlog-file 12 | 13 | Options: 14 | 15 | * `-o OFFSET Find events after a given offset` 16 | * `-t TIME Find events after a given unix timestamp` 17 | * `-a NUMBER Print N events after the given one (accepts 'all')` 18 | * `-D DBNAME Filter out query statements not on database DBNAME` 19 | * `-q Be quieter (may be specified multiple times)` 20 | * `-h Show help` 21 | 22 | 23 | Why? 24 | ---- 25 | If you have a replicated MySQL instance, you're probably used to ocassionally seeing 26 | it freak out. **ybinlogp** lets you just put in a time or offset and see exactly what 27 | was going on around then. Compare this to the default mysql binlog parser, which uses 28 | the linked list feature in the binlogs and so is uselessly slow when dealing with anything 29 | past the first few events (and also doesn't have a time search feature; how often do 30 | you actually know what the offset of an event is?) 31 | 32 | Who? 33 | ---- 34 | **ybinlogp** was developed by some engineers at [Yelp](http://www.yelp.com) for use 35 | with their MySQL installation. The initial development was done by James Brown (); 36 | Evan Klitzke () worked on some bugfixes and a partially-complete Python API, and 37 | Eskil Olsen () has a branch that does some crazy stuff with Boost. 38 | 39 | Contributing 40 | ----------- 41 | It's Github... Fork away! 42 | 43 | License 44 | ------- 45 | This work is available under the ISC (OpenBSD) license. The full contents are available 46 | as license.txt 47 | -------------------------------------------------------------------------------- /testing/gen_test_data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Generate test data for ybinlogp 3 | 4 | workdir=`mktemp -d mysqldXXXXXXXXXX -t` 5 | pidfile="$workdir/mysqld.pid" 6 | errfile="$workdir/mysqld.err" 7 | datadir="$workdir/data/" 8 | logdir="`pwd`/logs/" 9 | tmpdir="$workdir/tmp/" 10 | socket="$workdir/mysqld.sock" 11 | 12 | stop_mysql() { 13 | pid=`cat "$pidfile" 2>/dev/null` 14 | if [ -z "$pid" ] ; then 15 | return 16 | fi 17 | if kill -0 "$pid" ; then 18 | mysqladmin -S "$socket" shutdown 2>&1 > /dev/null 19 | fi 20 | } 21 | 22 | mysql_alive() { 23 | mysqladmin -S "$socket" ping 2>&1 > /dev/null 24 | return $? 25 | } 26 | 27 | do_at_exit() { 28 | if [ -d "$workdir" ] ; then 29 | rm -rf "$workdir" 30 | fi 31 | } 32 | 33 | mkdir "$datadir" 34 | mkdir -p "$logdir" 35 | mkdir "$tmpdir" 36 | 37 | trap 'do_at_exit' EXIT INT TERM 38 | 39 | echo "installing db" 40 | mysql_install_db --datadir="$datadir" --ldata="$datadir" \ 41 | 2>&1 > "$logdir/mysql_install.log" 42 | 43 | echo "starting mysqld on $socket" 44 | mysqld --pid-file="$pidfile" \ 45 | --log-error="$errfile" \ 46 | --log-warnings \ 47 | --datadir="$datadir" \ 48 | --server-id=1337 \ 49 | --skip-external-locking \ 50 | --log_bin="$logdir/mysql-bin.log" \ 51 | --socket="$socket" \ 52 | --skip-networking \ 53 | --innodb_file_per_table \ 54 | --tmpdir="$tmpdir" \ 55 | --innodb_buffer_pool_size=128M \ 56 | --innodb_fast_shutdown=2 \ 57 | --skip-innodb_checksums \ 58 | --sync-binlog=1 \ 59 | --max-allowed-packet=32M \ 60 | --skip-grant-tables 2>&1 > "$logdir/mysql_run.log" & 61 | 62 | for i in `seq 1 10` ; do 63 | if mysql_alive > /dev/null; then 64 | echo "alive" 65 | break 66 | fi 67 | echo -n . 68 | sleep 1 69 | done 70 | if ! mysql_alive ; then 71 | echo "failed to start" 72 | exit 1 73 | fi 74 | 75 | echo "writing test data." 76 | mysql -S "$socket" < testing/gen_test_data.sql 77 | 78 | # Start a shell to prevent exit 79 | echo "Working dir: $workdir" 80 | $SHELL 81 | 82 | echo "shutting down mysql" 83 | stop_mysql 84 | -------------------------------------------------------------------------------- /NEWS.md: -------------------------------------------------------------------------------- 1 | 0.6 2 | --- 3 | * Fixed a bug that would cause an EmptyEventError if ybinlogp received an event 4 | that was older than 1 hour 5 | * Misc code cleanup, repo restructuring and adding tests 6 | 7 | 0.5.8.5 8 | ------- 9 | * Apparently, I broke time-based search in the last release. This fixes it. 10 | 11 | 0.5.8.4 12 | ------- 13 | * More bugfixes when attempting to use ybinlogp to "tail" a log 14 | 15 | 0.5.8.3 16 | ------- 17 | * Minor bugfix in Python bindings 18 | 19 | 0.5.8.2 20 | ------ 21 | * Limit the number of retries in a row 22 | 23 | 0.5.8.1 24 | ------- 25 | * Minor bugfix 26 | 27 | 0.5.8 28 | ----- 29 | * Add some retry logic around reading the binlog 30 | 31 | 0.5.7 32 | ----- 33 | * Change Makefile ordering 34 | 35 | 0.5.6 36 | ---- 37 | * misc bugs 38 | 39 | 0.5.5 40 | ----- 41 | * 0.5.4 lost some commas, sorry 42 | 43 | 0.5.4 44 | ----- 45 | * Some code cleanup 46 | * Fixes a bug where the library might return a partial event 47 | * Improves the python example script 48 | * Adds a TODO file 49 | 50 | 0.5.3 51 | ----- 52 | * indentation bug 53 | 54 | 0.5.2 55 | ----- 56 | * Fix a memory leak. Thanks to Evan Klitzke for pointing it 57 | out. 58 | 59 | 0.5.1 60 | ----- 61 | * Remove a print statement that snuck into the python bindings 62 | 63 | 0.5.0 64 | ----- 65 | Big update! 66 | 67 | * Adds python bindings with ctypes! 68 | * Refactors into a library! 69 | * A new and differently-terrible Makefile! 70 | 71 | 0.3.1 72 | ----- 73 | * Bump NEWS.md 74 | * Remove debian packaging (maintaining internally only now) 75 | 76 | 0.3 77 | --- 78 | * Adds Server-ID to the match heuristics (might break if you reparent 79 | without doing a `FLUSH LOGS`, use `-S` to disable 80 | * Supports parsing the status variables in the binlog (which, as far as 81 | I can tell, `mysqlbinlog` doesn't do correctly 82 | * Lots of bugfixes 83 | 84 | 0.2 85 | --- 86 | * Fixes a bug in 0.1 which caused an infinite loop if a binlog ended with a 87 | `STOP_EVENT` instead of a `ROTATE_EVENT` 88 | * Added `-Q` mode to only print queries 89 | * Added `-D` option to limit query printing to specific databases 90 | * Added `-v` option to be more verbose 91 | 92 | 0.1 93 | --- 94 | * Initial release. Includes debian packaging. 95 | -------------------------------------------------------------------------------- /src/ybinlogp.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ybinlogp: A mysql binary log parser and query tool 3 | * 4 | * (C) 2010-2011 Yelp, Inc. 5 | * 6 | * This work is licensed under the ISC/OpenBSD License. The full 7 | * contents of that license can be found under license.txt 8 | */ 9 | 10 | #define _XOPEN_SOURCE 600 11 | #define _GNU_SOURCE 12 | 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | 21 | #include "debugs.h" 22 | #include "ybinlogp.h" 23 | 24 | void usage(void) { 25 | fprintf(stderr, "ybinlogp_test [options] binlog\n"); 26 | fprintf(stderr, "\n"); 27 | fprintf(stderr, "Options\n"); 28 | fprintf(stderr, "\t-h show this help\n"); 29 | fprintf(stderr, "\t-E do not enforce server-id checking\n"); 30 | fprintf(stderr, "\t-o OFFSET find the first event after the given offset\n"); 31 | fprintf(stderr, "\t-t TIME find the first event after the given time\n"); 32 | fprintf(stderr, "\t-a COUNT When used with one of the above, print COUNT items after the first one, default 2\n"); 33 | fprintf(stderr, "\t\t\t\tAccepts either an integer or the text 'all'\n"); 34 | fprintf(stderr, "\t-D DBNAME Filter query events that were not in DBNAME\n"); 35 | fprintf(stderr, "\t\t\t\tNote that this still shows transaction control events\n"); 36 | fprintf(stderr, "\t\t\t\tsince those do not have an associated database. Mea culpa.\n"); 37 | fprintf(stderr, "\t-q Be quieter\n"); 38 | } 39 | 40 | int main(int argc, char** argv) { 41 | int opt; 42 | int fd; 43 | struct ybp_binlog_parser* bp; 44 | struct ybp_event* evbuf; 45 | long starting_offset = -1; 46 | long starting_time = -1; 47 | int num_to_show = 2; 48 | int show_all = false; 49 | bool q_mode = false; 50 | bool esi = true; 51 | char* database_limit = NULL; 52 | while ((opt = getopt(argc, argv, "ho:t:a:D:qE")) != -1) { 53 | switch (opt) { 54 | case 'h': 55 | usage(); 56 | return 0; 57 | case 'o': /* Offset mode */ 58 | starting_offset = atoll(optarg); 59 | break; 60 | case 'E': 61 | esi = false; 62 | break; 63 | case 't': /* Time mode */ 64 | starting_time = atoll(optarg); 65 | break; 66 | case 'a': 67 | if (strncmp(optarg, "all", 3) == 0) { 68 | num_to_show = 2; 69 | show_all = 1; 70 | break; 71 | } 72 | num_to_show = atoi(optarg); 73 | if (num_to_show < 1) 74 | num_to_show = 1; 75 | break; 76 | case 'D': 77 | database_limit = strdup(optarg); 78 | break; 79 | case 'q': 80 | q_mode = true; 81 | break; 82 | case '?': 83 | fprintf(stderr, "Unknown argument %c\n", optopt); 84 | usage(); 85 | return 1; 86 | break; 87 | } 88 | } 89 | if (optind >= argc) { 90 | usage(); 91 | return 2; 92 | } 93 | if ((fd = open(argv[optind], O_RDONLY|O_LARGEFILE)) <= 0) { 94 | perror("Error opening file"); 95 | return 1; 96 | } 97 | if ((bp = ybp_get_binlog_parser(fd)) == NULL) { 98 | perror("init_binlog_parser"); 99 | return 1; 100 | } 101 | bp->enforce_server_id = esi; 102 | if ((evbuf = malloc(sizeof(struct ybp_event))) == NULL) { 103 | perror("malloc event"); 104 | return 1; 105 | } 106 | ybp_init_event(evbuf); 107 | if (starting_offset >= 0) { 108 | off64_t offset = ybp_nearest_offset(bp, starting_offset); 109 | if (offset == -2) { 110 | fprintf(stderr, "Unable to find anything after offset %ld\n", starting_offset); 111 | return 1; 112 | } 113 | else if (offset == -1) { 114 | perror("nearest_offset"); 115 | return 1; 116 | } 117 | else { 118 | ybp_rewind_bp(bp, offset); 119 | } 120 | } 121 | if (starting_time >= 0) { 122 | off64_t offset = ybp_nearest_time(bp, starting_time); 123 | if (offset == -2) { 124 | fprintf(stderr, "Unable to find anything after time %ld\n", starting_time); 125 | return 1; 126 | } 127 | else if (offset == -1) { 128 | perror("nearest_time"); 129 | return 1; 130 | } 131 | else { 132 | ybp_rewind_bp(bp, offset); 133 | } 134 | } 135 | int i = 0; 136 | while ((ybp_next_event(bp, evbuf) >= 0) && (show_all || i < num_to_show)) { 137 | if (q_mode) { 138 | if (evbuf->type_code == QUERY_EVENT) { 139 | struct ybp_query_event_safe* s = ybp_event_to_safe_qe(evbuf); 140 | if ((database_limit == NULL) || (strcmp(s->db_name, database_limit) == 0)) { 141 | printf("%d %s\n", evbuf->timestamp, s->statement); 142 | } 143 | ybp_dispose_safe_qe(s); 144 | } 145 | else if (evbuf->type_code == XID_EVENT) { 146 | struct ybp_xid_event* s = ybp_event_to_safe_xe(evbuf); 147 | printf("%d XID %llu\n", evbuf->timestamp, (long long unsigned)s->id); 148 | ybp_dispose_safe_xe(s); 149 | } 150 | } else { 151 | ybp_print_event(evbuf, bp, stdout, q_mode, false, database_limit); 152 | fprintf(stdout, "\n"); 153 | } 154 | ybp_reset_event(evbuf); 155 | i+=1; 156 | } 157 | ybp_dispose_event(evbuf); 158 | ybp_dispose_binlog_parser(bp); 159 | } 160 | 161 | /* vim: set sts=0 sw=4 ts=4 noexpandtab: */ 162 | -------------------------------------------------------------------------------- /src/ybinlogp-private.h: -------------------------------------------------------------------------------- 1 | /* 2 | * binlogp: A mysql binary log parser and query tool 3 | * 4 | * (C) 2010-2011 Yelp, Inc. 5 | * 6 | * This work is licensed under the ISC/OpenBSD License. The full 7 | * contents of that license can be found under license.txt 8 | */ 9 | 10 | #ifndef _YBINLOGP_PRIVATE_H_ 11 | #define _YBINLOGP_PRIVATE_H_ 12 | 13 | /******* various mappings ********/ 14 | static const char* ybpi_event_types[27] = { 15 | "UNKNOWN_EVENT", // 0 16 | "START_EVENT_V3", // 1 17 | "QUERY_EVENT", // 2 18 | "STOP_EVENT", // 3 19 | "ROTATE_EVENT", // 4 20 | "INTVAR_EVENT", // 5 21 | "LOAD_EVENT", // 6 22 | "SLAVE_EVENT", // 7 23 | "CREATE_FILE_EVENT", // 8 24 | "APPEND_BLOCK_EVENT", // 9 25 | "EXEC_LOAD_EVENT", // 10 26 | "DELETE_FILE_EVENT", // 11 27 | "NEW_LOAD_EVENT", // 12 28 | "RAND_EVENT", // 13 29 | "USER_VAR_EVENT", // 14 30 | "FORMAT_DESCRIPTION_EVENT", // 15 31 | "XID_EVENT", // 16 32 | "BEGIN_LOAD_QUERY_EVENT", // 17 33 | "EXECUTE_LOAD_QUERY_EVENT", // 18 34 | "TABLE_MAP_EVENT", // 19 35 | "PRE_GA_WRITE_ROWS_EVENT", // 20 36 | "PRE_GA_DELETE_ROWS_EVENT", // 21 37 | "WRITE_ROWS_EVENT", // 22 38 | "UPDATE_ROWS_EVENT", // 23 39 | "DELETE_ROWS_EVENT", // 24 40 | "INCIDENT_EVENT", // 25 41 | "HEARTBEAT_LOG_EVENT", // 26 42 | }; 43 | 44 | static const char* ybpi_variable_types[10] = { 45 | "Q_FLAGS2_CODE", // 0 46 | "Q_SQL_MODE_CODE", // 1 47 | "Q_CATALOG_CODE", // 2 48 | "Q_AUTO_INCREMENT", // 3 49 | "Q_CHARSET_CODE", // 4 50 | "Q_TIME_ZONE_CODE", // 5 51 | "Q_CATALOG_NZ_CODE", // 6 52 | "Q_LC_TIME_NAMES_CODE", // 7 53 | "Q_CHARSET_DATABASE_CODE", // 8 54 | "Q_TABLE_MAP_FOR_UPDATE_CODE", // 9 55 | }; 56 | 57 | static const char* ybpi_intvar_types[3] = { 58 | "", 59 | "LAST_INSERT_ID_EVENT", // 1 60 | "INSERT_ID_EVENT", // 2 61 | }; 62 | 63 | static const char* ybpi_flags[16] = { 64 | "LOG_EVENT_BINLOG_IN_USE", // 0x01 65 | "LOG_EVENT_FORCED_ROTATE", // 0x02 (deprecated) 66 | "LOG_EVENT_THREAD_SPECIFIC", // 0x04 67 | "LOG_EVENT_SUPPRESS_USE", // 0x08 68 | "LOG_EVENT_UPDATE_TABLE_MAP_VERSION", // 0x10 69 | "LOG_EVENT_ARTIFICIAL", // 0x20 70 | "LOG_EVENT_RELAY_LOG", // 0x40 71 | "", 72 | "", 73 | "", 74 | "", 75 | "", 76 | "", 77 | "", 78 | "", 79 | "", 80 | }; 81 | 82 | /* The mysterious FLAGS2 binlog code. 83 | * Seems to be a subset of mysql options. 84 | * A very small subset. 85 | */ 86 | static const char* ybpi_flags2[32] = { 87 | "", // 0x01 88 | "", // 0x02 89 | "", // 0x04 90 | "", // 0x08 91 | "", // 0x10 92 | "", // 0x20 93 | "", // 0x40 94 | "", // 0x80 95 | "", // 0x100 96 | "", // 0x200 97 | "", // 0x400 98 | "", // 0x800 99 | "", // 0x1000 100 | "", // 0x2000 101 | "OPTION_AUTO_IS_NULL", // 0x4000 102 | "", // 0x8000 103 | "", // 0x10000 104 | "", // 0x20000 105 | "", // 0x40000 106 | "OPTION_NOT_AUTOCOMMIT", // 0x80000 107 | "", // 0x100000 108 | "", // 0x200000 109 | "", // 0x400000 110 | "", // 0x800000 111 | "", // 0x1000000 112 | "", // 0x2000000 113 | "OPTION_NO_FOREIGN_KEY_CHECKS", // 0x4000000 114 | "OPTION_RELAXED_UNIQUE_CHECKS", // 0x8000000 115 | }; 116 | 117 | /* Map of the lengths of status var data. 118 | * -1 indicates variable (the first byte is a length byte) 119 | * -2 indicates variable + 1 (the first byte is a length byte that is 120 | * wrong) 121 | */ 122 | static int ybpi_status_var_data_len_by_type[10] = { 123 | 4, // 0 = Q_FLAGS2_CODE 124 | 8, // 1 = Q_SQL_MODE_CODE 125 | -2,// 2 = Q_CATALOG_CODE (length byte + string + NUL) 126 | 4, // 3 = Q_AUTO_INCREMENT (2 2-byte ints) 127 | 6, // 4 = Q_CHARSET_CODE (3 2-byte ints) 128 | -1,// 5 = Q_TIME_ZONE_CODE (length byte + string) 129 | -1,// 6 = Q_CATALOG_NZ_CODE (length byte + string) 130 | 2, // 7 = Q_LC_TIME_NAMES_COE 131 | 2, // 8 = Q_CHARSET_DATABASE_CODE 132 | 8, // 9 = Q_TABLE_MAP_FOR_UPDATE_COE 133 | }; 134 | 135 | enum ybpi_e_status_var_types { 136 | Q_FLAGS2_CODE=0, 137 | Q_SQL_MODE_CODE=1, 138 | Q_CATALOG_CODE=2, 139 | Q_AUTO_INCREMENT=3, 140 | Q_CHARSET_CODE=4, 141 | Q_TIME_ZONE_CODE=5, 142 | Q_CATALOG_NZ_CODE=6, 143 | Q_LC_TIME_NAMES_CODE=7, 144 | Q_CHARSET_DATABASE_CODE=8, 145 | Q_TABLE_MAP_FOR_UPDATE_CODE=9 146 | }; 147 | 148 | static const char* ybpi_status_var_types[10] = { 149 | "Q_FLAGS2_CODE", 150 | "Q_SQL_MODE_CODE", 151 | "Q_CATALOG_CODE", 152 | "Q_AUTO_INCREMENT", 153 | "Q_CHARSET_CODE", 154 | "Q_TIME_ZONE_CODE", 155 | "Q_CATALOG_NZ_CODE", 156 | "Q_LC_TIME_NAMES_CODE", 157 | "Q_CHARSET_DATABASE_CODE", 158 | "Q_TABLE_MAP_FOR_UPDATE_CODE" 159 | }; 160 | 161 | 162 | /** Macros to do things with event data 163 | * 164 | * These macros are horribly unsafe. Only use them of you know EXACTLY what 165 | * you are doing. Otherwise, use ybp_query_event_safe_data and 166 | * ybp_event_to_qes 167 | **/ 168 | #define query_event_statement(e) (e->data + sizeof(struct ybp_query_event) + ((struct ybp_query_event*)e->data)->status_var_len + ((struct ybp_query_event*)e->data)->db_name_len + 1) 169 | #define query_event_status_vars(e) (e->data + sizeof(struct ybp_query_event)) 170 | #define query_event_statement_len(e) (e->length - EVENT_HEADER_SIZE - sizeof(struct ybp_query_event) - ((struct ybp_query_event*)e->data)->status_var_len - ((struct ybp_query_event*)e->data)->db_name_len - 1) 171 | #define query_event_db_name(e) (e->data + sizeof(struct ybp_query_event) + ((struct ybp_query_event*)e->data)->status_var_len) 172 | #define rotate_event_file_name(e) (e->data + 8) 173 | #define rotate_event_file_name_len(e) ((size_t)(e->length - EVENT_HEADER_SIZE - sizeof(uint64_t))) 174 | 175 | #endif /* _YBINLOGP_PRIVATE_H */ 176 | -------------------------------------------------------------------------------- /src/ybinlogp.h: -------------------------------------------------------------------------------- 1 | /* 2 | * binlogp: A mysql binary log parser and query tool 3 | * 4 | * (C) 2010-2011 Yelp, Inc. 5 | * 6 | * This work is licensed under the ISC/OpenBSD License. The full 7 | * contents of that license can be found under license.txt 8 | */ 9 | 10 | #ifndef _YBINLOGP_H_ 11 | #define _YBINLOGP_H_ 12 | 13 | #include 14 | #include 15 | #include 16 | 17 | #define BINLOG_VERSION 4 18 | 19 | #define EVENT_HEADER_SIZE 19 /* we tack on extra stuff at the end */ 20 | 21 | struct ybp_binlog_parser { 22 | int fd; 23 | off_t file_size; 24 | ssize_t offset; 25 | bool enforce_server_id; 26 | bool has_read_fde; 27 | uint32_t slave_server_id; 28 | uint32_t master_server_id; 29 | time_t min_timestamp; 30 | time_t max_timestamp; 31 | }; 32 | 33 | enum ybp_event_types { 34 | UNKNOWN_EVENT=0, 35 | START_EVENT_V3=1, 36 | QUERY_EVENT=2, 37 | STOP_EVENT=3, 38 | ROTATE_EVENT=4, 39 | INTVAR_EVENT=5, 40 | LOAD_EVENT=6, 41 | SLAVE_EVENT=7, 42 | CREATE_FILE_EVENT=8, 43 | APPEND_BLOCK_EVENT=9, 44 | EXEC_LOAD_EVENT=10, 45 | DELETE_FILE_EVENT=11, 46 | NEW_LOAD_EVENT=12, 47 | RAND_EVENT=13, 48 | USER_VAR_EVENT=14, 49 | FORMAT_DESCRIPTION_EVENT=15, 50 | XID_EVENT=16, 51 | BEGIN_LOAD_QUERY_EVENT=17, 52 | EXECUTE_LOAD_QUERY_EVENT=18, 53 | TABLE_MAP_EVENT=19, 54 | PRE_GA_WRITE_ROWS_EVENT=20, 55 | PRE_GA_DELETE_ROWS_EVENT=21, 56 | WRITE_ROWS_EVENT=22, 57 | UPDATE_ROWS_EVENT=23, 58 | DELETE_ROWS_EVENT=24, 59 | INCIDENT_EVENT=25, 60 | HEARTBEAT_LOG_EVENT=26 61 | }; 62 | 63 | #pragma pack(push) 64 | #pragma pack(1) /* force byte alignment */ 65 | struct ybp_event { 66 | uint32_t timestamp; 67 | uint8_t type_code; 68 | uint32_t server_id; 69 | uint32_t length; 70 | uint32_t next_position; 71 | uint16_t flags; 72 | char* data; 73 | off64_t offset; 74 | }; 75 | 76 | struct ybp_format_description_event { 77 | uint16_t format_version; /* ought to be 4 */ 78 | char server_version[50]; 79 | uint32_t timestamp; 80 | uint8_t header_len; 81 | // random data 82 | }; 83 | 84 | struct ybp_query_event { 85 | uint32_t thread_id; 86 | uint32_t query_time; 87 | uint8_t db_name_len; 88 | uint16_t error_code; 89 | uint16_t status_var_len; 90 | // status variables (status_var_len) 91 | // database name (db_name_len + 1, NUL) 92 | // statement (the rest, not NUL) 93 | }; 94 | 95 | 96 | struct ybp_rand_event { 97 | uint64_t seed_1; 98 | uint64_t seed_2; 99 | }; 100 | 101 | struct ybp_xid_event { 102 | uint64_t id; 103 | }; 104 | 105 | struct ybp_intvar_event { 106 | uint8_t type; 107 | uint64_t value; 108 | }; 109 | 110 | struct ybp_rotate_event { 111 | uint64_t next_position; 112 | // file name of the next file (not NUL) 113 | }; 114 | #pragma pack(pop) 115 | 116 | /** 117 | * Use this to safely access the data portions of a query event. Note that 118 | * this involves copying things, so it's pretty slow. 119 | **/ 120 | struct ybp_query_event_safe { 121 | uint32_t thread_id; 122 | uint32_t query_time; 123 | uint8_t db_name_len; 124 | uint16_t error_code; 125 | uint16_t status_var_len; 126 | char* statement; 127 | size_t statement_len; 128 | char* status_var; 129 | char* db_name; 130 | }; 131 | 132 | struct ybp_rotate_event_safe { 133 | uint64_t next_position; 134 | char* file_name; 135 | size_t file_name_len; 136 | }; 137 | 138 | /** 139 | * Initialize a ybp_binlog_parser. Returns 0 on success, non-zero otherwise. 140 | * 141 | * Arguments: 142 | * fd: A file descriptor open in reading mode to a binlog file 143 | **/ 144 | struct ybp_binlog_parser* ybp_get_binlog_parser(int); 145 | 146 | /** 147 | * Update the ybp_binlog_parser. 148 | * 149 | * Call this any time you expect that the underlying file might've changed, 150 | * and want to be able to see those changes. 151 | **/ 152 | void ybp_update_bp(struct ybp_binlog_parser*); 153 | 154 | /** 155 | * Get the offset in the bp 156 | **/ 157 | off64_t ybp_tell_bp(struct ybp_binlog_parser*); 158 | 159 | /** 160 | * Rewind the ybp_binlog_parser to the given offset 161 | * 162 | * Call this any time you expect that the underlying file might've changed, 163 | * and want to be able to see those changes. 164 | **/ 165 | void ybp_rewind_bp(struct ybp_binlog_parser*, off_t); 166 | 167 | /** 168 | * Clean up a ybp_binlog_parser 169 | **/ 170 | void ybp_dispose_binlog_parser(struct ybp_binlog_parser*); 171 | 172 | /** 173 | * Advance a ybp_binlog_parser structure to the next event. 174 | * 175 | * Arguments: 176 | * p: A binlog_parser 177 | * evbuf: An event buffer (inited with ybp_init_event, or resetted with 178 | * ybp_reset_event) which will be written to 179 | * Returns 0 if the current event is the last event, <0 on error, and >0 180 | * otherwise. 181 | */ 182 | int ybp_next_event(struct ybp_binlog_parser* restrict, struct ybp_event* restrict); 183 | 184 | /** 185 | * Initialize an event object. Event objects must live on the heap 186 | * and must be destroyed with dispose_event(). 187 | * 188 | * Just sets everything to 0 for now. 189 | **/ 190 | void ybp_init_event(struct ybp_event*); 191 | 192 | /** 193 | * Get a clean event object. Like ybp_init_event, but it does the malloc for 194 | * you. 195 | **/ 196 | struct ybp_event* ybp_get_event(void); 197 | 198 | /** 199 | * Reset an event object, making it re-fillable 200 | * 201 | * Deletes the extra data and re-inits the object 202 | */ 203 | void ybp_reset_event(struct ybp_event*); 204 | 205 | /** 206 | * Destroy an event object and any associated data 207 | **/ 208 | void ybp_dispose_event(struct ybp_event*); 209 | 210 | /** 211 | * Copy an event and attached data from source to dest. Both must already 212 | * exist and have been init'd 213 | **/ 214 | int ybp_copy_event(struct ybp_event* dest, struct ybp_event* source); 215 | 216 | /** 217 | * Print event e to the given iostream. 218 | * 219 | * if the stream is null, print to stdout. 220 | **/ 221 | void ybp_print_event_simple(struct ybp_event* restrict, struct ybp_binlog_parser* restrict, FILE* restrict); 222 | 223 | /** 224 | * Print event e to the given iostream 225 | * 226 | * Args: 227 | * event 228 | * binlog parser 229 | * iostream 230 | * q_mode 231 | * v_mode 232 | * database restriction 233 | **/ 234 | void ybp_print_event(struct ybp_event* restrict, struct ybp_binlog_parser* restrict, FILE* restrict, bool, bool, char*); 235 | 236 | /** 237 | * Get the string type of an event 238 | **/ 239 | const char* ybp_event_type(struct ybp_event*); 240 | 241 | /** 242 | * Interpret an event as an FDE. Returns either a pointer to the FDE, or 243 | * NULL. 244 | * 245 | * WARNING: The pointer returned will share memory space with the evbuf 246 | * argument passed in. 247 | */ 248 | struct ybp_format_description_event* ybp_event_as_fde(struct ybp_event* restrict); 249 | 250 | /** 251 | * Get a safe-to-mess-with query event from an event 252 | **/ 253 | struct ybp_query_event_safe* ybp_event_to_safe_qe(struct ybp_event* restrict); 254 | 255 | /** 256 | * Dispose a structure returned from ybp_event_to_safe_qe 257 | **/ 258 | void ybp_dispose_safe_qe(struct ybp_query_event_safe*); 259 | 260 | /** 261 | * Get a safe-to-mess-with rotate event from an event 262 | **/ 263 | struct ybp_rotate_event_safe* ybp_event_to_safe_re(struct ybp_event* restrict); 264 | 265 | /** 266 | * Dispose a structure returned from ybp_event_to_safe_qe 267 | **/ 268 | void ybp_dispose_safe_re(struct ybp_rotate_event_safe*); 269 | 270 | /** 271 | * Get a safe-to-mess-with xid event from an event 272 | **/ 273 | struct ybp_xid_event* ybp_event_to_safe_xe(struct ybp_event* restrict); 274 | 275 | /** 276 | * Dispose a structure returned from ybp_event_to_safe_xe 277 | **/ 278 | void ybp_dispose_safe_xe(struct ybp_xid_event*); 279 | 280 | /** 281 | * Search tools! 282 | **/ 283 | off64_t ybp_nearest_offset(struct ybp_binlog_parser* restrict, off64_t); 284 | 285 | off64_t ybp_nearest_time(struct ybp_binlog_parser* restrict, time_t target); 286 | 287 | /* vim: set sts=0 sw=4 ts=4 noexpandtab: */ 288 | 289 | #endif /* _YBINLOGP_H_ */ 290 | -------------------------------------------------------------------------------- /src/ybinlogp/parser.py: -------------------------------------------------------------------------------- 1 | """ 2 | ybinlogp: A mysql binary log parser and query tool 3 | 4 | (C) 2010-2011 Yelp, Inc. 5 | 6 | This work is licensed under the ISC/OpenBSD License. The full 7 | contents of that license can be found under license.txt 8 | """ 9 | 10 | import ctypes 11 | import datetime 12 | import errno 13 | import logging 14 | import time 15 | 16 | 17 | log = logging.getLogger('ybinlogp') 18 | 19 | library = ctypes.CDLL("libybinlogp.so.1", use_errno=True) 20 | 21 | 22 | class EventStruct(ctypes.Structure): 23 | """Internal data structure for Events""" 24 | _fields_ = [("timestamp", ctypes.c_uint32), 25 | ("type_code", ctypes.c_uint8), 26 | ("server_id", ctypes.c_uint32), 27 | ("length", ctypes.c_uint32), 28 | ("next_position", ctypes.c_uint32), 29 | ("flags", ctypes.c_uint16), 30 | ("data", ctypes.c_void_p), 31 | ("offset", ctypes.c_uint64)] 32 | 33 | _pack_ = 1 34 | 35 | class Event(object): 36 | """User-facing data structure for Events""" 37 | __slots__ = 'event_type', 'offset', 'time', 'data' 38 | 39 | def __init__(self, event_type, offset, timestamp): 40 | self.event_type = event_type 41 | self.offset = offset 42 | self.time = datetime.datetime.fromtimestamp(timestamp) 43 | self.data = None 44 | 45 | def __str__(self): 46 | data = str(self.data) if self.data else "" 47 | return "%s at %s: %s" % (self.event_type, self.time, data) 48 | 49 | class QueryEventStruct(ctypes.Structure): 50 | """Internal data structure for query events""" 51 | _fields_ = [("thread_id", ctypes.c_uint32), 52 | ("query_time", ctypes.c_uint32), 53 | ("db_name_len", ctypes.c_uint8), 54 | ("error_code", ctypes.c_uint16), 55 | ("status_var_len", ctypes.c_uint16), 56 | ("statement", ctypes.c_char_p), 57 | ("statement_len", ctypes.c_size_t), 58 | ("status_var", ctypes.c_char_p), 59 | ("db_name", ctypes.c_char_p)] 60 | 61 | class QueryEvent(object): 62 | """User-facing data structure for query events""" 63 | __slots__ = 'db_name', 'statement', 'query_time' 64 | 65 | def __init__(self, db_name, statement, query_time): 66 | self.db_name = db_name 67 | self.statement = statement 68 | self.query_time = query_time 69 | 70 | def __str__(self): 71 | return "Query(db='%s', statement='%s', query_time=%d)" % ( 72 | self.db_name, self.statement, self.query_time) 73 | 74 | class RotateEventStruct(ctypes.Structure): 75 | """Internal data structure for rotatation events""" 76 | _fields_ = [("next_position", ctypes.c_uint64), 77 | ("file_name", ctypes.c_char_p), 78 | ("file_name_len", ctypes.c_size_t)] 79 | 80 | class RotateEvent(object): 81 | """User-facing data structure for rotatation events""" 82 | __slots__ = 'next_position', 'file_name' 83 | 84 | def __init__(self, next_position, file_name): 85 | self.next_position = next_position 86 | self.file_name = file_name 87 | 88 | def __str__(self): 89 | return "Rotate(next file=%s, next_position=%d)" % ( 90 | self.file_name, self.next_position) 91 | 92 | class XIDEventStruct(ctypes.Structure): 93 | """Internal data structure for XID events""" 94 | _fields_ = [("id", ctypes.c_uint64)] 95 | 96 | class XIDEvent(object): 97 | """User-facing data structure for XID events, which seem to all represent COMMITs""" 98 | def __init__(self, xid): 99 | self.xid = xid 100 | 101 | def __str__(self): 102 | return "COMMIT xid %d" % self.xid 103 | 104 | _init_bp = library.ybp_get_binlog_parser 105 | _init_bp.argtypes = [ctypes.c_int] 106 | _init_bp.restype = ctypes.c_void_p 107 | 108 | _get_event = library.ybp_get_event 109 | _get_event.argtypes = [] 110 | _get_event.restype = ctypes.POINTER(EventStruct) 111 | 112 | _next_event = library.ybp_next_event 113 | _next_event.argtypes = [ctypes.c_void_p, ctypes.POINTER(EventStruct)] 114 | _next_event.restype = ctypes.c_int 115 | 116 | _reset_event = library.ybp_reset_event 117 | _reset_event.argtypes = [ctypes.POINTER(EventStruct)] 118 | _reset_event.restype = None 119 | 120 | _dispose_event = library.ybp_dispose_event 121 | _dispose_event.argtypes = [ctypes.POINTER(EventStruct)] 122 | _dispose_event.restype = None 123 | 124 | _dispose_bp = library.ybp_dispose_binlog_parser 125 | _dispose_bp.argtypes = [ctypes.c_void_p] 126 | _dispose_bp.restype = None 127 | 128 | _update_bp = library.ybp_update_bp 129 | _update_bp.argtypes = [ctypes.c_void_p] 130 | _update_bp.restype = None 131 | 132 | _event_type = library.ybp_event_type 133 | _event_type.argtypes = [ctypes.POINTER(EventStruct)] 134 | _event_type.restype = ctypes.c_char_p 135 | 136 | _event_to_safe_qe = library.ybp_event_to_safe_qe 137 | _event_to_safe_qe.argtypes = [ctypes.POINTER(EventStruct)] 138 | _event_to_safe_qe.restype = ctypes.POINTER(QueryEventStruct) 139 | 140 | _dispose_safe_qe = library.ybp_dispose_safe_qe 141 | _dispose_safe_qe.argtype = [ctypes.POINTER(QueryEventStruct)] 142 | _dispose_safe_qe.qestype = None 143 | 144 | _event_to_safe_re = library.ybp_event_to_safe_re 145 | _event_to_safe_re.argtypes = [ctypes.POINTER(EventStruct)] 146 | _event_to_safe_re.restype = ctypes.POINTER(RotateEventStruct) 147 | 148 | _dispose_safe_re = library.ybp_dispose_safe_re 149 | _dispose_safe_re.argtype = [ctypes.POINTER(RotateEventStruct)] 150 | _dispose_safe_re.restype = None 151 | 152 | _event_to_safe_xe = library.ybp_event_to_safe_xe 153 | _event_to_safe_xe.argtypes = [ctypes.POINTER(EventStruct)] 154 | _event_to_safe_xe.restype = ctypes.POINTER(XIDEventStruct) 155 | 156 | _dispose_safe_xe = library.ybp_dispose_safe_xe 157 | _dispose_safe_xe.argtype = [ctypes.POINTER(XIDEventStruct)] 158 | _dispose_safe_xe.restype = None 159 | 160 | # no c_off in ctypes, using c_longlong instead 161 | _rewind_bp = library.ybp_rewind_bp 162 | _rewind_bp.argtypes = [ctypes.c_void_p, ctypes.c_longlong] 163 | _rewind_bp.restype = None 164 | 165 | _tell_bp = library.ybp_tell_bp 166 | _tell_bp.argtypes = [ctypes.c_void_p] 167 | _tell_bp.restype = ctypes.c_longlong 168 | 169 | _nearest_offset = library.ybp_nearest_offset 170 | _nearest_offset.argtypes = [ctypes.c_void_p, ctypes.c_longlong] 171 | _nearest_offset.restype = ctypes.c_longlong 172 | 173 | _nearest_time = library.ybp_nearest_time 174 | _nearest_time.argtypes = [ctypes.c_void_p, ctypes.c_long] 175 | _nearest_time.restype = ctypes.c_longlong 176 | 177 | class YBinlogPError(Exception): 178 | pass 179 | 180 | class YBinlogPSysError(YBinlogPError): 181 | def __init__(self, errno): 182 | self.errno = errno 183 | 184 | def __repr__(self): 185 | return "NextEventError(%s)" % errno.errorcode.get(self.errno, "Unknown") 186 | 187 | def __str__(self): 188 | return repr(self) 189 | 190 | class NextEventError(YBinlogPSysError): 191 | pass 192 | 193 | class NoEventsAfterTime(YBinlogPError): 194 | pass 195 | 196 | class NoEventsAfterOffset(YBinlogPError): 197 | pass 198 | 199 | class EmptyEventError(YBinlogPError): 200 | pass 201 | 202 | 203 | class EventType(object): 204 | """Enumeration of event types.""" 205 | 206 | rotate = "ROTATE_EVENT" 207 | query = "QUERY_EVENT" 208 | xid = "XID_EVENT" 209 | 210 | 211 | def build_event(event_buffer): 212 | """Create an :class:`Event` object from the mysql event. 213 | 214 | :param event_buffer: a mysql event buffer 215 | :returns: :class:`Event` for the event 216 | :raises: EmptyEventError 217 | """ 218 | event_type = _event_type(event_buffer) 219 | base_event = Event(event_type, 220 | event_buffer.contents.offset, 221 | event_buffer.contents.timestamp) 222 | 223 | if event_buffer.contents.data is None: 224 | raise EmptyEventError() 225 | 226 | if event_type == EventType.query: 227 | query_event = _event_to_safe_qe(event_buffer) 228 | base_event.data = QueryEvent(query_event.contents.db_name, 229 | query_event.contents.statement, 230 | query_event.contents.query_time) 231 | _dispose_safe_qe(query_event) 232 | 233 | if event_type == EventType.rotate: 234 | rotate_event = _event_to_safe_re(event_buffer) 235 | base_event.data = RotateEvent(rotate_event.contents.next_position, 236 | rotate_event.contents.file_name) 237 | _dispose_safe_re(rotate_event) 238 | 239 | if event_type == EventType.xid: 240 | xid_event = _event_to_safe_xe(event_buffer) 241 | base_event.data = XIDEvent(xid_event.contents.id) 242 | _dispose_safe_xe(xid_event) 243 | 244 | return base_event 245 | 246 | 247 | class YBinlogP(object): 248 | """Python interface to ybinlogp, the fast mysql binlog parser. 249 | 250 | Example usage: 251 | 252 | .. code-block:: python 253 | 254 | bp = YBinlogP('/path/to/binlog/file') 255 | for query in bp: 256 | if event.event_type == "QUERY_EVENT": 257 | print event.data.statement 258 | bp.clean_up() 259 | """ 260 | 261 | def __init__(self, filename, always_update=False, max_retries=3, sleep_interval=0.1): 262 | """ 263 | :param filename: filename of a mysql binary log 264 | :type filename: string 265 | :param always_update: if True stat the binlog file before doing anything 266 | interesting 267 | :type always_update: boolean 268 | :param max_retries: number of retries to perform on a EmptyEventError 269 | :type max_retries: int 270 | :param sleep_interval: seconds to sleep between retries 271 | :type sleep_interval: float 272 | """ 273 | self.filename = filename 274 | self._file = open(self.filename, 'r') 275 | self.binlog_parser_handle = _init_bp(self._file.fileno()) 276 | self.event_buffer = _get_event() 277 | self.always_update = always_update 278 | self.max_retries = max_retries 279 | self.sleep_interval = sleep_interval 280 | 281 | def _get_next_event(self): 282 | _reset_event(self.event_buffer) 283 | last = _next_event(self.binlog_parser_handle, self.event_buffer) 284 | if last < 0: 285 | raise NextEventError(ctypes.get_errno()) 286 | return build_event(self.event_buffer), last == 0 287 | 288 | def close(self): 289 | """Clean up some things that are allocated in C-land. Attempting to 290 | use this object after calling this method will break. 291 | """ 292 | # TODO: should this be a __del__? 293 | _dispose_bp(self.binlog_parser_handle) 294 | self.binlog_parser_handle = None 295 | _dispose_event(self.event_buffer) 296 | self.event_buffer = None 297 | self._file.close() 298 | 299 | clean_up = close 300 | 301 | def tell(self): 302 | """Return the current position of the binlog parser. 303 | 304 | :return: a tuple of binlog filename, offset 305 | :rtype: tuple 306 | """ 307 | return self.filename, _tell_bp(self.binlog_parser_handle) 308 | 309 | def update(self): 310 | """Update the binlog parser. This just re-stats the underlying file descriptor. 311 | Call this if you have reason to believe that the underlying file has changed size 312 | (or set always_update to be true on the Python wrapper object).""" 313 | _update_bp(self.binlog_parser_handle) 314 | 315 | def __iter__(self): 316 | """Return an iteration over the events in the binlog. 317 | :raises: NextEventError, EmptyEventError 318 | """ 319 | last = False 320 | current_offset = -1 321 | retries = 0 322 | while not last: 323 | if self.always_update: 324 | self.update() 325 | 326 | try: 327 | event, last = self._get_next_event() 328 | current_offset = event.offset 329 | yield event 330 | except EmptyEventError, e: 331 | if retries >= self.max_retries: 332 | raise 333 | self.handle_empty_event(e, current_offset) 334 | retries += 1 335 | except NextEventError, e: 336 | if e.errno == 0: 337 | return 338 | else: 339 | raise 340 | 341 | def handle_empty_event(self, exc, current_offset): 342 | """If the empty event is at the start of a file, update and sleep, 343 | otherwise return to the previous good offset and try again. 344 | """ 345 | if current_offset == -1: 346 | log.error("Got an empty offset at the beginning, re-statting " 347 | "and retrying in %fs", self.sleep_interval) 348 | time.sleep(self.sleep_interval) 349 | self.update() 350 | return 351 | 352 | log.error("Got an empty event, retrying at offset %d in %fs", 353 | current_offset, self.sleep_interval) 354 | time.sleep(self.sleep_interval) 355 | self.seek(current_offset) 356 | 357 | def seek(self, offset): 358 | """Seek the binlog parser pointer to offset. 359 | 360 | :param offset: offset within the binlog to move to 361 | :type offset: int 362 | """ 363 | _rewind_bp(self.binlog_parser_handle, offset) 364 | 365 | rewind = seek 366 | """Deprecated, renamed to :func:`seek`.""" 367 | 368 | def first_offset_after_time(self, t): 369 | """Find the first offset after the given unix timestamp. Usage: 370 | 371 | bp = YBinlogP('/path/to/binlog') 372 | offset = bp.first_offset_after_time(1293868800) # jan 1 2011 373 | bp.rewind(offset) 374 | for record in bp: 375 | # ... 376 | """ 377 | offset = _nearest_time(self.binlog_parser_handle, t) 378 | if offset == -1: 379 | raise NextEventError(ctypes.get_errno()) 380 | elif offset == -2: 381 | raise NoEventsAfterTime() 382 | else: 383 | return offset 384 | 385 | def first_offset_after_offset(self, t): 386 | """Find the first valid offset after the given offset. Usage: 387 | 388 | bp = YBinlogP('/path/to/binlog') 389 | offset = bp.first_offset_after_offset(1048576) # skip the first 1 MB 390 | bp.rewind(offset) 391 | for record in bp: 392 | # ... 393 | """ 394 | offset = _nearest_offset(self.binlog_parser_handle, t) 395 | if offset == -1: 396 | raise NextEventError(ctypes.get_errno()) 397 | elif offset == -2: 398 | raise NoEventsAfterOffset() 399 | else: 400 | return offset 401 | 402 | # vim: set noexpandtab ts=4 sw=4: 403 | -------------------------------------------------------------------------------- /src/libybinlogp.c: -------------------------------------------------------------------------------- 1 | /* 2 | * ybinlogp: A mysql binary log parser and query tool 3 | * 4 | * (C) 2010-2011 Yelp, Inc. 5 | * 6 | * This work is licensed under the ISC/OpenBSD License. The full 7 | * contents of that license can be found under license.txt 8 | */ 9 | 10 | /* Conventions used in this file: 11 | * 12 | * Functions starting with ybp_ will be in the .h file and will be exported 13 | * Functions starting with ybpi_ are internal-only and should be static 14 | */ 15 | 16 | #define _XOPEN_SOURCE 700 17 | #define _GNU_SOURCE 18 | 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | 30 | #include "debugs.h" 31 | #include "ybinlogp.h" 32 | 33 | /******* binlog parameters ********/ 34 | #define MIN_TYPE_CODE 0 35 | #define MAX_TYPE_CODE 27 36 | #define MIN_EVENT_LENGTH 19 37 | #define MAX_EVENT_LENGTH 16*1048576 /* Max statement len is generally 16MB */ 38 | #define MAX_SERVER_ID 4294967295 /* 0 <= server_id <= 2**32 */ 39 | #define TIMESTAMP_FUDGE_FACTOR 3600 /* seconds */ 40 | 41 | /******* more defines ********/ 42 | #define MAX_RETRIES 16*1048576 /* how many bytes to seek ahead looking for a record */ 43 | 44 | #define GET_BIT(x,bit) (unsigned char)(!!(x & 1 << (bit-1))) 45 | 46 | #define min(x,y) (((x) < (y)) ? (x) : (y)) 47 | 48 | /* Pulls in a bunch of strings and things that I don't really want in this 49 | * file, but are only to be used here. 50 | */ 51 | #include "ybinlogp-private.h" 52 | 53 | /******* predeclarations of ybpi functions *******/ 54 | static int ybpi_read_fde(struct ybp_binlog_parser* restrict); 55 | static int ybpi_read_event(struct ybp_binlog_parser* restrict, off_t, struct ybp_event* restrict); 56 | static bool ybpi_check_event(struct ybp_event*, struct ybp_binlog_parser*); 57 | static off64_t ybpi_next_after(struct ybp_event* restrict); 58 | static off64_t ybpi_nearest_offset(struct ybp_binlog_parser* restrict, off64_t, struct ybp_event* restrict, int); 59 | 60 | /******** implementation begins here ********/ 61 | 62 | struct ybp_binlog_parser* ybp_get_binlog_parser(int fd) 63 | { 64 | struct ybp_binlog_parser* result; 65 | if ((result = malloc(sizeof(struct ybp_binlog_parser))) == NULL) { 66 | return NULL; 67 | } 68 | result->fd = fd; 69 | result->offset = 4; 70 | result->enforce_server_id = false; 71 | result->slave_server_id = 0; 72 | result->master_server_id = 0; 73 | result->min_timestamp = 0; 74 | result->max_timestamp = time(NULL) + TIMESTAMP_FUDGE_FACTOR; 75 | result->has_read_fde = false; 76 | ybp_update_bp(result); 77 | ybpi_read_fde(result); 78 | return result; 79 | } 80 | 81 | void ybp_rewind_bp(struct ybp_binlog_parser* p, off_t offset) 82 | { 83 | p->offset = offset; 84 | } 85 | 86 | off64_t ybp_tell_bp(struct ybp_binlog_parser* p) 87 | { 88 | return p->offset; 89 | } 90 | 91 | void ybp_update_bp(struct ybp_binlog_parser* p) 92 | { 93 | struct stat stbuf; 94 | fstat(p->fd, &stbuf); 95 | p->file_size = stbuf.st_size; 96 | } 97 | 98 | void ybp_dispose_binlog_parser(struct ybp_binlog_parser* p) 99 | { 100 | if (p != NULL) 101 | free(p); 102 | } 103 | 104 | void ybp_init_event(struct ybp_event* evbuf) 105 | { 106 | memset(evbuf, 0, sizeof(struct ybp_event)); 107 | } 108 | 109 | struct ybp_event* ybp_get_event(void) 110 | { 111 | struct ybp_event* event = malloc(sizeof(struct ybp_event)); 112 | Dprintf("Creating event at 0x%p\n", (void*)event); 113 | ybp_init_event(event); 114 | return event; 115 | } 116 | 117 | void ybp_dispose_event(struct ybp_event* evbuf) 118 | { 119 | Dprintf("About to dispose_event 0x%p\n", (void*)evbuf); 120 | if (evbuf->data != NULL) { 121 | Dprintf("Freeing data at 0x%p\n", (void*)evbuf->data); 122 | free(evbuf->data); 123 | evbuf->data = NULL; 124 | } 125 | free(evbuf); 126 | } 127 | 128 | int ybp_copy_event(struct ybp_event *dest, struct ybp_event *source) 129 | { 130 | Dprintf("About to copy 0x%p to 0x%p\n", (void*)source, (void*)dest); 131 | memmove(dest, source, sizeof(struct ybp_event)); 132 | if (source->data != 0) { 133 | Dprintf("mallocing %d bytes for the target\n", source->length - EVENT_HEADER_SIZE); 134 | if ((dest->data = malloc(source->length - EVENT_HEADER_SIZE)) == NULL) { 135 | perror("malloc:"); 136 | return -1; 137 | } 138 | Dprintf("copying extra data from 0x%p to 0x%p\n", source->data, dest->data); 139 | memmove(dest->data, source->data, source->length - EVENT_HEADER_SIZE); 140 | } 141 | return 0; 142 | } 143 | 144 | void ybp_reset_event(struct ybp_event* evbuf) 145 | { 146 | Dprintf("Resetting event\n"); 147 | if (evbuf->data != 0) { 148 | Dprintf("Freeing data at 0x%p\n", (void*)evbuf->data); 149 | free(evbuf->data); 150 | evbuf->data = 0; 151 | } 152 | ybp_init_event(evbuf); 153 | } 154 | 155 | /** 156 | * check if an event "looks" valid. returns true if it does and false if it 157 | * doesn't 158 | **/ 159 | static bool ybpi_check_event(struct ybp_event* e, struct ybp_binlog_parser* p) 160 | { 161 | Dprintf("e->type_code = %d, e->length=%zd, e->timestamp=%d\n", 162 | e->type_code, 163 | e->length, 164 | e->timestamp); 165 | Dprintf("p->min_timestamp = %d, p->max_timestamp = %d\n", 166 | p->min_timestamp, 167 | p->max_timestamp); 168 | 169 | return ((!p->enforce_server_id || 170 | (e->server_id == p->slave_server_id) || 171 | (e->server_id == p->master_server_id)) && 172 | e->type_code > MIN_TYPE_CODE && 173 | e->type_code < MAX_TYPE_CODE && 174 | e->length >= MIN_EVENT_LENGTH && 175 | e->length < MAX_EVENT_LENGTH); 176 | } 177 | 178 | /** 179 | * Find the offset of the next event after the one passed in. 180 | * Uses the built-in event chaining. 181 | **/ 182 | static off64_t ybpi_next_after(struct ybp_event *evbuf) { 183 | /* Can't actually use next_position, because it will vary between 184 | * messages that are from master and messages that are from slave. 185 | * Usually, only the FDE is from the slave. But, still... 186 | */ 187 | return evbuf->offset + evbuf->length; 188 | } 189 | 190 | /* 191 | * Get the first event after starting_offset in fd 192 | * 193 | * If evbuf is non-null, copy it into there 194 | */ 195 | off64_t ybp_nearest_offset(struct ybp_binlog_parser* p, off64_t starting_offset) 196 | { 197 | return ybpi_nearest_offset(p, starting_offset, NULL, 1); 198 | } 199 | 200 | off64_t ybpi_nearest_offset(struct ybp_binlog_parser* restrict p, off64_t starting_offset, struct ybp_event* restrict outbuf, int direction) 201 | { 202 | unsigned int num_increments = 0; 203 | off64_t offset; 204 | struct ybp_event *evbuf = ybp_get_event(); 205 | offset = starting_offset; 206 | Dprintf("In nearest offset mode, got fd=%d, starting_offset=%llu, direction=%d\n", p->fd, (long long)starting_offset, direction); 207 | while ((num_increments < MAX_RETRIES) && (offset >= 0) && (offset <= p->file_size - EVENT_HEADER_SIZE) ) 208 | { 209 | ybp_reset_event(evbuf); 210 | if (ybpi_read_event(p, offset, evbuf) == -1) { 211 | ybp_dispose_event(evbuf); 212 | return -1; 213 | } 214 | if (ybpi_check_event(evbuf, p)) { 215 | if (outbuf != NULL) 216 | ybp_copy_event(outbuf, evbuf); 217 | ybp_dispose_event(evbuf); 218 | return offset; 219 | } 220 | else { 221 | Dprintf("incrementing offset from %zd to %zd\n", offset, offset + direction); 222 | offset += direction; 223 | ++num_increments; 224 | } 225 | } 226 | ybp_dispose_event(evbuf); 227 | Dprintf("Unable to find anything (offset=%llu)\n",(long long) offset); 228 | return -2; 229 | } 230 | 231 | /** 232 | * Binary-search to find the record closest to the requested time 233 | **/ 234 | off64_t ybp_nearest_time(struct ybp_binlog_parser* restrict p, time_t target) 235 | { 236 | off64_t file_size = p->file_size; 237 | struct ybp_event *evbuf = ybp_get_event(); 238 | off64_t offset = file_size / 2; 239 | off64_t next_increment = file_size / 4; 240 | int directionality = 1; 241 | off64_t found, last_found = 0; 242 | Dprintf("Starting nearest_time with next_increment=%d\n", next_increment); 243 | while (next_increment > 2) { 244 | long long delta; 245 | ybp_reset_event(evbuf); 246 | found = ybpi_nearest_offset(p, offset, evbuf, directionality); 247 | Dprintf("Looking for nearest offset to %zd, got %d\n", offset, found); 248 | if (found == -1) { 249 | return found; 250 | } 251 | else if (found == -2) { 252 | fprintf(stderr, "Ran off the end of the file, probably going to have a bad match\n"); 253 | last_found = found; 254 | break; 255 | } 256 | last_found = found; 257 | delta = (evbuf->timestamp - target); 258 | if (delta > 0) { 259 | directionality = -1; 260 | } 261 | else if (delta < 0) { 262 | directionality = 1; 263 | } 264 | Dprintf("delta=%lld at %llu, directionality=%d, next_increment=%lld\n", (long long)delta, (unsigned long long)found, directionality, (long long)next_increment); 265 | if (delta == 0) { 266 | break; 267 | } 268 | if (directionality == -1) { 269 | offset += (next_increment * directionality); 270 | } 271 | else { 272 | offset += (next_increment * directionality); 273 | } 274 | next_increment /= 2; 275 | } 276 | ybp_dispose_event(evbuf); 277 | return last_found; 278 | } 279 | 280 | 281 | /** 282 | * Read an event from the parser parser, at offset offet, storing it in 283 | * event evbuf (which should be already init'd) 284 | * 285 | * Returns -1 for system errors (seek, malloc) and -2 for format errors 286 | */ 287 | static int ybpi_read_event(struct ybp_binlog_parser* restrict p, off_t offset, struct ybp_event* restrict evbuf) 288 | { 289 | ssize_t amt_read; 290 | Dprintf("Reading event at offset %zd\n", offset); 291 | p->max_timestamp = time(NULL) + TIMESTAMP_FUDGE_FACTOR; 292 | if ((lseek(p->fd, offset, SEEK_SET) < 0)) { 293 | perror("Error seeking"); 294 | return -1; 295 | } 296 | amt_read = read(p->fd, (void*)evbuf, EVENT_HEADER_SIZE); 297 | evbuf->offset = offset; 298 | evbuf->data = NULL; 299 | if (amt_read < 0) { 300 | fprintf(stderr, "Error reading event at %lld: %s\n", (long long) offset, strerror(errno)); 301 | return -1; 302 | } else if ((size_t)amt_read != EVENT_HEADER_SIZE) { 303 | Dprintf("read %zd bytes, expected to read %d bytes in ybpi_read_event", amt_read, EVENT_HEADER_SIZE); 304 | return -1; 305 | } 306 | if (evbuf->length + evbuf->offset > p->file_size) { 307 | return -2; 308 | } 309 | if (ybpi_check_event(evbuf, p)) { 310 | Dprintf("mallocing %d bytes\n", evbuf->length - EVENT_HEADER_SIZE); 311 | if ((evbuf->data = malloc(evbuf->length - EVENT_HEADER_SIZE)) == NULL) { 312 | perror("malloc:"); 313 | return -1; 314 | } 315 | amt_read = 0; 316 | Dprintf("malloced %d bytes at 0x%p for a %s\n", evbuf->length - EVENT_HEADER_SIZE, evbuf->data, ybpi_event_types[evbuf->type_code]); 317 | while (amt_read < evbuf->length - EVENT_HEADER_SIZE) { 318 | ssize_t remaining = evbuf->length - EVENT_HEADER_SIZE - amt_read; 319 | char* target = evbuf->data + amt_read; 320 | ssize_t read_this_time = read(p->fd, target, remaining); 321 | if (read_this_time < 0) { 322 | perror("read extra data"); 323 | free(evbuf->data); 324 | return -1; 325 | } 326 | amt_read += read_this_time; 327 | } 328 | } 329 | else { 330 | Dprintf("check_event failed\n"); 331 | } 332 | return 0; 333 | } 334 | 335 | /** 336 | * Read the FDE. It's the first record in ALL binlogs 337 | **/ 338 | static int ybpi_read_fde(struct ybp_binlog_parser* p) 339 | { 340 | struct ybp_event* evbuf; 341 | off64_t offset; 342 | bool esi = p->enforce_server_id; 343 | int fd = p->fd; 344 | time_t fde_time; 345 | time_t evt_time; 346 | 347 | p->enforce_server_id = false; 348 | 349 | if ((evbuf = ybp_get_event()) == NULL) { 350 | return -1; 351 | } 352 | 353 | if (ybpi_read_event(p, 4, evbuf) < 0) { 354 | Dprintf("Reading FDE failed\n"); 355 | ybp_dispose_event(evbuf); 356 | return -1; 357 | } 358 | p->enforce_server_id = esi; 359 | struct ybp_format_description_event *f = ybp_event_as_fde(evbuf); 360 | Dprintf("passed in evbuf 0x%p, got back fde 0x%p\n", (void*)evbuf, (void*)f); 361 | if (f->format_version != BINLOG_VERSION) { 362 | fprintf(stderr, "Invalid binlog! Expected version %d, got %d\n", BINLOG_VERSION, f->format_version); 363 | exit(1); 364 | } 365 | fde_time = evbuf->timestamp; 366 | p->slave_server_id = evbuf->server_id; 367 | 368 | offset = ybpi_next_after(evbuf); 369 | p->offset = offset; 370 | ybp_reset_event(evbuf); 371 | ybpi_read_event(p, offset, evbuf); 372 | 373 | p->master_server_id = evbuf->server_id; 374 | evt_time = evbuf->timestamp; 375 | ybp_dispose_event(evbuf); 376 | 377 | /* 378 | * Another signal: events will all be after *either* the FDE time (which 379 | * is the start of this server writing this binlog) or the first event 380 | * time (which is the start of the master writing its binlog), unless 381 | * you're in multi-master, which we don't particularly support. 382 | */ 383 | p->min_timestamp = min(fde_time, evt_time) - TIMESTAMP_FUDGE_FACTOR; 384 | 385 | lseek(fd, 4, SEEK_SET); 386 | Dprintf("Done reading FDE\n"); 387 | p->has_read_fde = true; 388 | return 0; 389 | } 390 | 391 | int ybp_next_event(struct ybp_binlog_parser* restrict parser, struct ybp_event* restrict evbuf) 392 | { 393 | int ret = 0; 394 | bool esi = parser->enforce_server_id; 395 | Dprintf("looking for next event, offset=%zd\n", parser->offset); 396 | if (!parser->has_read_fde) { 397 | ybpi_read_fde(parser); 398 | } 399 | parser->enforce_server_id = false; 400 | ret = ybpi_read_event(parser, parser->offset, evbuf); 401 | parser->enforce_server_id = esi; 402 | if (ret < 0) { 403 | Dprintf("error in ybp_next_event: %d\n", ret); 404 | return ret; 405 | } else { 406 | parser->offset = ybpi_next_after(evbuf); 407 | if ((parser->offset <= 0) || (evbuf->next_position == evbuf->offset) || 408 | (evbuf->next_position >= parser->file_size) || 409 | (parser->offset >= parser->file_size)) { 410 | Dprintf("Got to last event, parser->offset=%zd, evbuf->next_position=%u, parser->file_size=%zd\n", 411 | parser->offset, 412 | evbuf->next_position, 413 | parser->file_size); 414 | return 0; 415 | } else { 416 | return 1; 417 | } 418 | } 419 | } 420 | 421 | struct ybp_format_description_event* ybp_event_as_fde(struct ybp_event* restrict e) 422 | { 423 | if (e->type_code != FORMAT_DESCRIPTION_EVENT) { 424 | fprintf(stderr, "Illegal conversion attempted: %d -> %d\n", e->type_code, FORMAT_DESCRIPTION_EVENT); 425 | return NULL; 426 | } 427 | else { 428 | return (struct ybp_format_description_event*)(e->data); 429 | } 430 | } 431 | 432 | struct ybp_query_event* ybp_event_as_qe(struct ybp_event* restrict e) 433 | { 434 | if (e->type_code != QUERY_EVENT) { 435 | fprintf(stderr, "Illegal conversion attempted: %d -> %d\n", e->type_code, QUERY_EVENT); 436 | return NULL; 437 | } else { 438 | return (struct ybp_query_event*)(e->data); 439 | } 440 | } 441 | 442 | struct ybp_query_event_safe* ybp_event_to_safe_qe(struct ybp_event* restrict e) { 443 | struct ybp_query_event_safe* s; 444 | if (e->type_code != QUERY_EVENT) { 445 | fprintf(stderr, "Illegal conversion attempted: %d -> %d\n", e->type_code, QUERY_EVENT); 446 | return NULL; 447 | } else { 448 | assert(e->data != NULL); 449 | struct ybp_query_event* qe = (struct ybp_query_event*)(e->data); 450 | Dprintf("Constructing safe query event for 0x%p\n", (void*) e); 451 | s = malloc(sizeof(struct ybp_query_event_safe)); 452 | if (s == NULL) 453 | return NULL; 454 | Dprintf("malloced 0x%p\n", (void*)s); 455 | s->thread_id = qe->thread_id; 456 | s->query_time = qe->query_time; 457 | s->db_name_len = qe->db_name_len; 458 | Dprintf("qe->db_name_len = %d\n", qe->db_name_len); 459 | s->error_code = qe->error_code; 460 | s->status_var_len = qe->status_var_len; 461 | if (s == NULL) { 462 | perror("malloc"); 463 | return NULL; 464 | } 465 | s->statement_len = query_event_statement_len(e); 466 | s->statement = strndup((const char*)query_event_statement(e), s->statement_len); 467 | Dprintf("s->statement_len = %zd\n", s->statement_len); 468 | Dprintf("s->statement = %s\n", s->statement); 469 | if (s->statement == NULL) { 470 | perror("strndup"); 471 | return NULL; 472 | } 473 | s->db_name = strndup((char*)query_event_db_name(e), s->db_name_len); 474 | s->status_var = strndup((char*)query_event_status_vars(e), s->status_var_len); 475 | } 476 | Dprintf("Returning s\n"); 477 | return s; 478 | } 479 | 480 | void ybp_dispose_safe_qe(struct ybp_query_event_safe* s) 481 | { 482 | if (s == NULL) { 483 | return; 484 | } 485 | if (s->statement != NULL) 486 | free(s->statement); 487 | if (s->db_name != NULL) 488 | free(s->db_name); 489 | if (s->status_var != NULL) 490 | free(s->status_var); 491 | free(s); 492 | } 493 | 494 | void ybp_dispose_safe_re(struct ybp_rotate_event_safe* s) 495 | { 496 | if (s == NULL) { 497 | return; 498 | } 499 | if (s->file_name != NULL) 500 | free(s->file_name); 501 | free(s); 502 | } 503 | 504 | struct ybp_rotate_event_safe* ybp_event_to_safe_re(struct ybp_event* restrict e) { 505 | struct ybp_rotate_event_safe* s; 506 | if (e->type_code != ROTATE_EVENT) { 507 | fprintf(stderr, "Illegal conversion attempted: %d -> %d\n", e->type_code, ROTATE_EVENT); 508 | } else { 509 | assert(e->data != NULL); 510 | struct ybp_rotate_event* re = (struct ybp_rotate_event*)(e->data); 511 | s = malloc(sizeof(struct ybp_rotate_event_safe)); 512 | s->next_position = re->next_position; 513 | s->file_name_len = rotate_event_file_name_len(e); 514 | s->file_name = strndup((char*)rotate_event_file_name(e), s->file_name_len); 515 | } 516 | return s; 517 | } 518 | 519 | struct ybp_xid_event* ybp_event_to_safe_xe(struct ybp_event* restrict e) { 520 | struct ybp_xid_event* s; 521 | if (e->type_code != XID_EVENT) { 522 | fprintf(stderr, "Illegal conversion attempted: %d -> %d\n", e->type_code, ROTATE_EVENT); 523 | } else { 524 | assert(e->data != NULL); 525 | struct ybp_xid_event* xe = (struct ybp_xid_event*)(e->data); 526 | s = malloc(sizeof(struct ybp_xid_event)); 527 | if (s == NULL) 528 | return NULL; 529 | memcpy(s, xe, sizeof(struct ybp_xid_event)); 530 | } 531 | return s; 532 | } 533 | 534 | void ybp_dispose_safe_xe(struct ybp_xid_event* xe) 535 | { 536 | free(xe); 537 | } 538 | 539 | const char* ybp_event_type(struct ybp_event* restrict evbuf) { 540 | Dprintf("Looking up type string for %d\n", evbuf->type_code); 541 | return ybpi_event_types[evbuf->type_code]; 542 | } 543 | 544 | void ybp_print_event_simple(struct ybp_event* restrict e, 545 | struct ybp_binlog_parser* restrict p, 546 | FILE* restrict stream) 547 | { 548 | ybp_print_event(e, p, stream, 0, 0, NULL); 549 | } 550 | 551 | void ybp_print_event(struct ybp_event* restrict e, 552 | struct ybp_binlog_parser* restrict p, 553 | FILE* restrict stream, 554 | bool q_mode, 555 | bool v_mode, 556 | char* database_limit) 557 | { 558 | (void) p; 559 | int i; 560 | const time_t t = e->timestamp; 561 | if (stream == NULL) { 562 | stream = stdout; 563 | } 564 | /* TODO: implement abbreviated parsing mode 565 | if (p->Q_mode) { 566 | print_statement_event(e); 567 | return; 568 | } 569 | */ 570 | fprintf(stream, "BYTE OFFSET %llu\n", (long long)e->offset); 571 | fprintf(stream, "------------------------\n"); 572 | fprintf(stream, "timestamp: %d = %s", e->timestamp, ctime(&t)); 573 | fprintf(stream, "type_code: %s\n", ybpi_event_types[e->type_code]); 574 | if (q_mode > 1) 575 | return; 576 | fprintf(stream, "server id: %u\n", e->server_id); 577 | if (v_mode) { 578 | fprintf(stream, "length: %d\n", e->length); 579 | fprintf(stream, "next pos: %llu\n", (unsigned long long)e->next_position); 580 | } 581 | fprintf(stream, "flags: "); 582 | for(i=16; i > 0; --i) 583 | { 584 | fprintf(stream, "%hhd", GET_BIT(e->flags, i)); 585 | } 586 | fprintf(stream, "\n"); 587 | for(i=16; i > 0; --i) 588 | { 589 | if (GET_BIT(e->flags, i)) 590 | fprintf(stream, " %s\n", ybpi_flags[i-1]); 591 | } 592 | if (e->data == NULL) { 593 | return; 594 | } 595 | switch ((enum ybp_event_types)e->type_code) { 596 | case QUERY_EVENT: 597 | { 598 | struct ybp_query_event* q = ybp_event_as_qe(e); 599 | char* db_name = query_event_db_name(e); 600 | size_t statement_len = query_event_statement_len(e); 601 | /* Duplicate the statement because the binlog 602 | * doesn't NUL-terminate it. */ 603 | char* statement; 604 | if ((database_limit != NULL) && (strncmp(db_name, database_limit, strlen(database_limit)) != 0)) 605 | return; 606 | if ((statement = strndup((const char*)query_event_statement(e), statement_len)) == NULL) { 607 | perror("strndup"); 608 | return; 609 | } 610 | fprintf(stream, "thread id: %d\n", q->thread_id); 611 | fprintf(stream, "query time (s): %d\n", q->query_time); 612 | if (q->error_code == 0) { 613 | fprintf(stream, "error code: %d\n", q->error_code); 614 | } 615 | else { 616 | fprintf(stream, "ERROR CODE: %d\n", q->error_code); 617 | } 618 | fprintf(stream, "status var length: %d\n", q->status_var_len); 619 | fprintf(stream, "db_name: %s\n", db_name); 620 | if (v_mode) { 621 | fprintf(stream, "status var length: %d\n", q->status_var_len); 622 | } 623 | if (q->status_var_len > 0) { 624 | char* status_var_start = query_event_status_vars(e); 625 | char* status_var_ptr = status_var_start; 626 | while((status_var_ptr - status_var_start) < q->status_var_len) { 627 | enum ybpi_e_status_var_types status_var_type = *status_var_ptr; 628 | status_var_ptr++; 629 | assert(status_var_type < 10); 630 | switch (status_var_type) { 631 | case Q_FLAGS2_CODE: 632 | { 633 | uint32_t val = *((uint32_t*)status_var_ptr); 634 | status_var_ptr += 4; 635 | fprintf(stream, "Q_FLAGS2: "); 636 | for(i=32; i > 0; --i) 637 | { 638 | fprintf(stream, "%hhd", GET_BIT(val, i)); 639 | } 640 | fprintf(stream, "\n"); 641 | for(i=32; i > 0; --i) 642 | { 643 | if (GET_BIT(val, i)) 644 | fprintf(stream, " %s\n", ybpi_flags2[i-1]); 645 | } 646 | break; 647 | } 648 | case Q_SQL_MODE_CODE: 649 | { 650 | uint64_t val = *((uint64_t*)status_var_ptr); 651 | status_var_ptr += 8; 652 | fprintf(stream, "Q_SQL_MODE: 0x%0llu\n", (unsigned long long)val); 653 | break; 654 | } 655 | case Q_CATALOG_CODE: 656 | { 657 | uint8_t size = *(status_var_ptr++); 658 | char* str = strndup(status_var_ptr, size+1); 659 | status_var_ptr += size + 1; 660 | fprintf(stream, "Q_CATALOG: %s\n", str); 661 | free(str); 662 | break; 663 | } 664 | case Q_AUTO_INCREMENT: 665 | { 666 | uint16_t byte_1 = *(uint16_t*)status_var_ptr; 667 | status_var_ptr += 2; 668 | uint16_t byte_2 = *(uint16_t*)status_var_ptr; 669 | status_var_ptr += 2; 670 | fprintf(stream, "Q_AUTO_INCREMENT: (%hu,%hu)\n", byte_1, byte_2); 671 | break; 672 | } 673 | case Q_CHARSET_CODE: 674 | { 675 | uint16_t byte_1 = *(uint16_t*)status_var_ptr; 676 | status_var_ptr += 2; 677 | uint16_t byte_2 = *(uint16_t*)status_var_ptr; 678 | status_var_ptr += 2; 679 | uint16_t byte_3 = *(uint16_t*)status_var_ptr; 680 | status_var_ptr += 2; 681 | fprintf(stream, "Q_CHARSET: (%hu,%hu,%hu)\n", byte_1, byte_2, byte_3); 682 | break; 683 | } 684 | case Q_TIME_ZONE_CODE: 685 | { 686 | uint8_t size = *(status_var_ptr++); 687 | char* str = strndup(status_var_ptr, size); 688 | status_var_ptr += size; 689 | fprintf(stream, "Q_TIME_ZONE: %s\n", str); 690 | free(str); 691 | break; 692 | } 693 | case Q_CATALOG_NZ_CODE: 694 | { 695 | uint8_t size = *(status_var_ptr++); 696 | char* str = strndup(status_var_ptr, size); 697 | status_var_ptr += size; 698 | fprintf(stream, "Q_CATALOG_NZ: %s\n", str); 699 | free(str); 700 | break; 701 | } 702 | case Q_LC_TIME_NAMES_CODE: 703 | { 704 | uint16_t code = *(uint16_t*)status_var_ptr; 705 | status_var_ptr += 2; 706 | fprintf(stream, "Q_LC_TIME_NAMES: %hu\n", code); 707 | break; 708 | } 709 | case Q_CHARSET_DATABASE_CODE: 710 | { 711 | uint16_t code = *(uint16_t*)status_var_ptr; 712 | status_var_ptr += 2; 713 | fprintf(stream, "Q_CHARSET_DATABASE: %hu\n", code); 714 | break; 715 | } 716 | default: 717 | { 718 | int incr = ybpi_status_var_data_len_by_type[status_var_type]; 719 | fprintf(stream, "%s\n", ybpi_variable_types[status_var_type]); 720 | if (incr > 0) { 721 | status_var_ptr += incr; 722 | } 723 | else if (incr == -1) { 724 | uint8_t size = *status_var_ptr; 725 | status_var_ptr += size + 1; 726 | } 727 | else if (incr == -2) { 728 | uint8_t size = *status_var_ptr; 729 | status_var_ptr += size + 2; 730 | } 731 | else { 732 | assert(0); 733 | } 734 | fprintf(stream, " %s\n", ybpi_status_var_types[status_var_type]); 735 | break; 736 | } 737 | } 738 | } 739 | } 740 | fprintf(stream, "statement length: %zd\n", statement_len); 741 | if (q_mode == 0) 742 | fprintf(stream, "statement: %s\n", statement); 743 | free(statement); 744 | } 745 | break; 746 | case ROTATE_EVENT: 747 | { 748 | struct ybp_rotate_event *r = (struct ybp_rotate_event*)e->data; 749 | char *file_name = strndup((const char*)rotate_event_file_name(e), rotate_event_file_name_len(e)); 750 | fprintf(stream, "next log position: %llu\n", (unsigned long long)r->next_position); 751 | fprintf(stream, "next file name: %s\n", file_name); 752 | free(file_name); 753 | } 754 | break; 755 | case INTVAR_EVENT: 756 | { 757 | struct ybp_intvar_event *i = (struct ybp_intvar_event*)e->data; 758 | fprintf(stream, "variable type: %s\n", ybpi_intvar_types[i->type]); 759 | fprintf(stream, "value: %llu\n", (unsigned long long) i->value); 760 | } 761 | break; 762 | case RAND_EVENT: 763 | { 764 | struct ybp_rand_event *r = (struct ybp_rand_event*)e->data; 765 | fprintf(stream, "seed 1: %llu\n", (unsigned long long) r->seed_1); 766 | fprintf(stream, "seed 2: %llu\n", (unsigned long long) r->seed_2); 767 | } 768 | break; 769 | case FORMAT_DESCRIPTION_EVENT: 770 | { 771 | struct ybp_format_description_event *f = ybp_event_as_fde(e); 772 | fprintf(stream, "binlog version: %d\n", f->format_version); 773 | fprintf(stream, "server version: %s\n", f->server_version); 774 | } 775 | break; 776 | case XID_EVENT: 777 | { 778 | struct ybp_xid_event *x = (struct ybp_xid_event*)e->data; 779 | fprintf(stream, "xid id: %llu\n", (unsigned long long)x->id); 780 | } 781 | break; 782 | default: 783 | fprintf(stream, "event type: %s\n", ybp_event_type(e)); 784 | break; 785 | } 786 | } 787 | 788 | /* vim: set sts=0 sw=4 ts=4 noexpandtab: */ 789 | --------------------------------------------------------------------------------