├── .coveragerc ├── .coveragerc-opensource ├── .dockerignore ├── .gitignore ├── .pre-commit-config.yaml ├── .rat-excludes ├── Dockerfile ├── Dockerfile-opensource ├── LICENSE ├── Makefile ├── Makefile-opensource ├── MySQLStreamerWorking.gif ├── NOTICE ├── README.md ├── acceptance ├── complex_statements.feature ├── configs │ ├── Dockerfile │ ├── clog.yaml │ ├── data_pipeline │ │ └── key-1.key │ ├── nail-etc │ │ ├── datapipe.yaml │ │ ├── ecosystem │ │ ├── services.yaml │ │ ├── teams.yaml │ │ ├── uswest2devc.yaml │ │ └── yelp │ │ │ ├── replicationhandler_secret_table.yaml │ │ │ └── replicationhandleropensource_secret_table.yaml │ ├── topology.yaml │ └── yelp_conn_generic.yaml ├── environment.py ├── simple_statements.feature └── steps │ └── save_state.py ├── bin └── venv-update ├── config-env-changelog-itest.yaml ├── config-env-dev.yaml ├── config-env-itest-opensource.yaml ├── config-env-itest.yaml ├── config.yaml ├── connection_sets.yaml ├── devdbs ├── rbr_source │ ├── Dockerfile │ ├── my.cnf │ ├── setup.sh │ ├── setup.sql │ ├── start.sh │ ├── startup.sh │ └── tables │ │ ├── business.sql │ │ └── heartbeat.sql ├── rbr_source_with_gtid │ ├── Dockerfile │ ├── my.cnf │ ├── setup.sh │ ├── setup.sql │ ├── start.sh │ ├── startup.sh │ └── tables │ │ └── business.sql ├── rbr_state │ ├── Dockerfile │ ├── my.cnf │ ├── setup.sh │ ├── setup.sql │ ├── start.sh │ ├── startup.sh │ └── tables │ │ ├── data_event_checkpoint.sql │ │ ├── global_event_state.sql │ │ ├── mysql_dumps.sql │ │ └── schema_event_state.sql └── schema_tracker │ ├── Dockerfile │ ├── my.cnf │ ├── setup.sh │ ├── setup.sql │ ├── start.sh │ ├── startup.sh │ └── tables │ ├── business.sql │ └── heartbeat.sql ├── docker-compose-opensource.yml ├── docker-compose.yml ├── docs ├── Makefile └── source │ ├── conf.py │ └── index.rst ├── fig.yml ├── interactive_streamer.py ├── logs └── .placeholder ├── replication_handler ├── __init__.py ├── batch │ ├── __init__.py │ ├── base_parse_replication_stream.py │ ├── 
mysql_heartbeat_search.py │ ├── parse_replication_stream.py │ └── parse_replication_stream_internal.py ├── components │ ├── __init__.py │ ├── base_binlog_stream_reader_wrapper.py │ ├── base_event_handler.py │ ├── change_log_data_event_handler.py │ ├── data_event_handler.py │ ├── heartbeat_searcher.py │ ├── low_level_binlog_stream_reader_wrapper.py │ ├── mysql_dump_handler.py │ ├── mysql_parser.py │ ├── mysql_tools.py │ ├── position_finder.py │ ├── recovery_handler.py │ ├── replication_stream_restarter.py │ ├── schema_event_handler.py │ ├── schema_tracker.py │ ├── schema_wrapper.py │ ├── simple_binlog_stream_reader_wrapper.py │ └── sql_handler.py ├── config.py ├── environment_configs.py ├── helpers │ ├── __init__.py │ ├── dates.py │ └── lists.py ├── models │ ├── __init__.py │ ├── connections │ │ ├── __init__.py │ │ ├── base_connection.py │ │ ├── rh_connection.py │ │ └── yelp_conn_connection.py │ ├── data_event_checkpoint.py │ ├── database.py │ ├── global_event_state.py │ └── mysql_dumps.py ├── schema │ └── changelog.v2.yaml ├── servlib │ ├── __init__.py │ ├── clog_util.py │ ├── config_util.py │ └── logging_util.py ├── testing_helper │ ├── __init__.py │ ├── config_revamp.py │ ├── restart_helper.py │ └── util.py └── util │ ├── __init__.py │ ├── change_log_message_builder.py │ ├── message_builder.py │ ├── misc.py │ ├── position.py │ └── transaction_id.py ├── replication_handler_testing ├── __init__.py ├── db_sandbox.py └── events.py ├── requirements-opensource.txt ├── requirements.d ├── dev.txt ├── docs.txt └── pre_commit.txt ├── requirements.txt ├── schema ├── avro_schema │ ├── global_transaction_id_v1.avsc │ └── log_transaction_id_v1.avsc ├── migrations │ ├── data_event_checkpoint.xml │ ├── global_event_state.xml │ ├── master.xml │ ├── mysql_dumps.xml │ ├── schema_event_state.xml │ └── tools │ │ ├── liquibase.jar │ │ └── mysql-connector-java-5.1.29-bin.jar └── tables │ ├── data_event_checkpoint.sql │ ├── global_event_state.sql │ ├── mysql_dumps.sql │ └── 
schema_event_state.sql ├── setup.py ├── tests ├── __init__.py ├── batch │ ├── __init__.py │ ├── base_parse_replication_stream_test.py │ ├── internal │ │ └── parse_replication_stream_internal_test.py │ └── parse_replication_stream_test.py ├── components │ ├── __init__.py │ ├── base_event_handler_test.py │ ├── change_log_data_event_handler_test.py │ ├── data_event_handlers_test.py │ ├── heartbeat_searcher_test.py │ ├── low_level_binlog_stream_reader_wrapper_test.py │ ├── mysql_dump_handler_test.py │ ├── mysql_parser_test.py │ ├── position_finder_test.py │ ├── recovery_handler_test.py │ ├── replication_stream_restarter_test.py │ ├── schema_event_handlers_test.py │ ├── schema_tracker_test.py │ ├── schema_wrapper_test.py │ ├── simple_binlog_stream_reader_wrapper_test.py │ └── sql_handler_test.py ├── config_test.py ├── conftest.py ├── integration │ ├── __init__.py │ ├── conftest.py │ ├── end_to_end_test.py │ ├── failure_recovery_test.py │ └── table_change_log_end_to_end_test.py ├── models │ ├── __init__.py │ ├── conftest.py │ ├── connections │ │ ├── __init__.py │ │ ├── base_connection_test.py │ │ ├── rh_connection_test.py │ │ └── yelp_conn_connection_test.py │ ├── data_event_checkpoint_test.py │ ├── global_event_state_test.py │ └── mysql_dumps_test.py ├── servlib │ ├── clog_util_test.py │ ├── config_util_test.py │ └── logging_util_test.py └── util │ ├── __init__.py │ ├── change_log_message_builder_test.py │ ├── message_builder_test.py │ ├── position_test.py │ └── transaction_id_test.py ├── tox-opensource.ini ├── tox.ini └── yelp_conn_generic.yaml /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | omit = replication_handler/models/*,replication_handler/components/stubs/*,replication_handler/testing_helper/* 3 | 4 | [report] 5 | fail_under = 85 6 | show_missing = true 7 | -------------------------------------------------------------------------------- /.coveragerc-opensource: 
-------------------------------------------------------------------------------- 1 | [run] 2 | omit = replication_handler/batch/parse_replication_stream_internal.py,replication_handler/models/*,replication_handler/components/stubs/*,replication_handler/testing_helper/* 3 | 4 | [report] 5 | fail_under = 87 6 | show_missing = true 7 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .tox 3 | virtualenv_run 4 | build 5 | dist 6 | *.pyc 7 | *.log 8 | *.swp 9 | Dockerfile 10 | venv 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.py[co] 2 | *.so 3 | *.sw[nop] 4 | .#* 5 | .DS_Store 6 | ._* 7 | .idea 8 | \#*\# 9 | build 10 | dist 11 | *~ 12 | *.log 13 | precomputed 14 | .pydevproject 15 | .project 16 | *.sublime-* 17 | virtualenv_run 18 | venv 19 | .tox 20 | replication_handler.egg-info/ 21 | __pycache__ 22 | .idea 23 | .coverage 24 | .cache/ 25 | repl.vmprof 26 | tags 27 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | - repo: https://github.com/pre-commit/pre-commit-hooks 2 | sha: v0.6.0 3 | hooks: 4 | - id: trailing-whitespace 5 | - id: end-of-file-fixer 6 | - id: autopep8-wrapper 7 | - id: check-yaml 8 | - id: debug-statements 9 | language_version: python2.7 10 | - id: flake8 11 | language_version: python2.7 12 | # E265 deals with spacing inside of comments - breaks human formatting 13 | # E309 puts a blank line after class declarations - doesn't work well with docstrings 14 | # E501 reformats lines to fit in --max-line-length poorly 15 | # E265 block comment should start with '# ' 16 | # E402 module level import not at top of file 17 | args: 
[--ignore=E265,E309,E501,E265] 18 | - id: check-yaml 19 | - id: check-json 20 | - id: check-merge-conflict 21 | - id: name-tests-test 22 | exclude: tests/helpers/(.+).py 23 | - id: fix-encoding-pragma 24 | - id: check-added-large-files 25 | - id: check-byte-order-marker 26 | - repo: https://github.com/asottile/reorder_python_imports 27 | sha: v0.3.0 28 | hooks: 29 | - id: reorder-python-imports 30 | language_version: python2.7 31 | args: 32 | - --add-import 33 | - from __future__ import absolute_import 34 | - --add-import 35 | - from __future__ import unicode_literals 36 | -------------------------------------------------------------------------------- /.rat-excludes: -------------------------------------------------------------------------------- 1 | build 2 | dist 3 | venv 4 | .tox 5 | replication_handler.egg-info/ 6 | __pycache__ 7 | .cache/ 8 | logs 9 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:14.04.1 2 | 3 | ENV DEBIAN_FRONTEND noninteractive 4 | 5 | run apt-get update && apt-get upgrade -y && apt-get install -y \ 6 | build-essential \ 7 | python-dev \ 8 | libmysqlclient-dev \ 9 | python-pkg-resources \ 10 | python-setuptools \ 11 | python-virtualenv \ 12 | python-pip \ 13 | libpq5 \ 14 | libpq-dev \ 15 | wget \ 16 | language-pack-en-base \ 17 | uuid-dev \ 18 | git-core \ 19 | mysql-client-5.5 20 | 21 | run locale-gen en_US en_US.UTF-8 && dpkg-reconfigure locales 22 | 23 | # Setup pypy 24 | run mkdir /src 25 | workdir /src 26 | run wget https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.4.0-linux64.tar.bz2 --no-check-certificate 27 | run bunzip2 pypy2-v5.4.0-linux64.tar.bz2 28 | run tar xvf pypy2-v5.4.0-linux64.tar 29 | ENV PATH $PATH:/src/pypy2-v5.4.0-linux64/bin/ 30 | run wget https://bootstrap.pypa.io/get-pip.py --no-check-certificate 31 | run pypy get-pip.py 32 | 33 | run ln -s /usr/bin/gcc /usr/local/bin/cc 
34 | 35 | # Use https://github.com/Yelp/dumb-init to make sure signals propagate 36 | RUN wget -O /usr/local/bin/dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.0.1/dumb-init_1.0.1_amd64 37 | RUN chmod +x /usr/local/bin/dumb-init 38 | 39 | # Add the service code 40 | WORKDIR /code 41 | ADD requirements.txt /code/requirements.txt 42 | ADD setup.py /code/setup.py 43 | RUN virtualenv -p pypy /code/virtualenv_run 44 | RUN /code/virtualenv_run/bin/pip install \ 45 | -i https://pypi.yelpcorp.com/simple/ \ 46 | -r /code/requirements.txt 47 | 48 | ADD . /code 49 | 50 | RUN useradd batch 51 | RUN chown -R batch /code 52 | 53 | USER batch 54 | 55 | # Share the logging directory as a volume 56 | RUN mkdir /tmp/logs 57 | VOLUME /tmp/logs 58 | 59 | WORKDIR /code 60 | ENV BASEPATH /code 61 | CMD ["/usr/local/bin/dumb-init", "/code/virtualenv_run/bin/pypy", "/code/replication_handler/batch/parse_replication_stream_internal.py", "--no-notification"] 62 | -------------------------------------------------------------------------------- /Dockerfile-opensource: -------------------------------------------------------------------------------- 1 | FROM ubuntu:14.04.1 2 | 3 | ENV DEBIAN_FRONTEND noninteractive 4 | 5 | run apt-get update && apt-get upgrade -y && apt-get install -y \ 6 | build-essential \ 7 | python-dev \ 8 | libmysqlclient-dev \ 9 | python-pkg-resources \ 10 | python-setuptools \ 11 | python-virtualenv \ 12 | python-pip \ 13 | libpq5 \ 14 | libpq-dev \ 15 | wget \ 16 | language-pack-en-base \ 17 | uuid-dev \ 18 | git-core \ 19 | mysql-client-5.5 20 | 21 | run locale-gen en_US en_US.UTF-8 && dpkg-reconfigure locales 22 | 23 | # Setup pypy 24 | run mkdir /src 25 | workdir /src 26 | run wget https://bitbucket.org/pypy/pypy/downloads/pypy2-v5.4.0-linux64.tar.bz2 --no-check-certificate 27 | run bunzip2 pypy2-v5.4.0-linux64.tar.bz2 28 | run tar xvf pypy2-v5.4.0-linux64.tar 29 | ENV PATH $PATH:/src/pypy2-v5.4.0-linux64/bin/ 30 | run wget
https://bootstrap.pypa.io/get-pip.py --no-check-certificate 31 | run pypy get-pip.py 32 | 33 | run ln -s /usr/bin/gcc /usr/local/bin/cc 34 | 35 | # Use https://github.com/Yelp/dumb-init to make sure signals propagate 36 | RUN wget -O /usr/local/bin/dumb-init https://github.com/Yelp/dumb-init/releases/download/v1.0.1/dumb-init_1.0.1_amd64 37 | RUN chmod +x /usr/local/bin/dumb-init 38 | 39 | # Add the service code 40 | WORKDIR /code 41 | ADD requirements.txt /code/requirements-opensource.txt 42 | ADD setup.py /code/setup.py 43 | RUN virtualenv -p pypy /code/virtualenv_run 44 | RUN /code/virtualenv_run/bin/pip install \ 45 | -i https://pypi.yelpcorp.com/simple/ \ 46 | -r /code/requirements-opensource.txt 47 | 48 | ADD . /code 49 | 50 | RUN useradd batch 51 | RUN chown -R batch /code 52 | 53 | USER batch 54 | 55 | # Share the logging directory as a volume 56 | RUN mkdir /tmp/logs 57 | VOLUME /tmp/logs 58 | 59 | WORKDIR /code 60 | ENV BASEPATH /code 61 | CMD ["/usr/local/bin/dumb-init", "/code/virtualenv_run/bin/pypy", "/code/replication_handler/batch/parse_replication_stream.py"] 62 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean venv-dev test itest build-image compose-prefix 2 | 3 | DOCKER_TAG ?= replication-handler-dev-$(USER) 4 | 5 | test: 6 | tox 7 | 8 | itest: cook-image 9 | DOCKER_TAG=$(DOCKER_TAG) tox -e itest 10 | 11 | itest_db: 12 | tox -e itest_db 13 | 14 | cook-image: 15 | docker build -t $(DOCKER_TAG) . 16 | 17 | clean: 18 | find . -name '*.pyc' -delete 19 | find .
-name '__pycache__' -delete 20 | 21 | venv-dev: 22 | virtualenv --python=python2.7 ./virtualenv_run 23 | ./virtualenv_run/bin/pip install -i https://pypi.yelpcorp.com/simple/ -r requirements.d/dev.txt -r requirements.txt 24 | 25 | install-hooks: 26 | tox -e pre-commit -- install -f --install-hooks 27 | 28 | compose-prefix: 29 | @echo "DOCKER_TAG=$(DOCKER_TAG) `python -c "from data_pipeline.testing_helpers.containers import Containers; print Containers.compose_prefix()"`" 30 | 31 | interactive-streamer: cook-image 32 | DOCKER_TAG=$(DOCKER_TAG) python interactive_streamer.py 33 | -------------------------------------------------------------------------------- /Makefile-opensource: -------------------------------------------------------------------------------- 1 | .PHONY: clean venv-dev test itest build-image compose-prefix 2 | 3 | DOCKER_TAG ?= replication-handler-opensource-dev-$(USER) 4 | 5 | test: 6 | OPEN_SOURCE_MODE=True FORCE_AVOID_INTERNAL_PACKAGES=True tox -c tox-opensource.ini 7 | 8 | itest: cook-image 9 | OPEN_SOURCE_MODE=True FORCE_AVOID_INTERNAL_PACKAGES=True DOCKER_TAG=$(DOCKER_TAG) tox -c tox-opensource.ini -e itest 10 | 11 | itest_db: 12 | tox -c tox-opensource.ini -e itest_db 13 | 14 | cook-image: 15 | docker build -f Dockerfile-opensource -t $(DOCKER_TAG) . 16 | 17 | clean: 18 | find . -name '*.pyc' -delete 19 | find . 
-name '__pycache__' -delete 20 | 21 | venv-dev: 22 | virtualenv --python=python2.7 ./virtualenv_run 23 | ./virtualenv_run/bin/pip install -i https://pypi.yelpcorp.com/simple/ -r requirements.d/dev.txt 24 | 25 | install-hooks: 26 | tox -e pre-commit -- install -f --install-hooks 27 | 28 | compose-prefix: 29 | @OPEN_SOURCE_MODE=True echo "DOCKER_TAG=$(DOCKER_TAG) `python -c "from data_pipeline.testing_helpers.containers import Containers; print Containers.compose_prefix()"`" 30 | 31 | interactive-streamer: cook-image 32 | OPEN_SOURCE_MODE=True DOCKER_TAG=$(DOCKER_TAG) python interactive_streamer.py 33 | -------------------------------------------------------------------------------- /MySQLStreamerWorking.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/mysql_streamer/b88183ac00b88f5dff9c01ad87a46da9e3615d9e/MySQLStreamerWorking.gif -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Apache MySQL Streamer 2 | Copyright 2016 The Apache Software Foundation 3 | 4 | This product includes software developed at 5 | The Apache Software Foundation (http://www.apache.org/). 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Deprecation Notice** 2 | MySQL Streamer is now archived, but we're glad you're checking out Yelp Engineering's work on GitHub! If you'd like to learn more about what we're 3 | up to now, please visit our [tech blog](https://engineeringblog.yelp.com/) to get the latest scoop on Yelp Engineering. 
4 | 5 | If you'd like to set up a MySQL CDC connector to Kafka, check out [Debezium](https://debezium.io/documentation/reference/stable/connectors/mysql.html), 6 | which is an open-source solution for Change Data Capture (and not just from MySQL!) 7 | 8 | # MySQL Streamer 9 | 10 | 11 | What is it? 12 | ----------- 13 | MySQLStreamer is a database change data capture and publish system. 14 | It’s responsible for capturing each individual database change, 15 | enveloping it in a message and publishing it to Kafka. 16 | 17 | 18 | [Read More](https://engineeringblog.yelp.com/2016/08/streaming-mysql-tables-in-real-time-to-kafka.html) 19 | 20 | 21 | How to download 22 | --------------- 23 | ``` 24 | git clone git@github.com:Yelp/mysql_streamer.git 25 | ``` 26 | 27 | 28 | Requirements 29 | ------------ 30 | For Ubuntu 14.04 (run as root, or prefix each apt-get with sudo) 31 | ``` 32 | apt-get update && apt-get upgrade -y && apt-get install -y \ 33 | build-essential \ 34 | python-dev \ 35 | libmysqlclient-dev \ 36 | python-pkg-resources \ 37 | python-setuptools \ 38 | python-virtualenv \ 39 | python-pip \ 40 | libpq5 \ 41 | libpq-dev \ 42 | wget \ 43 | language-pack-en-base \ 44 | uuid-dev \ 45 | git-core \ 46 | mysql-client-5.5 47 | ``` 48 | 49 | 50 | Tests 51 | ----- 52 | Running unit tests 53 | ``` 54 | make -f Makefile-opensource test 55 | ``` 56 | 57 | 58 | Running integration tests 59 | ``` 60 | make -f Makefile-opensource itest 61 | ``` 62 | 63 | 64 | Demo 65 | ---- 66 | ``` 67 | make -f Makefile-opensource interactive-streamer 68 | ``` 69 | ![MySQL Streamer Demo](MySQLStreamerWorking.gif) 70 | 71 | In the bottom pane, you can execute any MySQL statement on a barebones structure we’ve set up. Try creating a table and inserting into it to see the change being sent (shown in the replication handler logs in the top right) and then received (shown in the Kafka tailer’s logs in the top left). The received messages will probably look a little funky - this is because they’re Avro-encoded.
72 | 73 | 74 | Disclaimer 75 | ------- 76 | We're still in the process of setting up this service as a stand-alone. There may be additional work required to run MySQL Streamer instances and integrate with other applications. 77 | 78 | 79 | License 80 | ------- 81 | MySQL Streamer is licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 82 | 83 | 84 | Contributing 85 | ------------ 86 | Everyone is encouraged to contribute to MySQL Streamer by forking the Github repository and making a pull request or opening an issue. 87 | -------------------------------------------------------------------------------- /acceptance/complex_statements.feature: -------------------------------------------------------------------------------- 1 | Feature: Complex Statements 2 | 3 | Scenario: Apply a series of schema changing queries 4 | Given a query to execute for table employee 5 | """ 6 | CREATE TABLE `employee` ( 7 | `id` int(11) DEFAULT NULL, 8 | `name` varchar(64) DEFAULT NULL, 9 | `is_active` tinyint(1) NOT NULL DEFAULT 0, 10 | `salary` float(10, 2) NOT NULL DEFAULT 0.00, 11 | `job_description` text DEFAULT NULL, 12 | `created_at` int(11) NOT NULL, 13 | `update_at` int(11) NOT NULL 14 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 15 | """ 16 | When we execute the statement in rbrsource database 17 | Given a query to execute for table employee 18 | """ 19 | ALTER TABLE `employee` ADD `office` varchar(64) DEFAULT NULL AFTER `job_description` 20 | """ 21 | When we execute the statement in rbrsource database 22 | Given a query to execute for table employee_backup 23 | """ 24 | RENAME TABLE `employee` TO `employee_backup` 25 | """ 26 | When we execute the statement in rbrsource database 27 | Given a query to execute for table employee_new 28 | """ 29 | ALTER TABLE `employee_backup` RENAME TO `employee_new` 30 | """ 31 | When we execute the statement in rbrsource database 32 | Given a query to execute for table employee_new 33 | """ 34 | ALTER TABLE 
`employee_new` MODIFY office varchar(128) 35 | """ 36 | When we execute the statement in rbrsource database 37 | Given an expected create table statement for table employee_new 38 | """ 39 | CREATE TABLE `employee_new` ( 40 | `id` int(11) DEFAULT NULL, 41 | `name` varchar(64) DEFAULT NULL, 42 | `is_active` tinyint(1) NOT NULL DEFAULT '0', 43 | `salary` float(10,2) NOT NULL DEFAULT '0.00', 44 | `job_description` text, 45 | `office` varchar(128) DEFAULT NULL, 46 | `created_at` int(11) NOT NULL, 47 | `update_at` int(11) NOT NULL 48 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 49 | """ 50 | Given an expected avro schema for table employee_new 51 | """ 52 | {"fields": [{"default": null, "type": ["null", "int"], "name": "id"}, {"default": null, "maxlen": "64", "type": ["null", "string"], "name": "name"}, {"default": 0, "type": "int", "name": "is_active"}, {"default": 0.0, "scale": "2", "type": "float", "name": "salary", "precision": "10"}, {"default": null, "type": ["null", "string"], "name": "job_description"}, {"default": null, "maxlen": "128", "type": ["null", "string"], "name": "office"}, {"type": "int", "name": "created_at"}, {"type": "int", "name": "update_at"}], "namespace": "", "name": "employee_new", "type": "record"} 53 | """ 54 | Then schematracker should have correct schema information 55 | And schematizer should have correct info 56 | -------------------------------------------------------------------------------- /acceptance/configs/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM busybox:latest 2 | 3 | ADD . 
/nail/srv/configs/ 4 | VOLUME /nail/srv/configs/ 5 | 6 | ADD nail-etc/services.yaml /nail/etc/services/services.yaml 7 | VOLUME /nail/etc/services/ 8 | 9 | Add nail-etc/uswest2devc.yaml /nail/etc/zookeeper_discovery/generic/uswest2devc.yaml 10 | VOLUME /nail/etc/zookeeper_discovery/generic 11 | 12 | Add nail-etc/ecosystem /nail/etc/ecosystem 13 | VOLUME /nail/etc/ 14 | 15 | Add nail-etc/yelp/ /nail/etc/services/database_pii/yelp/ 16 | VOLUME /nail/etc/services/database_pii/ 17 | 18 | 19 | Add nail-etc/teams.yaml /nail/etc/services/data_pipeline/teams.yaml 20 | VOLUME /nail/etc/services/data_pipeline 21 | 22 | 23 | Add nail-etc/datapipe.yaml /nail/etc/kafka_discovery/datapipe.yaml 24 | VOLUME /nail/etc/kafka_discovery 25 | 26 | # safer_docker expects a command 27 | CMD echo 28 | -------------------------------------------------------------------------------- /acceptance/configs/clog.yaml: -------------------------------------------------------------------------------- 1 | clog_enable_file_logging: true 2 | scribe_disable: true 3 | log_dir: /tmp/logs 4 | -------------------------------------------------------------------------------- /acceptance/configs/data_pipeline/key-1.key: -------------------------------------------------------------------------------- 1 | MIICXQIBAAKBgQCGXI1TsqL0x/48ARdvy2AOXivaGhjgGh7qjK6HMSrXWznNy6WW 2 | 5D+GBfHezIdD8pCHv9XldO7rdWjXWqMVcCYn1ncl5d9hRsg8jzJBEbpyLVAfjHFf 3 | 15xL/0dt57yR04PFdt331bEEH80Vsoe5Kay1N7QbAUgNiVB7+yYVMbBtuwIDAQAB 4 | AoGAGTaA5rXc0mY9FPF3fmkBi3Csde7zwAnmx0jC72eQjHz36eBPIHOQYAv5iT+E 5 | GoIIERE0iCxbMSAFHnRJqhozbnyhl6D01oL4tCTdiJzgZW2t4c7/C3c8k/ECgPQ4 6 | bhKpNNHymEjdSNzRBBXtalIm5k/54sBGPhnVNVqd4P+hCbECQQDDsaC4KheJgp1y 7 | svQsTd4LsWZijtLn7XVwc4ytnSFHk2lfTBAbHCXdpuGY49drJJMmC4B/REaUTn9K 8 | 7fNI6DMFAkEAr8RkBwWGhvUfRBjFCjTeHHjOdhjJ1tPIk/Bwza6meuJsbgMEQjA/ 9 | WAz0jIPxvYHWPMk6f9REL9lpenzAVRF5vwJBAKMHqkODHZS7sfvKRk1ama/YHNni 10 | 7qW1V+9U9E/25bDBiDmSHqDXNFySGMPMnLxv/f7l+ikMclg/hSChV9rXRxkCQQCI 11 | 
IWzvW0Iz6ElM7no9S3DzUEVnhg9dSS3CFqjnRGtAJXmhQsjbko6c3XvSEKb8BUEu 12 | VxPtyTL+dD6TVWeI+z31AkBj4S31hagCCsBzPeKfEPPYke4OJFEJM84mtwtCYisX 13 | 7y5eQ0H0/kbwy1WCHArHLRudzlUJw1RllZBCg2ZicRUb 14 | -------------------------------------------------------------------------------- /acceptance/configs/nail-etc/datapipe.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | clusters: 3 | uswest2-devc: 4 | broker_list: 5 | - "kafka:9092" 6 | zookeeper: "zk:2181" 7 | local_config: 8 | cluster: uswest2-devc 9 | -------------------------------------------------------------------------------- /acceptance/configs/nail-etc/ecosystem: -------------------------------------------------------------------------------- 1 | devc 2 | -------------------------------------------------------------------------------- /acceptance/configs/nail-etc/services.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | schematizer.main: 3 | host: schematizer 4 | port: 8888 5 | kafka_discovery.main: 6 | host: kafkadiscovery 7 | port: 8888 8 | -------------------------------------------------------------------------------- /acceptance/configs/nail-etc/teams.yaml: -------------------------------------------------------------------------------- 1 | 2 | # This file is generated from y/sensu-teams. 
To add a team, add a team with 3 | # a notification_email to sense_handlers::teams 4 | --- 5 | teams: 6 | bam: 7 | email: "bam+replication+handler+testing@yelp.com" 8 | -------------------------------------------------------------------------------- /acceptance/configs/nail-etc/uswest2devc.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | - - "zk" 3 | - 2181 4 | -------------------------------------------------------------------------------- /acceptance/configs/nail-etc/yelp/replicationhandler_secret_table.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/mysql_streamer/b88183ac00b88f5dff9c01ad87a46da9e3615d9e/acceptance/configs/nail-etc/yelp/replicationhandler_secret_table.yaml -------------------------------------------------------------------------------- /acceptance/configs/nail-etc/yelp/replicationhandleropensource_secret_table.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/mysql_streamer/b88183ac00b88f5dff9c01ad87a46da9e3615d9e/acceptance/configs/nail-etc/yelp/replicationhandleropensource_secret_table.yaml -------------------------------------------------------------------------------- /acceptance/configs/topology.yaml: -------------------------------------------------------------------------------- 1 | topology: 2 | - cluster: refresh_primary 3 | replica: master 4 | entries: 5 | - charset: utf8 6 | use_unicode: true 7 | host: rbrsource 8 | db: yelp 9 | user: yelpdev 10 | passwd: "" 11 | port: 3306 12 | - cluster: repltracker 13 | replica: master 14 | entries: 15 | - charset: utf8 16 | use_unicode: true 17 | host: schematracker 18 | db: yelp 19 | user: yelpdev 20 | passwd: "" 21 | port: 3306 22 | - cluster: replhandler 23 | replica: master 24 | entries: 25 | - charset: utf8 26 | use_unicode: true 27 | host: rbrstate 28 | db: yelp 29 | user: yelpdev 30 | 
passwd: "" 31 | port: 3306 32 | -------------------------------------------------------------------------------- /acceptance/configs/yelp_conn_generic.yaml: -------------------------------------------------------------------------------- 1 | topology: "/nail/srv/configs/topology.yaml" 2 | -------------------------------------------------------------------------------- /acceptance/environment.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 
16 | from __future__ import absolute_import 17 | from __future__ import unicode_literals 18 | 19 | import os 20 | from distutils.util import strtobool as bool_ 21 | 22 | import docker 23 | import pymysql 24 | from compose.cli.command import Command 25 | from data_pipeline.testing_helpers.kafka_docker import create_kafka_docker_topic 26 | from data_pipeline.testing_helpers.kafka_docker import KafkaDocker 27 | 28 | 29 | def get_service_host(service_name): 30 | client = docker.Client() 31 | project = Command().get_project_name('replicationhandler') 32 | container = client.inspect_container("%s_%s_1" % (project, service_name)) 33 | return container['NetworkSettings']['IPAddress'] 34 | 35 | 36 | def get_db_connection(db_name): 37 | db_host = get_service_host(db_name) 38 | return pymysql.connect( 39 | host=db_host, 40 | user='yelpdev', 41 | password='', 42 | db='yelp', 43 | charset='utf8mb4', 44 | cursorclass=pymysql.cursors.DictCursor 45 | ) 46 | 47 | 48 | def execute_query(db_name, query): 49 | # Runs query on a fresh connection to the db_name service, commits, and returns only the first result row (fetchone). TODO(SRV-2217|cheng): change this into a context manager - as written, connection.close() is skipped when execute(), fetchone() or commit() raises. 50 | connection = get_db_connection(db_name) 51 | cursor = connection.cursor() 52 | cursor.execute(query) 53 | result = cursor.fetchone() 54 | connection.commit() 55 | connection.close() 56 | return result 57 | 58 | 59 | def setup_kafka_topic(topic_name): 60 | create_kafka_docker_topic( 61 | kafka_docker=KafkaDocker.get_connection(), 62 | topic=str(topic_name), 63 | project='replicationhandler' 64 | ) 65 | 66 | 67 | def before_feature(context, _): 68 | # Move the heartbeat serial from 0 to 123 and reset the shared context data.
69 | _set_heartbeat(0, 123) 70 | context.data = { 71 | 'heartbeat_serial': 123, 72 | 'offset': 0, 73 | 'namespace': 'refresh_primary.yelp', 74 | } 75 | 76 | 77 | def after_scenario(context, _): 78 | context.data['offset'] += 1 79 | context.data['expected_create_table_statement'] = None 80 | 81 | 82 | def after_feature(context, _): 83 | # Clean up all states in rbrstate: delete every row from each state table 84 | state_tables = ['data_event_checkpoint', 'schema_event_state', 'global_event_state'] 85 | for table in state_tables: 86 | cleanup_query = 'delete from {table}'.format(table=table) 87 | execute_query('rbrstate', cleanup_query) 88 | # Drop the table created in schematracker, but only if context.data holds a table_name 89 | if 'table_name' in context.data: 90 | execute_query('schematracker', 'drop table {table}'.format( 91 | table=context.data['table_name']) 92 | ) 93 | # Revert the heartbeat serial from 123 back to 0. 94 | _set_heartbeat(123, 0) 95 | 96 | 97 | def _set_heartbeat(before, after): 98 | heartbeat_query = 'update yelp_heartbeat.replication_heartbeat set serial={after} where serial={before}'.format( 99 | before=before, 100 | after=after 101 | ) 102 | execute_query('rbrsource', heartbeat_query) 103 | 104 | BEHAVE_DEBUG_ON_ERROR = bool_(os.environ.get("BEHAVE_DEBUG_ON_ERROR", "yes")) 105 | 106 | 107 | def after_step(context, step): 108 | if BEHAVE_DEBUG_ON_ERROR and step.status == "failed": 109 | pass # NOTE(review): placeholder - nothing is done for a failed step even when BEHAVE_DEBUG_ON_ERROR is true 110 | -------------------------------------------------------------------------------- /acceptance/simple_statements.feature: -------------------------------------------------------------------------------- 1 | Feature: Save States 2 | 3 | Scenario: Execute create table query 4 | Given a query to execute for table biz 5 | """ 6 | CREATE TABLE `biz` ( 7 | `id` int(11) DEFAULT NULL, 8 | `name` varchar(64) DEFAULT NULL 9 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 10 | """ 11 | Given an expected create table statement for table biz 12 | """ 13 | CREATE TABLE `biz` ( 14 | `id` int(11) DEFAULT NULL, 15 | `name` varchar(64) DEFAULT NULL 16 | ) ENGINE=InnoDB DEFAULT
CHARSET=utf8 17 | """ 18 | Given an expected avro schema for table biz 19 | """ 20 | {"fields": [{"default": null, "type": ["null", "int"], "name": "id"}, {"default": null, "maxlen": "64", "type": ["null", "string"], "name": "name"}], "namespace": "", "name": "biz", "type": "record"} 21 | """ 22 | When we execute the statement in rbrsource database 23 | Then schematracker should have correct schema information 24 | And rbrstate.schema_event_state should have correct state information 25 | And rbrstate.global_event_state should have correct state information 26 | And schematizer should have correct info 27 | 28 | Scenario: Execute an alter statement 29 | Given a query to execute for table biz 30 | """ 31 | ALTER TABLE `biz` ADD `location` varchar(128) DEFAULT NULL 32 | """ 33 | Given an expected create table statement for table biz 34 | """ 35 | CREATE TABLE `biz` ( 36 | `id` int(11) DEFAULT NULL, 37 | `name` varchar(64) DEFAULT NULL, 38 | `location` varchar(128) DEFAULT NULL 39 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 40 | """ 41 | Given an expected avro schema for table biz 42 | """ 43 | {"fields": [{"default": null, "type": ["null", "int"], "name": "id"}, {"default": null, "maxlen": "64", "type": ["null", "string"], "name": "name"}, {"default": null, "maxlen": "128", "type": ["null", "string"], "name": "location"}], "namespace": "", "name": "biz", "type": "record"} 44 | """ 45 | When we execute the statement in rbrsource database 46 | Then schematracker should have correct schema information 47 | And rbrstate.schema_event_state should have correct state information 48 | And rbrstate.global_event_state should have correct state information 49 | And schematizer should have correct info 50 | 51 | Scenario: add data 52 | Given a query to insert data for table biz 53 | """ 54 | INSERT INTO `biz` VALUES (1, 'yelp', 'SF') 55 | """ 56 | When we execute the statement in rbrsource database 57 | Then rbrstate.data_event_checkpoint should have correct state information 58 | 
-------------------------------------------------------------------------------- /config-env-changelog-itest.yaml: -------------------------------------------------------------------------------- 1 | changelog_mode: True 2 | 3 | namespace: dev_changelog 4 | 5 | rbr_source_cluster: refresh_primary 6 | 7 | schema_tracker_cluster: repltracker 8 | 9 | rbr_state_cluster: replhandler 10 | 11 | register_dry_run: False 12 | 13 | publish_dry_run: False 14 | 15 | disable_sensu: True 16 | 17 | topology_path: /nail/srv/configs/topology.yaml 18 | 19 | resume_stream: False 20 | 21 | container_name: itest_changelog 22 | 23 | container_env: docker-compose 24 | 25 | force_exit: True 26 | module_env_config: 27 | - namespace: data_pipeline 28 | config: 29 | load_schematizer_host_and_port_from_smartstack: True 30 | kafka_cluster_type: datapipe 31 | kafka_cluster_name: local-cluster 32 | skip_messages_with_pii: False 33 | kafka_producer_buffer_size: 500 34 | -------------------------------------------------------------------------------- /config-env-dev.yaml: -------------------------------------------------------------------------------- 1 | rbr_source_cluster: refresh_primary 2 | 3 | schema_tracker_cluster: repltracker 4 | 5 | rbr_state_cluster: replhandler 6 | 7 | register_dry_run: True 8 | 9 | publish_dry_run: True 10 | 11 | disable_sensu: True 12 | 13 | topology_path: /nail/srv/configs/topology.yaml 14 | 15 | namespace: dev 16 | 17 | disable_meteorite: False 18 | 19 | container_name: dev 20 | 21 | container_env: raw 22 | 23 | schema_blacklist: 24 | - information_schema 25 | - mysql 26 | - performance_schema 27 | - test 28 | - yelp_heartbeat 29 | 30 | table_whitelist: 31 | - advertiser 32 | - business_payment_account_entity 33 | - payment_account_entity 34 | - payment_feature 35 | - payment_program 36 | - salesperson 37 | 38 | module_env_config: 39 | - namespace: data_pipeline 40 | config: 41 | load_schematizer_host_and_port_from_smartstack: False 42 | 
-------------------------------------------------------------------------------- /config-env-itest-opensource.yaml: -------------------------------------------------------------------------------- 1 | rbr_source_cluster: refresh_primary 2 | 3 | schema_tracker_cluster: repltracker 4 | 5 | rbr_state_cluster: replhandler 6 | 7 | register_dry_run: False 8 | 9 | publish_dry_run: False 10 | 11 | disable_sensu: True 12 | 13 | topology_path: /nail/srv/configs/topology.yaml 14 | 15 | resume_stream: False 16 | 17 | namespace: dev 18 | 19 | container_name: itest 20 | 21 | container_env: docker-compose 22 | 23 | force_exit: True 24 | 25 | gtid_enabled: True 26 | 27 | pii_yaml_path: '/nail/etc/services/database_pii' 28 | 29 | schema_blacklist: 30 | - information_schema 31 | - mysql 32 | - performance_schema 33 | - test 34 | - yelp_heartbeat 35 | 36 | module_env_config: 37 | - namespace: data_pipeline 38 | config: 39 | kafka_broker_list: 40 | - kafka:9092 41 | kafka_zookeeper: zk:2181 42 | schematizer_host_and_port: schematizer:8888 43 | skip_messages_with_pii: False 44 | kafka_producer_buffer_size: 500 45 | encryption_type: AES_MODE_CBC-1 46 | -------------------------------------------------------------------------------- /config-env-itest.yaml: -------------------------------------------------------------------------------- 1 | rbr_source_cluster: refresh_primary 2 | 3 | schema_tracker_cluster: repltracker 4 | 5 | rbr_state_cluster: replhandler 6 | 7 | register_dry_run: False 8 | 9 | publish_dry_run: False 10 | 11 | disable_sensu: True 12 | 13 | topology_path: /nail/srv/configs/topology.yaml 14 | 15 | resume_stream: False 16 | 17 | namespace: dev 18 | 19 | container_name: itest 20 | 21 | container_env: docker-compose 22 | 23 | force_exit: True 24 | 25 | pii_yaml_path: '/nail/etc/services/database_pii' 26 | 27 | schema_blacklist: 28 | - information_schema 29 | - mysql 30 | - performance_schema 31 | - test 32 | - yelp_heartbeat 33 | 34 | module_env_config: 35 | - namespace: 
data_pipeline 36 | config: 37 | load_schematizer_host_and_port_from_smartstack: True 38 | kafka_cluster_type: datapipe 39 | kafka_cluster_name: local-cluster 40 | skip_messages_with_pii: False 41 | kafka_producer_buffer_size: 500 42 | encryption_type: AES_MODE_CBC-1 43 | -------------------------------------------------------------------------------- /config.yaml: -------------------------------------------------------------------------------- 1 | rbr_source_cluster: refresh_primary 2 | 3 | schema_tracker_cluster: repltracker 4 | 5 | rbr_state_cluster: replhandler 6 | 7 | register_dry_run: True 8 | 9 | publish_dry_run: True 10 | 11 | topology_path: topology.yaml 12 | 13 | changelog_schemaname: changelog.v2 14 | 15 | changelog_mode: False 16 | 17 | schema_blacklist: 18 | - information_schema 19 | - mysql 20 | - performance_schema 21 | - test 22 | - yelp_heartbeat 23 | 24 | producer_name: replication_handler 25 | 26 | team_name: bam 27 | 28 | zookeeper_discovery_path: '/nail/etc/zookeeper_discovery/generic/uswest2{ecosystem}.yaml' 29 | 30 | pii_yaml_path: '/nail/etc/services/database_pii' 31 | 32 | max_delay_allowed_in_seconds: 600 33 | 34 | sensu_host: 169.254.255.254 35 | 36 | disable_sensu: False 37 | 38 | disable_meteorite: False 39 | 40 | recovery_queue_size: 1500 41 | 42 | namespace: dev 43 | 44 | container_name: none 45 | 46 | container_env: raw 47 | 48 | force_exit: False 49 | 50 | module_config: 51 | - namespace: yelp_conn 52 | file: /nail/srv/configs/yelp_conn_generic.yaml 53 | config: 54 | connection_set_file: connection_sets.yaml 55 | 56 | connection_wrappers: 57 | - yelp_conn.sqllogger.LoggingConnectionWrapper 58 | - yelp_conn.wrappers.AnnotatingConnectionWrapper 59 | 60 | query_logging_listeners: 61 | - ['yelp_conn.sqllogger.ClogCursorListener', 'tmp_query_log'] 62 | 63 | # Log queries to all listeners in `query_logging_listeners` 64 | enable_query_logging: True 65 | # Print queries to stdout 66 | enable_query_printing: True 67 | 68 | db: 69 | 
db_pool_listeners: 70 | - yelp_conn.engine.DisconnectedPoolListener 71 | 72 | - namespace: clog 73 | config: 74 | log_stream_name: tmp_replication_handler_logs 75 | # Works around https://bitbucket.org/pypy/pypy/issues/2280 76 | scribe_errors_to_syslog: false 77 | file: /nail/srv/configs/clog.yaml 78 | 79 | - namespace: smartstack_services 80 | file: /nail/etc/services/services.yaml 81 | 82 | - namespace: data_pipeline 83 | config: 84 | load_schematizer_host_and_port_from_smartstack: True 85 | -------------------------------------------------------------------------------- /connection_sets.yaml: -------------------------------------------------------------------------------- 1 | connection_sets: 2 | rbr_source_ro: 3 | connection_configuration: 4 | read_only: true 5 | refresh_primary: "master" 6 | schema_tracker_rw: 7 | repltracker: "master" 8 | rbr_state_rw: 9 | replhandler: "master" 10 | rbr_state_ro: 11 | connection_configuration: 12 | read_only: true 13 | replhandler: "master" 14 | -------------------------------------------------------------------------------- /devdbs/rbr_source/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:latest 2 | RUN rpm -Uhv https://www.percona.com/redir/downloads/percona-release/redhat/0.0-1/percona-release-0.0-1.x86_64.rpm 3 | 4 | RUN yum install -y Percona-Server-server-56 Percona-Server-client-56 Percona-Server-shared-56 5 | 6 | ADD . 
/code/ 7 | ADD my.cnf /etc/my.cnf 8 | RUN chown mysql /etc/my.cnf 9 | RUN bash /code/startup.sh 10 | RUN cd code; bash setup.sh 11 | 12 | USER mysql 13 | # Convert 48bit hostname hash to a 32bit int 14 | CMD [ \ 15 | "bash", "-c", \ 16 | "exec mysqld --server-id=$((0x$(hostname) >> 16))" \ 17 | ] 18 | -------------------------------------------------------------------------------- /devdbs/rbr_source/my.cnf: -------------------------------------------------------------------------------- 1 | [mysqld] 2 | skip-external-locking 3 | max-allowed-packet = 16M 4 | sync-binlog = 0 5 | log-bin = /var/lib/mysql/mysql-bin.log 6 | 7 | log-warnings = 1 8 | log-warnings-suppress = 1592 9 | 10 | innodb-file-format = Barracuda 11 | innodb-file-per-table = TRUE 12 | innodb-flush-log-at-trx-commit = 0 13 | innodb-buffer-pool-size = 128M 14 | innodb-fast-shutdown = 0 15 | binlog-format = row 16 | log_slave_updates 17 | 18 | # GTID specific, needs version 5.6 of percona/mysql 19 | #enforce_gtid_consistency 20 | #gtid_mode = ON 21 | 22 | # DNS resolution can cause significant delays in connecting to the server 23 | skip-name-resolve 24 | -------------------------------------------------------------------------------- /devdbs/rbr_source/setup.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Yelp Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, either express or implied. See the License for the 13 | # specific language governing permissions and limitations 14 | # under the License. 
15 | # This script comes from the base image. It starts mysqld in the background 16 | bash /code/startup.sh 17 | 18 | # Create our database and users 19 | cat setup.sql | mysql 20 | 21 | # Create tables 22 | cat tables/business.sql | mysql yelp 23 | cat tables/heartbeat.sql | mysql yelp_heartbeat 24 | 25 | mysqladmin shutdown 26 | -------------------------------------------------------------------------------- /devdbs/rbr_source/setup.sql: -------------------------------------------------------------------------------- 1 | 2 | -- Setup database and users 3 | -- Reference http://y/runbook-add-new-db 4 | 5 | 6 | CREATE DATABASE yelp DEFAULT CHARACTER SET utf8; 7 | CREATE DATABASE yelp_heartbeat DEFAULT CHARACTER SET utf8; 8 | 9 | GRANT ALL ON *.* TO 'yelpdev'@'%'; 10 | -------------------------------------------------------------------------------- /devdbs/rbr_source/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 
16 | 17 | set -eu 18 | 19 | # Convert 48bit hostname hash to a 32bit int 20 | # also skip name resolution because it causes significant delays in connecting 21 | # to the server 22 | mysqld --server-id=$((0x$(hostname) >> 16)) 23 | -------------------------------------------------------------------------------- /devdbs/rbr_source/startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 16 | 17 | set -e 18 | 19 | /usr/bin/mysqld_safe & 20 | 21 | # Wait for mysqld to start 22 | delay=1 23 | timeout=5 24 | while ! mysqladmin ping >/dev/null 2>&1; do 25 | timeout=$(expr $timeout - $delay) 26 | 27 | if [ $timeout -eq 0 ]; then 28 | echo "Timeout error occurred trying to start MySQL Daemon." 
29 | exit 1 30 | fi 31 | sleep $delay 32 | done 33 | 34 | echo "GRANT ALL ON *.* TO mysql@'169.254.%.%' WITH GRANT OPTION; FLUSH PRIVILEGES" | mysql 35 | echo "GRANT ALL ON *.* TO mysql@'localhost' WITH GRANT OPTION; FLUSH PRIVILEGES" | mysql 36 | -------------------------------------------------------------------------------- /devdbs/rbr_source/tables/business.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `business` ( 2 | `id` int(11) NOT NULL AUTO_INCREMENT, 3 | `acxiom_id` int(11) DEFAULT NULL, 4 | `name` varchar(64) COLLATE utf8_unicode_ci DEFAULT NULL, 5 | `address1` varchar(128) COLLATE utf8_unicode_ci DEFAULT NULL, 6 | `address2` varchar(128) COLLATE utf8_unicode_ci DEFAULT NULL, 7 | `address3` varchar(128) COLLATE utf8_unicode_ci DEFAULT NULL, 8 | `city` varchar(64) COLLATE utf8_unicode_ci DEFAULT NULL, 9 | `county` varchar(64) COLLATE utf8_unicode_ci DEFAULT NULL, 10 | `state` varchar(3) COLLATE utf8_unicode_ci DEFAULT NULL, 11 | `country` varchar(2) COLLATE utf8_unicode_ci DEFAULT NULL, 12 | `zip` varchar(12) COLLATE utf8_unicode_ci DEFAULT NULL, 13 | `phone` varchar(32) COLLATE utf8_unicode_ci DEFAULT NULL, 14 | `fax` varchar(32) COLLATE utf8_unicode_ci DEFAULT NULL, 15 | `url` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL, 16 | `email` varchar(64) COLLATE utf8_unicode_ci DEFAULT NULL, 17 | `flags` int(11) NOT NULL DEFAULT '0', 18 | `latitude` double DEFAULT NULL, 19 | `longitude` double DEFAULT NULL, 20 | `accuracy` double DEFAULT NULL, 21 | `time_created` int(11) NOT NULL DEFAULT '0', 22 | `score` double DEFAULT NULL, 23 | `rating` double DEFAULT NULL, 24 | `review_count` int(11) NOT NULL DEFAULT '0', 25 | `photo_id` int(11) DEFAULT NULL, 26 | `alias` varchar(96) COLLATE utf8_unicode_ci DEFAULT NULL, 27 | `geoquad` int(10) unsigned DEFAULT NULL, 28 | `data_source_type` tinyint(3) unsigned DEFAULT NULL, 29 | PRIMARY KEY (`id`), 30 | KEY `zip` (`zip`,`phone`), 31 | KEY `longitude` 
(`longitude`,`latitude`), 32 | KEY `phone` (`phone`), 33 | KEY `review_count` (`review_count`), 34 | KEY `geoquad` (`geoquad`) 35 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; 36 | -------------------------------------------------------------------------------- /devdbs/rbr_source/tables/heartbeat.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `replication_heartbeat` ( 2 | `serial` bigint(20) NOT NULL, 3 | `timestamp` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP 4 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; 5 | 6 | INSERT INTO `replication_heartbeat` (`serial`) VALUES (0); 7 | -------------------------------------------------------------------------------- /devdbs/rbr_source_with_gtid/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:latest 2 | RUN rpm -Uhv https://www.percona.com/redir/downloads/percona-release/redhat/0.0-1/percona-release-0.0-1.x86_64.rpm 3 | 4 | RUN yum install -y Percona-Server-server-56 Percona-Server-client-56 Percona-Server-shared-56 5 | 6 | ADD . 
/code/ 7 | ADD my.cnf /etc/my.cnf 8 | RUN chown mysql /etc/my.cnf 9 | RUN bash /code/startup.sh 10 | RUN cd code; bash setup.sh 11 | 12 | USER mysql 13 | # Convert 48bit hostname hash to a 32bit int 14 | CMD [ \ 15 | "bash", "-c", \ 16 | "exec mysqld --server-id=$((0x$(hostname) >> 16))" \ 17 | ] 18 | -------------------------------------------------------------------------------- /devdbs/rbr_source_with_gtid/my.cnf: -------------------------------------------------------------------------------- 1 | [mysqld] 2 | skip-external-locking 3 | max-allowed-packet = 16M 4 | sync-binlog = 0 5 | log-bin = /var/lib/mysql/mysql-bin.log 6 | 7 | log-warnings = 1 8 | log-warnings-suppress = 1592 9 | 10 | innodb-file-format = Barracuda 11 | innodb-file-per-table = TRUE 12 | innodb-flush-log-at-trx-commit = 0 13 | innodb-buffer-pool-size = 128M 14 | innodb-fast-shutdown = 0 15 | binlog-format = row 16 | log_slave_updates 17 | 18 | # GTID specific, needs version 5.6 of percona/mysql 19 | enforce_gtid_consistency 20 | gtid_mode = ON 21 | 22 | # DNS resolution can cause significant delays in connecting to the server 23 | skip-name-resolve 24 | -------------------------------------------------------------------------------- /devdbs/rbr_source_with_gtid/setup.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Yelp Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, either express or implied. See the License for the 13 | # specific language governing permissions and limitations 14 | # under the License. 
15 | # This script comes from the base image. It starts mysqld in the background 16 | bash /code/startup.sh 17 | 18 | # Create our database and users 19 | cat setup.sql | mysql 20 | 21 | # Create tables 22 | cat tables/business.sql | mysql yelp 23 | 24 | mysqladmin shutdown 25 | -------------------------------------------------------------------------------- /devdbs/rbr_source_with_gtid/setup.sql: -------------------------------------------------------------------------------- 1 | 2 | -- Setup database and users 3 | -- Reference http://y/runbook-add-new-db 4 | 5 | 6 | CREATE DATABASE yelp DEFAULT CHARACTER SET utf8; 7 | 8 | GRANT ALL ON *.* TO 'yelpdev'@'%'; 9 | -------------------------------------------------------------------------------- /devdbs/rbr_source_with_gtid/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 
16 | 17 | set -eu 18 | 19 | # Convert 48bit hostname hash to a 32bit int 20 | # also skip name resolution because it causes significant delays in connecting 21 | # to the server 22 | mysqld --server-id=$((0x$(hostname) >> 16)) 23 | -------------------------------------------------------------------------------- /devdbs/rbr_source_with_gtid/startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 16 | 17 | set -e 18 | 19 | /usr/bin/mysqld_safe & 20 | 21 | # Wait for mysqld to start 22 | delay=1 23 | timeout=5 24 | while ! mysqladmin ping >/dev/null 2>&1; do 25 | timeout=$(expr $timeout - $delay) 26 | 27 | if [ $timeout -eq 0 ]; then 28 | echo "Timeout error occurred trying to start MySQL Daemon." 
29 | exit 1 30 | fi 31 | sleep $delay 32 | done 33 | 34 | echo "GRANT ALL ON *.* TO mysql@'169.254.%.%' WITH GRANT OPTION; FLUSH PRIVILEGES" | mysql 35 | echo "GRANT ALL ON *.* TO mysql@'localhost' WITH GRANT OPTION; FLUSH PRIVILEGES" | mysql 36 | -------------------------------------------------------------------------------- /devdbs/rbr_source_with_gtid/tables/business.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `business` ( 2 | `id` int(11) NOT NULL AUTO_INCREMENT, 3 | `acxiom_id` int(11) DEFAULT NULL, 4 | `name` varchar(64) COLLATE utf8_unicode_ci DEFAULT NULL, 5 | `address1` varchar(128) COLLATE utf8_unicode_ci DEFAULT NULL, 6 | `address2` varchar(128) COLLATE utf8_unicode_ci DEFAULT NULL, 7 | `address3` varchar(128) COLLATE utf8_unicode_ci DEFAULT NULL, 8 | `city` varchar(64) COLLATE utf8_unicode_ci DEFAULT NULL, 9 | `county` varchar(64) COLLATE utf8_unicode_ci DEFAULT NULL, 10 | `state` varchar(3) COLLATE utf8_unicode_ci DEFAULT NULL, 11 | `country` varchar(2) COLLATE utf8_unicode_ci DEFAULT NULL, 12 | `zip` varchar(12) COLLATE utf8_unicode_ci DEFAULT NULL, 13 | `phone` varchar(32) COLLATE utf8_unicode_ci DEFAULT NULL, 14 | `fax` varchar(32) COLLATE utf8_unicode_ci DEFAULT NULL, 15 | `url` varchar(255) COLLATE utf8_unicode_ci DEFAULT NULL, 16 | `email` varchar(64) COLLATE utf8_unicode_ci DEFAULT NULL, 17 | `flags` int(11) NOT NULL DEFAULT '0', 18 | `latitude` double DEFAULT NULL, 19 | `longitude` double DEFAULT NULL, 20 | `accuracy` double DEFAULT NULL, 21 | `time_created` int(11) NOT NULL DEFAULT '0', 22 | `score` double DEFAULT NULL, 23 | `rating` double DEFAULT NULL, 24 | `review_count` int(11) NOT NULL DEFAULT '0', 25 | `photo_id` int(11) DEFAULT NULL, 26 | `alias` varchar(96) COLLATE utf8_unicode_ci DEFAULT NULL, 27 | `geoquad` int(10) unsigned DEFAULT NULL, 28 | `data_source_type` tinyint(3) unsigned DEFAULT NULL, 29 | PRIMARY KEY (`id`), 30 | KEY `zip` (`zip`,`phone`), 31 | KEY `longitude` 
(`longitude`,`latitude`), 32 | KEY `phone` (`phone`), 33 | KEY `review_count` (`review_count`), 34 | KEY `geoquad` (`geoquad`) 35 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; 36 | -------------------------------------------------------------------------------- /devdbs/rbr_state/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM centos:latest 2 | RUN rpm -Uhv https://www.percona.com/redir/downloads/percona-release/redhat/0.0-1/percona-release-0.0-1.x86_64.rpm 3 | 4 | RUN yum install -y Percona-Server-server-56 Percona-Server-client-56 Percona-Server-shared-56 5 | 6 | ADD . /code/ 7 | ADD my.cnf /etc/my.cnf 8 | RUN chown mysql /etc/my.cnf 9 | RUN bash /code/startup.sh 10 | RUN cd code; bash setup.sh 11 | 12 | USER mysql 13 | # Convert 48bit hostname hash to a 32bit int 14 | CMD [ \ 15 | "bash", "-c", \ 16 | "exec mysqld --server-id=$((0x$(hostname) >> 16))" \ 17 | ] 18 | -------------------------------------------------------------------------------- /devdbs/rbr_state/my.cnf: -------------------------------------------------------------------------------- 1 | [mysqld] 2 | skip-external-locking 3 | max-allowed-packet = 16M 4 | sync-binlog = 0 5 | log-bin = /var/lib/mysql/mysql-bin.log 6 | 7 | log-warnings = 1 8 | log-warnings-suppress = 1592 9 | 10 | innodb-file-format = Barracuda 11 | innodb-file-per-table = TRUE 12 | innodb-flush-log-at-trx-commit = 0 13 | innodb-buffer-pool-size = 128M 14 | innodb-fast-shutdown = 0 15 | binlog-format = row 16 | log_slave_updates 17 | 18 | # GTID specific, needs version 5.6 of percona/mysql 19 | enforce_gtid_consistency 20 | gtid_mode = ON 21 | 22 | # DNS resolution can cause significant delays in connecting to the server 23 | skip-name-resolve 24 | -------------------------------------------------------------------------------- /devdbs/rbr_state/setup.sh: -------------------------------------------------------------------------------- 1 | # Copyright 2016 Yelp 
Inc. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, 10 | # software distributed under the License is distributed on an 11 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 12 | # KIND, either express or implied. See the License for the 13 | # specific language governing permissions and limitations 14 | # under the License. 15 | # This script comes from the base image. It starts mysqld in the background 16 | bash /code/startup.sh 17 | 18 | # Create our database and users 19 | cat setup.sql | mysql 20 | 21 | # Create tables 22 | cat tables/* | mysql yelp 23 | 24 | mysqladmin shutdown 25 | -------------------------------------------------------------------------------- /devdbs/rbr_state/setup.sql: -------------------------------------------------------------------------------- 1 | 2 | -- Setup database and users 3 | -- Reference http://y/runbook-add-new-db 4 | 5 | 6 | CREATE DATABASE yelp DEFAULT CHARACTER SET utf8; 7 | 8 | GRANT ALL ON *.* TO 'yelpdev'@'%'; 9 | 10 | use yelp; 11 | -------------------------------------------------------------------------------- /devdbs/rbr_state/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 16 | 17 | set -eu 18 | 19 | # Convert 48bit hostname hash to a 32bit int 20 | # also skip name resolution because it causes significant delays in connecting 21 | # to the server 22 | mysqld --server-id=$((0x$(hostname) >> 16)) 23 | -------------------------------------------------------------------------------- /devdbs/rbr_state/startup.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 16 | 17 | set -e 18 | 19 | /usr/bin/mysqld_safe & 20 | 21 | # Wait for mysqld to start 22 | delay=1 23 | timeout=5 24 | while ! mysqladmin ping >/dev/null 2>&1; do 25 | timeout=$(expr $timeout - $delay) 26 | 27 | if [ $timeout -eq 0 ]; then 28 | echo "Timeout error occurred trying to start MySQL Daemon." 
29 | exit 1 30 | fi 31 | sleep $delay 32 | done 33 | 34 | echo "GRANT ALL ON *.* TO mysql@'169.254.%.%' WITH GRANT OPTION; FLUSH PRIVILEGES" | mysql 35 | echo "GRANT ALL ON *.* TO mysql@'localhost' WITH GRANT OPTION; FLUSH PRIVILEGES" | mysql 36 | -------------------------------------------------------------------------------- /devdbs/rbr_state/tables/data_event_checkpoint.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `data_event_checkpoint` ( 2 | `id` int(11) NOT NULL AUTO_INCREMENT, 3 | `kafka_topic` varchar(255) NOT NULL, 4 | `kafka_offset` int(11) NOT NULL, 5 | `cluster_name` varchar(255) NOT NULL, 6 | `time_created` int(11) NOT NULL, 7 | `time_updated` int(11) NOT NULL, 8 | PRIMARY KEY (`id`) 9 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; 10 | -------------------------------------------------------------------------------- /devdbs/rbr_state/tables/global_event_state.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `global_event_state` ( 2 | `id` int(11) NOT NULL AUTO_INCREMENT, 3 | `position` text NOT NULL, 4 | `is_clean_shutdown` tinyint(1) DEFAULT 0 NOT NULL, 5 | `event_type` varchar(20) NOT NULL, 6 | `cluster_name` varchar(255) NOT NULL, 7 | `database_name` varchar(255), 8 | `table_name` varchar(255), 9 | `time_updated` int(11) NOT NULL, 10 | PRIMARY KEY (`id`) 11 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; 12 | -------------------------------------------------------------------------------- /devdbs/rbr_state/tables/mysql_dumps.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `mysql_dumps` ( 2 | `cluster_name` varchar(255) NOT NULL, 3 | `database_dump` longtext NOT NULL, 4 | PRIMARY KEY (`cluster_name`) 5 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; -------------------------------------------------------------------------------- 
# Image for the schema_tracker development database (Percona Server 5.6).
FROM centos:latest
RUN rpm -Uhv https://www.percona.com/redir/downloads/percona-release/redhat/0.0-1/percona-release-0.0-1.x86_64.rpm

RUN yum install -y Percona-Server-server-56 Percona-Server-client-56 Percona-Server-shared-56

# COPY rather than ADD: these are plain local files, so none of ADD's extra
# behaviour (URL fetching, archive extraction) is wanted.
COPY . /code/
COPY my.cnf /etc/my.cnf
RUN chown mysql /etc/my.cnf
# startup.sh launches mysqld in the background and grants privileges to the
# mysql user.
RUN bash /code/startup.sh
# setup.sh creates the databases and tables, then shuts mysqld down. Use an
# absolute path and && so that a failed cd stops the build step immediately.
RUN cd /code && bash setup.sh

USER mysql
# Convert 48bit hostname hash to a 32bit int
CMD [ \
    "bash", "-c", \
    "exec mysqld --server-id=$((0x$(hostname) >> 16))" \
]

-- Setup database and users
-- Reference http://y/runbook-add-new-db
--
-- setup.sh pipes this file into the mysql client before loading the table
-- definitions: tables/business.sql goes into `yelp` and tables/heartbeat.sql
-- goes into `yelp_heartbeat`.


CREATE DATABASE yelp DEFAULT CHARACTER SET utf8;
CREATE DATABASE yelp_heartbeat DEFAULT CHARACTER SET utf8;

-- Grants every privilege on every database to 'yelpdev' from any host.
-- NOTE(review): no password is specified here; presumably acceptable because
-- this is a throwaway development container -- confirm.
GRANT ALL ON *.* TO 'yelpdev'@'%';
#!/bin/bash
# Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Starts mysqld in the background, waits until it answers pings (or gives up
# after $timeout seconds), then grants privileges to the mysql user.

set -e

/usr/bin/mysqld_safe &

# Wait for mysqld to start
delay=1
timeout=5
while ! mysqladmin ping >/dev/null 2>&1; do
    # Shell arithmetic instead of `expr`: expr exits with status 1 when its
    # result is 0, which under `set -e` killed the script right here and made
    # the timeout message below unreachable.
    timeout=$((timeout - delay))

    # -le rather than -eq so the check still fires if delay does not divide
    # timeout evenly.
    if [ "$timeout" -le 0 ]; then
        echo "Timeout error occurred trying to start MySQL Daemon."
        exit 1
    fi
    sleep "$delay"
done

echo "GRANT ALL ON *.* TO mysql@'169.254.%.%' WITH GRANT OPTION; FLUSH PRIVILEGES" | mysql
echo "GRANT ALL ON *.* TO mysql@'localhost' WITH GRANT OPTION; FLUSH PRIVILEGES" | mysql
-- Heartbeat table; setup.sh loads this file into the `yelp_heartbeat`
-- database.
CREATE TABLE `replication_heartbeat` (
  `serial` bigint(20) NOT NULL,
  -- Set on insert and refreshed automatically whenever the row is updated.
  `timestamp` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci;

-- Seed a single row with serial 0.
-- NOTE(review): presumably the heartbeat writer updates this one row in place
-- rather than inserting new rows -- confirm.
INSERT INTO `replication_heartbeat` (`serial`) VALUES (0);
3 | image: $DOCKER_TAG 4 | links: 5 | - kafka 6 | - kafkadiscovery 7 | - zookeeper:zk 8 | - rbrsource 9 | - schematracker 10 | - rbrstate 11 | - schematizer 12 | volumes_from: 13 | - replicationhandlerconfigs 14 | command: /code/virtualenv_run/bin/pypy /code/replication_handler/batch/parse_replication_stream_internal.py -vv --no-notification 15 | environment: 16 | - SERVICE_ENV_CONFIG_PATH=config-env-itest.yaml 17 | replicationhandlerchangelog: 18 | build: . 19 | image: $DOCKER_TAG 20 | links: 21 | - kafka 22 | - kafkadiscovery 23 | - zookeeper:zk 24 | - rbrsource 25 | - schematracker 26 | - rbrstate 27 | - schematizer 28 | volumes_from: 29 | - replicationhandlerconfigs 30 | command: /code/virtualenv_run/bin/pypy /code/replication_handler/batch/parse_replication_stream_internal.py -vv --no-notification 31 | environment: 32 | - SERVICE_ENV_CONFIG_PATH=config-env-changelog-itest.yaml 33 | replicationhandlerconfigs: 34 | build: acceptance/configs/ 35 | # Replication Handler databases 36 | rbrsource: 37 | build: devdbs/rbr_source/ 38 | schematracker: 39 | build: devdbs/schema_tracker/ 40 | rbrstate: 41 | build: devdbs/rbr_state/ 42 | 43 | kafkadiscovery: 44 | image: docker-dev.yelpcorp.com/kafka-discovery-testing:latest 45 | expose: 46 | - "8888" 47 | volumes_from: 48 | - kafkadiscoveryconfigs 49 | links: 50 | - kafka 51 | - zookeeper 52 | 53 | kafkadiscoveryconfigs: 54 | image: docker-dev.yelpcorp.com/kafka-discovery-configs-testing:latest 55 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | .. Replication Handler documentation master file, created by 2 | sphinx-quickstart on Thu Mar 12 21:49:04 2015. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to Replication Handler's documentation! 
7 | =============================================== 8 | 9 | Contents: 10 | 11 | .. toctree:: 12 | :maxdepth: 2 13 | 14 | 15 | 16 | Indices and tables 17 | ================== 18 | 19 | * :ref:`genindex` 20 | * :ref:`modindex` 21 | * :ref:`search` 22 | 23 | -------------------------------------------------------------------------------- /fig.yml: -------------------------------------------------------------------------------- 1 | rbrsource: 2 | build: devdbs/rbr_source/ 3 | 4 | schematracker: 5 | build: devdbs/schema_tracker/ 6 | 7 | rbrstate: 8 | build: devdbs/rbr_state/ 9 | 10 | replicationhandlerconfigs: 11 | build: acceptance/configs/ 12 | volumes: 13 | - /nail/srv/configs 14 | 15 | replicationhandlerservice: 16 | build: . 17 | links: 18 | - rbrsource 19 | - schematracker 20 | - rbrstate 21 | volumes_from: 22 | - replicationhandlerconfigs 23 | -------------------------------------------------------------------------------- /interactive_streamer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 
class InteractiveStreamer(object):
    """Start the replication handler containers and wire a tmux session to them.

    ``setup_containers`` brings up the docker-compose services and waits for
    them to become healthy; ``setup_tmux`` then opens a three-pane tmux
    session showing a Kafka console consumer, the replication handler logs
    and a MySQL shell on the source database.
    """

    def __init__(self):
        pass

    @property
    def gtid_enabled(self):
        """True when the OPEN_SOURCE_MODE environment variable is set."""
        return bool(is_envvar_set('OPEN_SOURCE_MODE'))

    @property
    def compose_file(self):
        """Absolute path of the docker-compose file to use.

        The compose files live next to this script. The path is resolved from
        the absolute location of this file, so it no longer depends on the
        current working directory or on how the script was invoked.
        """
        compose_name = (
            "docker-compose-opensource.yml"
            if is_envvar_set('OPEN_SOURCE_MODE') else "docker-compose.yml"
        )
        return os.path.abspath(
            os.path.join(
                os.path.dirname(os.path.abspath(__file__)),
                compose_name
            )
        )

    @property
    def services(self):
        """Names of the compose services that are started."""
        return [
            'replicationhandler',
            'rbrsource',
            'schematracker',
            'rbrstate'
        ]

    @property
    def dbs(self):
        """Names of the database services that get a health check."""
        return ["rbrsource", "schematracker", "rbrstate"]

    @contextmanager
    def setup_containers(self):
        """Context manager that runs the containers for the enclosed block.

        Stores the ``Containers`` object on ``self.containers`` and yields
        once every database and the replication handler pass their health
        checks (each with a 120 second timeout).
        """
        with Containers(self.compose_file, self.services) as self.containers:
            for db in self.dbs:
                db_health_check(containers=self.containers, db_name=db, timeout_seconds=120)
            replication_handler_health_check(
                containers=self.containers,
                rbrsource='rbrsource',
                schematracker='schematracker',
                timeout_seconds=120
            )
            yield

    def _tmux_send_keys(self, paneid, cmd):
        """Type ``cmd`` followed by Enter into the tmux pane ``paneid``.

        The command is passed as a single argv element instead of being
        spliced into a shell string, so quotes or ``$`` inside ``cmd`` reach
        tmux untouched.
        """
        subprocess.call(
            ['tmux', 'send-keys', '-t', '{}'.format(paneid), cmd, 'C-m']
        )

    def setup_rh_logs(self, pane_id):
        """Follow the replication handler container logs in ``pane_id``."""
        container_info = Containers.get_container_info(self.containers.project, 'replicationhandler')
        self._tmux_send_keys(pane_id, 'docker logs -f {}'.format(container_info.get('Id')))

    def setup_kafka_tailer(self, pane_id):
        """Open a shell in the kafka container and start a console consumer."""
        kafka_container_info = Containers.get_container_info(self.containers.project, 'kafka')
        zk_ip_address = Containers.get_container_ip_address(self.containers.project, 'zookeeper')
        self._tmux_send_keys(pane_id, "docker exec -it {} bash".format(kafka_container_info.get('Id')))
        self._tmux_send_keys(
            pane_id,
            "/opt/kafka_2.10-0.8.2.1/bin/kafka-console-consumer.sh --from-beginning --zookeeper {}:2181 --blacklist None".format(zk_ip_address)
        )

    def setup_mysql_shell(self, pane_id):
        """Open a mysql client on the rbrsource ``yelp`` database in ``pane_id``."""
        ip_address = Containers.get_container_ip_address(self.containers.project, 'rbrsource')
        self._tmux_send_keys(pane_id, 'mysql -uyelpdev -h{} --database=yelp'.format(ip_address))

    @contextmanager
    def setup_tmux(self):
        """Create a detached three-pane tmux session and populate the panes.

        Requires ``setup_containers`` to be active, because the pane commands
        read ``self.containers``. Nothing is torn down on exit.
        """
        subprocess.call(['tmux', 'new-session', '-d'])
        subprocess.call(['tmux', 'set', '-g', 'mouse-select-pane', 'on'])

        subprocess.call(['tmux', 'split-window', '-d', '-t', '0', '-v'])
        subprocess.call(['tmux', 'split-window', '-d', '-t', '0', '-h'])

        self.setup_kafka_tailer('0')
        self.setup_rh_logs('1')
        self.setup_mysql_shell('2')
        yield


if __name__ == "__main__":
    streamer = InteractiveStreamer()
    with streamer.setup_containers(), streamer.setup_tmux():
        subprocess.call(['tmux', 'attach'])
2 | -------------------------------------------------------------------------------- /replication_handler/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/mysql_streamer/b88183ac00b88f5dff9c01ad87a46da9e3615d9e/replication_handler/__init__.py -------------------------------------------------------------------------------- /replication_handler/batch/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 16 | from __future__ import absolute_import 17 | from __future__ import unicode_literals 18 | -------------------------------------------------------------------------------- /replication_handler/batch/mysql_heartbeat_search.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
# TODO(justinc|DATAPIPE-2098) Add tests for this batch
class MySQLHeartbeatSearchBatch(Batch):
    """Batch which runs the heartbeat searcher component from the command line.
    Useful for manual testing.

    To use from the command line:
        python -m replication_handler.batch.mysql_heartbeat_search
            {heartbeat_timestamp} {heartbeat_serial}
    Note that the heartbeat_timestamp should be utc timestamp, eg, 1447354877
    Prints information about the heartbeat or None if the heartbeat could
    not be found.
    """

    notify_emails = [
        "bam+replication+handler@yelp.com"
    ]

    def run(self):
        """Runs the batch by calling out to the heartbeat searcher component"""
        position = HeartbeatSearcher(
            db_config=self.db_config
        ).get_position(self.hb_timestamp, self.hb_serial)
        # A parenthesised single argument behaves the same as a Python 2
        # print statement and as the Python 3 print function.
        print(position)

    @batch_command_line_options
    def parse_options(self, option_parser):
        """Declare the --topology-file, --cluster and --replica options.

        :param option_parser: the optparse parser for this batch
        :returns: the "DB Options" option group
        """
        # NOTE(review): presumably the decorator registers the returned group
        # on the parser -- confirm against yelp_batch.
        option_parser.set_usage("%prog [options] HEARTBEAT_TIMESTAMP HEARTBEAT_SERIAL")
        opt_group = optparse.OptionGroup(option_parser, "DB Options")
        opt_group.add_option(
            '--topology-file',
            default='/nail/srv/configs/topology.yaml',
            help='Path to topology file. Default is %default.',
        )
        opt_group.add_option(
            '--cluster',
            default='refresh_primary',
            help='Topology cluster to connect to. Default is %default.',
        )
        opt_group.add_option(
            '--replica',
            default='master',
            help='Replica to connect to. Default is %default.',
        )

        return opt_group

    @batch_configure
    def configure(self):
        """Validate the positional arguments and resolve the DB config.

        Sets ``hb_timestamp``, ``hb_serial`` and ``db_config``. Any invalid
        input is reported through ``option_parser.error``.
        """
        if len(self.args) != 2:
            self.option_parser.error(
                "Two arguments are required, HEARTBEAT_TIMESTAMP and "
                "HEARTBEAT_SERIAL. See --help."
            )
        try:
            self.hb_timestamp, self.hb_serial = [int(a) for a in self.args]
        except ValueError:
            # Report bad input as a usage error instead of a raw traceback.
            self.option_parser.error(
                "HEARTBEAT_TIMESTAMP and HEARTBEAT_SERIAL must be integers. "
                "See --help."
            )
        self.db_config = self._get_db_config(
            self.options.topology_file,
            self.options.cluster,
            self.options.replica,
        )
        if not self.db_config:
            self.option_parser.error(
                "Cluster and replica couldn't be found in topology file"
            )

    def _get_db_config(self, topology_file, cluster, replica):
        """Return a DBConfig for the first entry matching cluster/replica.

        :param topology_file: path to the topology YAML file
        :param cluster: value to match against each item's ``cluster`` key
        :param replica: value to match against each item's ``replica`` key
        :returns: a DBConfig, or None when no topology item matches
        """
        # Parsing here, because we don't want to pull the topology parser in
        # from yelp_conn because of OS
        #
        # The file is opened in a ``with`` block so the handle is closed
        # deterministically. safe_load is used because the topology is plain
        # data and must never construct arbitrary Python objects.
        with open(topology_file, 'r') as topology_stream:
            topology = yaml.safe_load(topology_stream)
        # An empty file or a missing 'topology' key is treated as "no match",
        # so configure() reports it as a usage error.
        topology_items = (topology or {}).get('topology') or []
        for topo_item in topology_items:
            if (
                topo_item.get('cluster') == cluster and
                topo_item.get('replica') == replica
            ):
                entry = topo_item['entries'][0]
                return DBConfig(
                    user=entry['user'],
                    host=entry['host'],
                    port=entry['port'],
                    passwd=entry['passwd'],
                    db=entry['db'],
                )
        return None


if __name__ == '__main__':
    MySQLHeartbeatSearchBatch().start()
CONSOLE_FORMAT = '%(asctime)s - %(name)-12s:%(lineno)d: %(levelname)-8s %(message)s'


class ParseReplicationStream(BaseParseReplicationStream):
    """Replication stream parser that logs to the console at DEBUG level."""

    def __init__(self):
        # Console logging is wired up before the base class initialises so
        # that nothing logged during start-up is lost.
        self.setup_console_logging()
        super(ParseReplicationStream, self).__init__()

    def setup_console_logging(self):
        """Attach a DEBUG-level stream handler to the root logger."""
        console_handler = logging.StreamHandler()
        console_formatter = logging.Formatter(CONSOLE_FORMAT)
        self.setup_logger(
            logger=None,
            handler=console_handler,
            log_level=logging.DEBUG,
            formatter=console_formatter,
        )

    def setup_logger(self, logger, handler, log_level, formatter=None):
        """Attach ``handler`` to a logger, configuring its level and format.

        Note that the level is lowered on the *root* logger, while the handler
        is added to the logger named by ``logger``. These are the same object
        only when ``logger`` is None.

        :param logger: name of the logger the handler is attached to
        :param handler: a :class:`logging.Handler` to attach to the logger
        :param log_level: the logging level to set on the handler
        :param formatter: optional :class:`logging.Formatter` for the handler
        """
        if formatter is not None:
            handler.setFormatter(formatter)

        # Make sure the root logger is at least as verbose as the handler.
        root_logger = logging.getLogger()
        current_level = root_logger.getEffectiveLevel()
        root_logger.setLevel(
            log_level if log_level < current_level else current_level
        )

        handler.setLevel(log_level)
        target_logger = logging.getLogger(logger)
        target_logger.addHandler(handler)


if __name__ == '__main__':
    ParseReplicationStream().run()
class BaseBinlogStreamReaderWrapper(object):
    """Base class providing peek/pop over a buffered event stream.

    Subclasses implement ``_refill_current_events`` and use
    ``self.current_events`` as the buffer it fills.
    """

    def __init__(self):
        self.current_events = deque()

    def peek(self):
        """Return the next event while leaving it in the stream."""
        while True:
            if self.current_events:
                return self.current_events[0]
            # Buffer is empty: ask the subclass for more and check again.
            self._refill_current_events()

    def pop(self):
        """Remove the next event from the stream and return it.

        Note that each data event contains exactly one row.
        """
        while True:
            if self.current_events:
                return self.current_events.popleft()
            # Buffer is empty: ask the subclass for more and check again.
            self._refill_current_events()

    def _refill_current_events(self):
        # Subclass hook: add events to self.current_events.
        raise NotImplementedError

    def _seek(self):
        # Subclass hook; not used by this base class.
        raise NotImplementedError
Table = namedtuple('Table', ('cluster_name', 'database_name', 'table_name'))


log = logging.getLogger('replication_handler.component.base_event_handler')


class BaseEventHandler(object):
    """Common base for the Replication Handler's binlog event handlers.

    Args:
        db_connections: connections object; subclasses read
            ``source_cluster_name`` from it.
        producer(data_pipe.producer.Producer object): data pipeline clientlib
            producer, needed because both schema and data event handling
            publish messages.
        schema_wrapper(SchemaWrapper object): wrapper for talking to the
            schematizer.
        stats_counter(StatsCounter object): optional wrapper for talking to
            meteorite.
    """

    def __init__(self, db_connections, producer, schema_wrapper, stats_counter=None):
        self.db_connections = db_connections
        self.producer = producer
        self.schema_wrapper = schema_wrapper
        self.stats_counter = stats_counter

    def handle_event(self, event, position):
        """Handle one event; every subclass must provide its own version."""
        raise NotImplementedError

    def is_blacklisted(self, event, schema):
        """Return True, and log the skip, when ``schema`` is blacklisted."""
        if schema not in env_config.schema_blacklist:
            return False
        self.log_blacklisted_schema(event, schema)
        return True

    def log_blacklisted_schema(self, event, schema):
        """Log at INFO level that ``event`` is skipped because of ``schema``."""
        event_type_name = str(type(event))
        message = "Skipping {event}, reason: schema: {schema} is blacklisted.".format(
            event=event_type_name,
            schema=schema
        )
        log.info(message)
log = logging.getLogger(__name__)

CURR_FILEPATH = os.path.dirname(__file__)
CHANGELOG_SCHEMANAME = config.env_config.changelog_schemaname
SCHEMA_FILEPATH = os.path.join(
    CURR_FILEPATH, '../schema/{}.yaml'.format(CHANGELOG_SCHEMANAME))
OWNER_EMAIL = 'distsys-data+changelog@yelp.com'


class ChangeLogDataEventHandler(DataEventHandler):
    """Handles data change events: add, update and delete.

    Unlike the parent handler, every event is published with one fixed
    changelog schema (registered from the YAML file at ``SCHEMA_FILEPATH``)
    instead of a per-table schema.
    """

    def __init__(self, *args, **kwargs):
        super(ChangeLogDataEventHandler, self).__init__(*args, **kwargs)
        # The same entry is reused for every row. Reading self.schema_id
        # here registers the schema at construction time.
        self.schema_wrapper_entry = SchemaWrapperEntry(
            schema_id=self.schema_id, transformation_map={})

    @cached_property
    def schema_id(self):
        """Register the changelog schema and return its schema id.

        Computed once per instance (``cached_property``).
        """
        schematizer = self.schema_wrapper.schematizer_client
        with open(SCHEMA_FILEPATH, 'r') as schema_file:
            # safe_load: the schema file is plain data. yaml.load without an
            # explicit Loader is deprecated and is rejected by newer PyYAML
            # releases.
            schema_dict = yaml.safe_load(schema_file)
        schema = schematizer.register_schema_from_schema_json(
            namespace=schema_dict['namespace'],
            source=schema_dict['name'],
            schema_json=schema_dict,
            source_owner_email=OWNER_EMAIL,
            contains_pii=False,
        )
        return schema.schema_id

    def handle_event(self, event, position):
        """Publish the event to Kafka unless its schema is blacklisted.

        :param event: the row event; ``event.schema`` is checked against the
            blacklist
        :param position: position forwarded to the message builder
        """
        if self.is_blacklisted(event, event.schema):
            return
        self._handle_row(self.schema_wrapper_entry, event, position)

    def _handle_row(self, schema_wrapper_entry, event, position):
        """Build a changelog message for ``event``, publish it, count it."""
        builder = ChangeLogMessageBuilder(
            schema_wrapper_entry,
            event,
            self.transaction_id_schema_id,
            position,
            self.register_dry_run
        )
        message = builder.build_message(
            self.db_connections.source_cluster_name,
        )
        self.producer.publish(message)
        if self.stats_counter:
            self.stats_counter.increment(event.table)
from __future__ import absolute_import
from __future__ import unicode_literals

import logging

from replication_handler.components.base_event_handler import BaseEventHandler
from replication_handler.components.base_event_handler import Table
from replication_handler.util.message_builder import MessageBuilder
from replication_handler.util.misc import get_transaction_id_schema_id


log = logging.getLogger('replication_handler.parse_replication_stream')


class DataEventHandler(BaseEventHandler):
    """Publishes row-level data events using the schema registered for the
    event's table.
    """

    def __init__(self, *args, **kwargs):
        # Both keyword arguments are consumed here and are not forwarded to
        # the base class.
        dry_run = kwargs.pop('register_dry_run')
        gtid_enabled = kwargs.pop('gtid_enabled')
        self.register_dry_run = dry_run
        self.transaction_id_schema_id = get_transaction_id_schema_id(gtid_enabled)
        super(DataEventHandler, self).__init__(*args, **kwargs)

    def handle_event(self, event, position):
        """Look up the schema entry for the event's table and publish the
        event; blacklisted events are dropped.
        """
        if self.is_blacklisted(event, event.schema):
            return
        table = Table(
            cluster_name=self.db_connections.source_cluster_name,
            database_name=event.schema,
            table_name=event.table
        )
        entry = self._get_payload_schema(table)
        self._handle_row(entry, event, position)

    def _handle_row(self, schema_wrapper_entry, event, position):
        """Build the message, publish it and update the stats counter if
        one is set.
        """
        message = MessageBuilder(
            schema_wrapper_entry,
            event,
            self.transaction_id_schema_id,
            position,
            self.register_dry_run
        ).build_message(
            self.db_connections.source_cluster_name
        )
        self.producer.publish(message)
        counter = self.stats_counter
        if counter:
            counter.increment(event.table)

    def _get_payload_schema(self, table):
        """Return the schema wrapper entry stored for *table*."""
        return self.schema_wrapper[table]

# --------------------------------------------------------------------------------
# /replication_handler/components/mysql_tools.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

import logging
import os
import uuid
from subprocess import Popen

from replication_handler.util.misc import delete_file_if_exists

try:
    from shlex import quote as _shell_quote  # Python 3.3+
except ImportError:
    from pipes import quote as _shell_quote  # Python 2


logger = logging.getLogger('replication_handler.components.mysql_tools')
# Kept for backward compatibility with code that imports it; the helpers
# below wait through Popen.wait() instead of os.waitpid().
EMPTY_WAITING_OPTIONS = 0


def _quote(value):
    """Render *value* as text and escape it for use inside a shell command."""
    return _shell_quote('{}'.format(value))


def _run_shell_command(command, description):
    """Run *command* through the shell and wait for it to finish.

    A non-zero exit status is logged instead of raised, so callers keep the
    previous best-effort behaviour. The command itself is never logged
    because it contains the database password.

    Returns:
        The exit status of the command.
    """
    process = Popen(command, shell=True)
    return_code = process.wait()
    if return_code != 0:
        logger.error("%s exited with status %s", description, return_code)
    return return_code


def restore_mysql_dump(db_creds, dump_file):
    """Feed the SQL in *dump_file* to the ``mysql`` client.

    Args:
        db_creds: mapping with 'host', 'port', 'user' and 'passwd' keys.
        dump_file: path of the dump file to restore.
    """
    # Every interpolated value is shell-quoted so that special characters
    # in credentials or paths cannot break (or extend) the command.
    restore_cmd = "mysql --host={h} --port={p} --user={u} --password={pa} < {dump_file_path}".format(
        h=_quote(db_creds['host']),
        p=_quote(db_creds['port']),
        u=_quote(db_creds['user']),
        pa=_quote(db_creds['passwd']),
        dump_file_path=_quote(dump_file)
    )

    logger.info("Running restore on host {h} as user {u}".format(
        h=db_creds['host'],
        u=db_creds['user']
    ))
    _run_shell_command(restore_cmd, "mysql restore")


def create_mysql_dump(db_creds, databases):
    """Run ``mysqldump`` (schema only) and return the dump as a string.

    Args:
        db_creds: mapping with 'host', 'port', 'user' and 'passwd' keys.
        databases: whitespace-separated database names, or a list/tuple of
            names.

    Returns:
        The content written by mysqldump.
    """
    temp_file = _get_dump_file()
    if isinstance(databases, (list, tuple)):
        database_names = list(databases)
    else:
        database_names = '{}'.format(databases).split()
    dump_cmd = "mysqldump --host={} --port={} --user={} --password={} {} {} {} {} --databases {} > {}".format(
        _quote(db_creds['host']),
        _quote(db_creds['port']),
        _quote(db_creds['user']),
        _quote(db_creds['passwd']),
        '--no-data',
        '--single-transaction',
        '--add-drop-database',
        '--add-drop-table',
        ' '.join(_quote(name) for name in database_names),
        _quote(temp_file)
    )
    logger.info("Running mysqldump to create dump of {db}".format(
        db=databases
    ))
    try:
        _run_shell_command(dump_cmd, "mysqldump")
        return _read_dump_content(temp_file)
    finally:
        # Remove the temporary file even when running or reading fails.
        delete_file_if_exists(temp_file)


def _get_dump_file():
    """Return a unique file name for a temporary dump."""
    rand = uuid.uuid1().hex
    return "mysql_dump.{}".format(rand)


def _read_dump_content(dump_file):
    """Return the full content of *dump_file*."""
    with open(dump_file, 'r') as f:
        content = f.read()
    return content


def _write_dump_content(dump_file, content):
    """Overwrite *dump_file* with *content*."""
    with open(dump_file, 'w') as f:
        f.write(content)
# --------------------------------------------------------------------------------
# /replication_handler/components/position_finder.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

import logging

from replication_handler.util.position import construct_position
from replication_handler.util.position import GtidPosition
from replication_handler.util.position import LogPosition


log = logging.getLogger('replication_handler.components.position_finder')


class PositionFinder(object):
    """Works out the position from which the binlog stream reader should
    resume tailing, based on the state saved in the db.

    Args:
        gtid_enabled(boolean): selects the kind of empty position returned
            when there is no saved state.
        global_event_state(GlobalEventState object): stores the global state,
            including position information.
    """

    def __init__(self, gtid_enabled, global_event_state):
        self.gtid_enabled = gtid_enabled
        self.global_event_state = global_event_state

    def get_position_to_resume_tailing_from(self):
        """Return the saved position, or an empty GtidPosition/LogPosition
        when no state has been saved.
        """
        saved_state = self.global_event_state
        if saved_state:
            return construct_position(saved_state.position)
        if self.gtid_enabled:
            return GtidPosition()
        return LogPosition()

# --------------------------------------------------------------------------------
# /replication_handler/components/replication_stream_restarter.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

import copy
import logging

from replication_handler.components.position_finder import PositionFinder
from replication_handler.components.recovery_handler import RecoveryHandler
from replication_handler.components.simple_binlog_stream_reader_wrapper import SimpleBinlogStreamReaderWrapper
from replication_handler.models.global_event_state import GlobalEventState


log = logging.getLogger('replication_handler.components.replication_stream_restarter')


class ReplicationStreamRestarter(object):
    """Drives the restart of the replication stream: positions the stream
    at the saved position and runs the recovery procedure when needed.

    Args:
        db_connections(BaseConnection object): a wrapper for communication with mysql db.
        schema_wrapper(SchemaWrapper object): a wrapper for communication with schematizer.
        activate_mysql_dump_recovery: stored on the instance as given.
        gtid_enabled(boolean): forwarded to the position finder, the stream
            reader and the recovery handler.
    """

    def __init__(self, db_connections, schema_wrapper, activate_mysql_dump_recovery, gtid_enabled=False):
        self.db_connections = db_connections
        self.schema_wrapper = schema_wrapper
        self.activate_mysql_dump_recovery = activate_mysql_dump_recovery
        self.gtid_enabled = gtid_enabled
        # The saved global event state describes the last shutdown; it is
        # what the recovery process works from.
        self.global_event_state = self._get_global_event_state(
            self.db_connections.source_cluster_name
        )
        self.position_finder = PositionFinder(
            gtid_enabled,
            self.global_event_state
        )

    def restart(self, producer, register_dry_run=True, changelog_mode=False):
        """Retrieve the saved position, create the stream at that position
        and run recovery if the recovery handler reports it is needed.

        register_dry_run(boolean): whether a schema has to be registered for a message to be published.
        changelog_mode(boolean): If True, executes change_log flow (default: false)
        """
        position = self.position_finder.get_position_to_resume_tailing_from()
        log.info("Restarting replication: %s" % repr(position))
        connections = self.db_connections
        self.stream = SimpleBinlogStreamReaderWrapper(
            source_database_config=connections.source_database_config,
            tracker_database_config=connections.tracker_database_config,
            position=position,
            gtid_enabled=self.gtid_enabled
        )
        log.info("Created replication stream.")

        # Without saved state there is nothing to recover from.
        if not self.global_event_state:
            return

        recovery_handler = RecoveryHandler(
            stream=self.stream,
            producer=producer,
            schema_wrapper=self.schema_wrapper,
            db_connections=self.db_connections,
            is_clean_shutdown=self.global_event_state.is_clean_shutdown,
            register_dry_run=register_dry_run,
            changelog_mode=changelog_mode,
            gtid_enabled=self.gtid_enabled
        )
        if recovery_handler.need_recovery:
            log.info("Recovery required, starting recovery process")
            recovery_handler.recover()

    def get_stream(self):
        """Return the replication stream created by restart()."""
        return self.stream

    def _get_global_event_state(self, cluster_name):
        """Read the saved state for *cluster_name* and return a shallow copy
        made while the session is still open.
        """
        with self.db_connections.state_session.connect_begin(ro=True) as session:
            saved_state = GlobalEventState.get(
                session,
                cluster_name=cluster_name,
            )
            return copy.copy(saved_state)

# --------------------------------------------------------------------------------
# /replication_handler/environment_configs.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 16 | from __future__ import absolute_import 17 | from __future__ import unicode_literals 18 | 19 | import os 20 | from distutils.util import strtobool 21 | 22 | 23 | def is_envvar_set(envvar): 24 | return strtobool(os.getenv(envvar, 'false')) 25 | -------------------------------------------------------------------------------- /replication_handler/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 16 | from __future__ import absolute_import 17 | from __future__ import unicode_literals 18 | -------------------------------------------------------------------------------- /replication_handler/helpers/dates.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

import datetime
import sys
import time

from six import integer_types


def to_timestamp(datetime_val):
    """Convert a date or datetime to an integer unix timestamp.

    None is passed through. A value whose date part is datetime.date.max
    maps to sys.maxsize.
    """
    if datetime_val is None:
        return None

    # Compare on the date part only, so plain dates and full datetimes are
    # handled alike.
    has_time_part = isinstance(datetime_val, datetime.datetime)
    date_part = datetime_val.date() if has_time_part else datetime_val

    if date_part >= datetime.date.max:
        return sys.maxsize

    return int(time.mktime(datetime_val.timetuple()))


def get_datetime(t, preserve_max=False):
    """Like to_datetime, but return None instead of raising ValueError."""
    try:
        converted = to_datetime(t, preserve_max=preserve_max)
    except ValueError:
        return None
    return converted


def to_datetime(value, preserve_max=False):
    """Convert a datetime, date, float or integer to a datetime.

    None is passed through; any other type raises ValueError.
    """
    if value is None:
        return None
    # datetime must be tested before date: it is a subclass of date.
    if isinstance(value, datetime.datetime):
        return value
    if isinstance(value, datetime.date):
        return date_to_datetime(value, preserve_max=preserve_max)
    if isinstance(value, float) or isinstance(value, integer_types):
        return from_timestamp(value)
    raise ValueError("Can't convert %r to a datetime" % (value,))


def from_timestamp(timestamp_val):
    """Convert a unix timestamp to a datetime; None is passed through."""
    if timestamp_val is not None:
        return datetime.datetime.fromtimestamp(timestamp_val)
    return None
66 | 67 | def date_to_datetime(dt, preserve_max=False): 68 | if preserve_max and datetime.date.max == dt: 69 | return datetime.datetime.max 70 | return datetime.datetime(*dt.timetuple()[:3]) 71 | 72 | 73 | def default_now(context): 74 | return datetime.datetime.now().replace(microsecond=0) 75 | -------------------------------------------------------------------------------- /replication_handler/helpers/lists.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 16 | """ 17 | Utility methods for manipulating lists. 
18 | """ 19 | from __future__ import absolute_import 20 | from __future__ import unicode_literals 21 | 22 | 23 | def unlist(a_list): 24 | """Convert the (possibly) single item list into a single item""" 25 | if len(a_list) > 1: 26 | raise ValueError(len(a_list)) 27 | 28 | if len(a_list) == 0: 29 | return None 30 | else: 31 | return a_list[0] 32 | -------------------------------------------------------------------------------- /replication_handler/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/mysql_streamer/b88183ac00b88f5dff9c01ad87a46da9e3615d9e/replication_handler/models/__init__.py -------------------------------------------------------------------------------- /replication_handler/models/connections/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/mysql_streamer/b88183ac00b88f5dff9c01ad87a46da9e3615d9e/replication_handler/models/connections/__init__.py -------------------------------------------------------------------------------- /replication_handler/models/connections/base_connection.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 
from __future__ import absolute_import
from __future__ import unicode_literals

from contextlib import contextmanager

import yaml


class BaseConnection(object):
    """Base class holding the topology and per-cluster database configs.

    The constructor loads the topology file, resolves the source, tracker
    and state database configs from it and then calls set_sessions().
    Subclasses provide the session setters and the cursor context managers.

    Args:
        topology_path: path of the YAML topology file.
        source_cluster_name: name of the source cluster.
        tracker_cluster_name: topology key of the schema tracker cluster.
        state_cluster_name: topology key of the state cluster.
        source_cluster_topology_name: optional topology key for the source
            cluster when it differs from source_cluster_name.
    """

    def __init__(
        self,
        topology_path,
        source_cluster_name,
        tracker_cluster_name,
        state_cluster_name,
        source_cluster_topology_name=None,
    ):
        # open() in a with-block closes the handle deterministically (the
        # old file() builtin exists only on Python 2 and was never closed);
        # safe_load restricts the YAML to plain data types.
        with open(topology_path, 'r') as topology_file:
            self.topology = yaml.safe_load(topology_file)

        self.source_cluster_name = source_cluster_name
        self.source_cluster_topology_name = source_cluster_topology_name
        self.tracker_cluster_name = tracker_cluster_name
        self.state_cluster_name = state_cluster_name

        self.source_database_config = self._get_cluster_config(
            self.get_source_database_topology_key()
        )
        self.tracker_database_config = self._get_cluster_config(
            self.tracker_cluster_name
        )
        self.state_database_config = self._get_cluster_config(
            self.state_cluster_name
        )

        self.set_sessions()

    def __del__(self):
        self.topology = {}

    def set_sessions(self):
        """Create the source, tracker and state sessions."""
        self._set_source_session()
        self._set_tracker_session()
        self._set_state_session()

    @property
    def source_session(self):
        return self._source_session

    @property
    def tracker_session(self):
        return self._tracker_session

    @property
    def state_session(self):
        return self._state_session

    def _set_source_session(self):
        raise NotImplementedError

    def _set_tracker_session(self):
        raise NotImplementedError

    def _set_state_session(self):
        raise NotImplementedError

    @contextmanager
    def get_tracker_cursor(self):
        raise NotImplementedError

    @contextmanager
    def get_state_cursor(self):
        raise NotImplementedError

    @contextmanager
    def get_source_cursor(self):
        raise NotImplementedError

    def get_source_database_topology_key(self):
        """This is used so that the name of the source cluster can differ from
        the key used to identify the cluster inside of the topology. This is
        necessary to support changing the underlying cluster that the
        replication handler would point to.
        """
        if self.source_cluster_topology_name:
            return self.source_cluster_topology_name
        else:
            return self.source_cluster_name

    def _get_cluster_config(self, cluster_name):
        """Return the first entry of the topology item whose 'cluster' equals
        *cluster_name*.

        Raises:
            ValueError: no topology item matches *cluster_name*.
        """
        for topo_item in self.topology.get('topology'):
            if topo_item.get('cluster') == cluster_name:
                return topo_item['entries'][0]
        raise ValueError("Database configuration for {cluster_name} not found.".format(
            cluster_name=cluster_name))

# --------------------------------------------------------------------------------
# /replication_handler/models/connections/rh_connection.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

from contextlib import contextmanager

import MySQLdb
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from sqlalchemy.orm.scoping import ScopedSession

from replication_handler.models.connections.base_connection import BaseConnection


class RHConnection(BaseConnection):
    """BaseConnection implementation built on MySQLdb connections and
    SQLAlchemy sessions.
    """

    def _set_source_session(self):
        self._source_session = _RHScopedSession(sessionmaker(
            bind=self._get_engine(self.source_database_config))
        )

    def _set_tracker_session(self):
        self._tracker_session = _RHScopedSession(sessionmaker(
            bind=self._get_engine(self.tracker_database_config))
        )

    def _set_state_session(self):
        self._state_session = _RHScopedSession(sessionmaker(
            bind=self._get_engine(self.state_database_config))
        )

    @contextmanager
    def _open_cursor(self, config):
        """Yield a cursor on a new connection built from *config*.

        The cursor and the connection are closed when the with-block exits,
        including when it exits through an exception.
        """
        connection = self._get_connection(config)
        try:
            cursor = connection.cursor()
            try:
                yield cursor
            finally:
                cursor.close()
        finally:
            connection.close()

    @contextmanager
    def get_tracker_cursor(self):
        """Yield a cursor on the tracker database."""
        with self._open_cursor(self.tracker_database_config) as cursor:
            yield cursor

    @contextmanager
    def get_state_cursor(self):
        """Yield a cursor on the state database."""
        with self._open_cursor(self.state_database_config) as cursor:
            yield cursor

    @contextmanager
    def get_source_cursor(self):
        """Yield a cursor on the source database."""
        with self._open_cursor(self.source_database_config) as cursor:
            yield cursor

    def _get_connection(self, config):
        """Open a MySQLdb connection from the host/user/passwd/port in
        *config*.
        """
        return MySQLdb.connect(
            host=config['host'],
            user=config['user'],
            passwd=config['passwd'],
            port=config['port']
        )

    def _get_engine(self, config):
        """Create a SQLAlchemy engine from the user/host/port/db in *config*.

        NOTE(review): the URL carries no password although _get_connection
        uses config['passwd'] - confirm this is intentional.
        """
        return create_engine(
            'mysql://{db_user}@{db_host}:{port}/{db_database}'.format(
                db_user=config['user'],
                db_host=config['host'],
                db_database=config['db'],
                port=config['port']
            )
        )


class _RHScopedSession(ScopedSession):
    """ This is a wrapper over sqlalchemy ScopedSession that does sql
    operations in a context manager. The commit happens on exit of the
    context manager, a rollback if there is an exception inside the
    context manager. The session is closed and removed in the end.
    """
    @contextmanager
    def connect_begin(self, *args, **kwargs):
        """Yield a session; positional and keyword arguments are accepted
        but not used.
        """
        session = self()
        try:
            yield session
            session.commit()
        except:
            # Deliberately bare: roll back on any failure, then re-raise.
            session.rollback()
            raise
        finally:
            session.close()
            self.remove()

# --------------------------------------------------------------------------------
# /replication_handler/models/connections/yelp_conn_connection.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
16 | from __future__ import absolute_import 17 | from __future__ import unicode_literals 18 | 19 | from contextlib import contextmanager 20 | 21 | import yelp_conn 22 | from yelp_conn.connection_set import ConnectionSet 23 | from yelp_conn.session import scoped_session 24 | from yelp_conn.session import sessionmaker 25 | 26 | from replication_handler.models.connections.base_connection import BaseConnection 27 | 28 | 29 | class YelpConnConnection(BaseConnection): 30 | 31 | def __init__(self, *args, **kwargs): 32 | yelp_conn.reset_module() 33 | yelp_conn.initialize() 34 | super(YelpConnConnection, self).__init__(*args, **kwargs) 35 | 36 | def _set_source_session(self): 37 | self._source_session = scoped_session( 38 | sessionmaker(slave_connection_set_name=str("rbr_source_ro")) 39 | ) 40 | 41 | def _set_tracker_session(self): 42 | self._tracker_session = scoped_session( 43 | sessionmaker(master_connection_set_name=str("schema_tracker_rw")) 44 | ) 45 | 46 | def _set_state_session(self): 47 | self._state_session = scoped_session( 48 | sessionmaker( 49 | master_connection_set_name=str("rbr_state_rw"), 50 | slave_connection_set_name=str("rbr_state_ro") 51 | ) 52 | ) 53 | 54 | @contextmanager 55 | def get_source_cursor(self): 56 | connection_set = ConnectionSet.rbr_source_ro() 57 | connection = getattr(connection_set, self.get_source_database_topology_key()) 58 | cursor = connection.cursor() 59 | yield cursor 60 | cursor.close() 61 | connection.close() 62 | 63 | @contextmanager 64 | def get_tracker_cursor(self): 65 | schema_tracker_cluster = self.tracker_cluster_name 66 | connection_set = ConnectionSet.schema_tracker_rw() 67 | connection = getattr(connection_set, schema_tracker_cluster) 68 | cursor = connection.cursor() 69 | yield cursor 70 | cursor.close() 71 | connection.close() 72 | 73 | @contextmanager 74 | def get_state_cursor(self): 75 | rbr_state_cluster = self.state_cluster_name 76 | connection_set = ConnectionSet.rbr_state_rw() 77 | connection = 
getattr(connection_set, rbr_state_cluster) 78 | cursor = connection.cursor() 79 | yield cursor 80 | cursor.close() 81 | connection.close() 82 | -------------------------------------------------------------------------------- /replication_handler/models/database.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 16 | from __future__ import absolute_import 17 | from __future__ import unicode_literals 18 | 19 | import simplejson as json 20 | from sqlalchemy import types 21 | 22 | from replication_handler.config import env_config 23 | from replication_handler.helpers import dates 24 | 25 | 26 | def get_base_model(): 27 | try: 28 | from yelp_conn.session import declarative_base 29 | return declarative_base() 30 | except ImportError: 31 | from sqlalchemy.ext.declarative import declarative_base 32 | return declarative_base() 33 | 34 | 35 | CLUSTER_NAME = env_config.rbr_state_cluster 36 | 37 | # The common declarative base used by every data model. 
Base = get_base_model()
Base.__cluster__ = CLUSTER_NAME


def get_connection(
    topology_path,
    source_cluster_name,
    tracker_cluster_name,
    state_cluster_name,
    source_cluster_topology_name=None,
):
    """Build the connection wrapper: a YelpConnConnection, or an
    RHConnection when an ImportError occurs.
    """
    connection_args = (
        topology_path,
        source_cluster_name,
        tracker_cluster_name,
        state_cluster_name,
        source_cluster_topology_name,
    )
    try:
        from replication_handler.models.connections.yelp_conn_connection import YelpConnConnection
        return YelpConnConnection(*connection_args)
    except ImportError:
        from replication_handler.models.connections.rh_connection import RHConnection
        return RHConnection(*connection_args)


class UnixTimeStampType(types.TypeDecorator):
    """ A datetime.datetime that is stored as a unix timestamp."""
    impl = types.Integer

    def process_bind_param(self, value, dialect=None):
        """Convert the value to an integer timestamp; None stays None."""
        if value is not None:
            as_datetime = dates.get_datetime(value)
            return int(dates.to_timestamp(as_datetime))
        return None

    def process_result_value(self, value, dialect):
        """Convert a stored timestamp to a datetime; None stays None."""
        if value is not None:
            return dates.from_timestamp(value)
        return None


class JSONType(types.TypeDecorator):
    """ A JSONType is stored in the db as a string and we interact with it
    like a dict.
    """
    impl = types.Text
    separators = (',', ':')

    def process_bind_param(self, value, dialect=None):
        """ Serialize the value to a JSON string using the class separators."""
        if value is not None:
            return json.dumps(value, separators=self.separators)
        return None

    def process_result_value(self, value, dialect=None):
        """ Parse the JSON string read from the db; None stays None."""
        if value is not None:
            return json.loads(value)
        return None

# --------------------------------------------------------------------------------
# /replication_handler/models/global_event_state.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

from sqlalchemy import Column
from sqlalchemy import Integer
from sqlalchemy import String
from sqlalchemy.types import Enum

from replication_handler.helpers.dates import default_now
from replication_handler.helpers.lists import unlist
from replication_handler.models.database import Base
from replication_handler.models.database import JSONType
from replication_handler.models.database import UnixTimeStampType


class EventType(object):
    """String constants accepted by ``GlobalEventState.event_type``."""

    SCHEMA_EVENT = 'schema_event'
    DATA_EVENT = 'data_event'


class GlobalEventState(Base):
    """GlobalEventState is used to save information about latest event for recovery.
    For clean shutdowns, we will just resume from the recorded gtid, otherwise,
    we will perform recovery procedures for schema event or data event
    according to the event type.
    """

    __tablename__ = 'global_event_state'

    id = Column(Integer, primary_key=True)
    position = Column(JSONType, nullable=False)
    is_clean_shutdown = Column(Integer, nullable=False, default=0)
    event_type = Column(
        Enum(
            EventType.SCHEMA_EVENT,
            EventType.DATA_EVENT,
            name='event_type'
        ),
        nullable=False
    )
    cluster_name = Column(String, nullable=False)
    database_name = Column(String)
    table_name = Column(String)
    time_updated = Column(UnixTimeStampType, default=default_now, onupdate=default_now)

    @classmethod
    def upsert(
        cls,
        session,
        position,
        event_type,
        cluster_name,
        database_name,
        table_name,
        is_clean_shutdown=False
    ):
        """Create or overwrite the state row for ``cluster_name``.

        The row is looked up with ``cls.get``; when none is found a new
        instance is created. All fields are then assigned and the object is
        added to ``session``. Nothing is committed here.

        Returns:
            The new or updated state object.
        """
        global_event_state = cls.get(session, cluster_name)
        if global_event_state is None:
            # Instantiate through ``cls`` rather than the hard-coded class so
            # the classmethod behaves correctly when called on a subclass.
            global_event_state = cls()
        global_event_state.position = position
        global_event_state.event_type = event_type
        global_event_state.is_clean_shutdown = is_clean_shutdown
        global_event_state.cluster_name = cluster_name
        global_event_state.database_name = database_name
        global_event_state.table_name = table_name
        session.add(global_event_state)
        return global_event_state

    @classmethod
    def get(cls, session, cluster_name):
        """Fetch the state for ``cluster_name``.

        All matching rows are queried and handed to ``unlist``.
        NOTE(review): ``upsert`` relies on this returning None when no row
        matches - confirm against ``helpers.lists.unlist``.
        """
        result = session.query(
            cls
        ).filter(
            cls.cluster_name == cluster_name,
        ).all()
        return unlist(result)

# --- remainder of this dump line belongs to the next file; carried through verbatim ---
# -------------------------------------------------------------------------------- /replication_handler/models/mysql_dumps.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

import copy
import logging

from sqlalchemy import Column
from sqlalchemy import exists
from sqlalchemy import String
from sqlalchemy import UnicodeText

from replication_handler.models.database import Base


logger = logging.getLogger('replication_handler.models.mysql_dumps')


class DumpUnavailableError(Exception):
    """Raised when no MySQL dump is stored for the requested cluster."""

    def __init__(self, cluster_name):
        Exception.__init__(self, "MySQL Dump unavailable for cluster {c}".format(
            c=cluster_name
        ))


class MySQLDumps(Base):
    """One stored database dump per cluster (``cluster_name`` is the primary key)."""

    __tablename__ = 'mysql_dumps'

    database_dump = Column(UnicodeText, nullable=False)
    cluster_name = Column(String, primary_key=True)

    @classmethod
    def get_latest_mysql_dump(cls, session, cluster_name):
        """Return the stored dump text for ``cluster_name``.

        Raises:
            DumpUnavailableError: when no row exists for the cluster.
        """
        logger.info("Retrieving the latest MySQL dump for cluster {c}".format(
            c=cluster_name
        ))
        with session.connect_begin(ro=True) as s:
            ret = s.query(
                MySQLDumps
            ).filter(
                MySQLDumps.cluster_name == cluster_name
            ).first()
            # Shallow copy of the row (None stays None), as in the original.
            latest_dump = copy.copy(ret)
        logger.info("Fetched the latest MySQL dump")
        # Check for the missing row explicitly instead of catching the
        # AttributeError raised by ``None.database_dump``, which would also
        # hide unrelated attribute errors.
        if latest_dump is None:
            raise DumpUnavailableError(cluster_name=cluster_name)
        return latest_dump.database_dump

    @classmethod
    def dump_exists(cls, session, cluster_name):
        """Return the result of an EXISTS query for a dump of ``cluster_name``."""
        logger.info("Checking for MySQL dump for cluster {c}".format(
            c=cluster_name
        ))
        with session.connect_begin(ro=True) as s:
            mysql_dump_exists = s.query(
                exists().where(
                    MySQLDumps.cluster_name == cluster_name
                )
            ).scalar()
        # Plain if/else: a conditional expression should not be used only
        # for its side effects.
        if mysql_dump_exists:
            logger.info("MySQL dump exists")
        else:
            logger.info("MySQL dump doesn't exist")
        return mysql_dump_exists

    @classmethod
    def update_mysql_dump(cls, session, database_dump, cluster_name):
        """Replace the dump for ``cluster_name`` and return the new row object.

        The existing row (if any) is deleted and the new one added inside a
        single ``connect_begin(ro=False)`` block.
        """
        logger.info("Replacing MySQL dump for cluster {c}".format(
            c=cluster_name
        ))
        with session.connect_begin(ro=False) as s:
            s.query(MySQLDumps).filter(
                MySQLDumps.cluster_name == cluster_name
            ).delete()
            new_dump = MySQLDumps()
            new_dump.database_dump = database_dump
            new_dump.cluster_name = cluster_name
            s.add(new_dump)
        logger.info("Replaced the old MySQL dump with new one")
        return new_dump

    @classmethod
    def delete_mysql_dump(cls, session, cluster_name):
        """Delete the dump for ``cluster_name`` in its own ``connect_begin`` block."""
        logger.info("Deleting the existing database dump for cluster {c}".format(
            c=cluster_name
        ))
        with session.connect_begin(ro=False) as s:
            s.query(MySQLDumps).filter(
                MySQLDumps.cluster_name == cluster_name
            ).delete()

    @classmethod
    def delete_mysql_dump_with_active_session(cls, session, cluster_name):
        """Delete the dump for ``cluster_name`` using ``session`` directly.

        Unlike ``delete_mysql_dump`` this does not open a ``connect_begin``
        block; the query is issued on the session that was passed in.
        """
        logger.info("Deleting the existing database dump for cluster {c}".format(
            c=cluster_name
        ))
        session.query(MySQLDumps).filter(
            MySQLDumps.cluster_name == cluster_name
        ).delete()

# --- remainder of this dump line belongs to the next file; carried through verbatim ---
# -------------------------------------------------------------------------------- /replication_handler/schema/changelog.v2.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | doc: TCL (table_change_log) like schema for consumption by legacy TCL clients 3 | 4 | fields: 5 | 6 | - doc: database name containing the table 7 | name: table_schema 8 | type: string 9 | 10 | - doc: table name 11 | name: table_name 12 | type: string 13 | 14 | - doc: pk of the affected table 15 | name: id 16 | type: int 17 | 18 | # name is the source name 19 | name: changelog_schema 20 | 21 | # namespace should match the filename.
22 | namespace: changelog.v2 23 | 24 | type: record 25 | -------------------------------------------------------------------------------- /replication_handler/servlib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/mysql_streamer/b88183ac00b88f5dff9c01ad87a46da9e3615d9e/replication_handler/servlib/__init__.py -------------------------------------------------------------------------------- /replication_handler/servlib/clog_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 
"""Support configuration of clog using staticconf."""
from __future__ import absolute_import
from __future__ import unicode_literals

import logging
import os

import clog
import clog.handlers
import staticconf

from replication_handler.servlib.logging_util import DETAILED_FORMAT

# Pid seen by the most recent ``log_line`` call; used to detect forks.
_current_pid = None


namespace = 'clog'
clog_namespace = staticconf.NamespaceGetters(namespace)


log_stream_name = clog_namespace.get_string('log_stream_name')
log_stream_format = clog_namespace.get_string(
    'log_stream_format', default=DETAILED_FORMAT
)
log_stream_level = clog_namespace.get_string(
    'log_stream_level', default='INFO'
)


def initialize():
    """Initialize clog from staticconf config."""
    # Level constants on the logging module are upper-case. Without
    # normalisation a configured value such as 'info' would resolve to the
    # ``logging.info`` *function* instead of the integer level.
    level_name = log_stream_level.value.upper()
    add_clog_handler(
        name=log_stream_name.value,
        level=getattr(logging, level_name),
        log_format=log_stream_format.value)


def add_clog_handler(name, level=logging.INFO, log_format=DETAILED_FORMAT):
    """Add a CLog logging handler for the stream 'name'.

    The handler is attached to the root logger.

    :param name: the name of the log
    :type name: string
    :param level: the logging level of the handler
    :type level: int
    :param log_format: format string handed to ``logging.Formatter``
    :type log_format: string
    """
    clog_handler = clog.handlers.CLogHandler(name)
    clog_handler.setLevel(level)
    formatter = logging.Formatter(log_format)
    clog_handler.setFormatter(formatter)
    logging.root.addHandler(clog_handler)


def log_line(log_name, data):
    """Fork-aware ``log_line``.

    This behaves like normal ``clog.log_line``, but checks the process pid
    between calls. If the pid changes, log handlers are reset for you.

    :param log_name: the scribe log stream
    :type log_name: string
    :param data: the data to log
    :type data: basestring/unicode
    :raises TypeError: if ``data`` is neither text nor bytes
    """

    global _current_pid

    # check for forking
    if os.getpid() != _current_pid:
        _current_pid = os.getpid()
        clog.reset_default_loggers()

    # Text is encoded to UTF-8 so clog always receives bytes.
    if isinstance(data, type(u'')):
        data = data.encode('utf8')
    if not isinstance(data, bytes):
        raise TypeError('data must be a basestring')

    clog.log_line(log_name, data)

# --- remainder of this dump line belongs to the next file; carried through verbatim ---
# -------------------------------------------------------------------------------- /replication_handler/servlib/logging_util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

import contextlib
import getpass
import logging
import os
import socket
import tempfile
from logging.handlers import RotatingFileHandler

# This format controls most service logging, and in docker containers
# socket.gethostname() doesn't mean much, so we look for some magic
# environment variables and try to build as much of
# <host>:<port>:<container hostname> as we can
if 'MARATHON_HOST' in os.environ or 'HOST' in os.environ:
    _hostname = os.environ.get('MARATHON_HOST') or os.environ.get('HOST')
    # Service might be running multiple instances on one host, so we include
    # the port as well to help developers find the relevant containers
    if 'MARATHON_PORT' in os.environ:
        _hostname = '{0}:{1}'.format(_hostname, os.environ['MARATHON_PORT'])
    _hostname = '{0}:{1}'.format(_hostname, socket.gethostname())
else:
    _hostname = socket.gethostname()

DETAILED_FORMAT = '\t'.join(
    [
        '%(asctime)s',
        # The host name is literal text inside a %-style format string, so a
        # stray '%' coming from the environment must be escaped.
        _hostname.replace('%', '%%'),
        '%(process)s',
        '%(name)s',
        '%(levelname)s',
        '%(message)s'
    ]
)

uwsgi_initialized = False
"""Make sure we only initialize uwsgi logging once per worker.

This is set to False prefork and set to True postfork once uwsgi_logging
is initted."""


def initialize_uwsgi_logging(log_name, log_directory, log_suffix):
    """Initialize a logger for the `uwsgi` log, sending output to a rotated
    file on disk. This is used to log errors in service startup.

    Calls after the first successful one are no-ops (guarded by the
    module-level ``uwsgi_initialized`` flag).

    :param log_name: The name of the log file
    :param log_directory: The location on disk to write the file to
    :param log_suffix: The suffix to be appended to the log_name. This is
        useful for doing things like differentiating different users
        running the same service.
    """
    global uwsgi_initialized
    if not uwsgi_initialized:
        logger = logging.getLogger('uwsgi')

        complete_log_name = '{0}{1}'.format(log_name, log_suffix)
        path = os.path.join(log_directory, complete_log_name)
        # Rotate at 100 KiB, keeping three backups.
        handler = RotatingFileHandler(path, maxBytes=102400, backupCount=3)

        handler.setLevel(logging.INFO)
        handler.setFormatter(logging.Formatter(DETAILED_FORMAT))
        logger.addHandler(handler)
        uwsgi_initialized = True


@contextlib.contextmanager
def log_create_application(log_name, log_directory=None, log_suffix=None):
    """Context manager that logs any exception raised in its body to the
    `uwsgi` log and then re-raises it.

    :param log_name: The name of the log file
    :param log_directory: Where to write the file; defaults to the system
        temp directory.
    :param log_suffix: Suffix appended to ``log_name``; defaults to
        ``'_' + <current user>``.
    """
    if log_directory is None:
        log_directory = tempfile.gettempdir()
    if log_suffix is None:
        log_suffix = '_' + getpass.getuser()

    initialize_uwsgi_logging(log_name, log_directory, log_suffix)

    log = logging.getLogger('uwsgi')
    try:
        yield
    except Exception:
        # Only real errors are reported as a failed start-up;
        # KeyboardInterrupt / SystemExit propagate without being logged.
        log.exception('Create application failed')
        raise

# --- remainder of this dump line belongs to the next files; carried through verbatim ---
# -------------------------------------------------------------------------------- /replication_handler/testing_helper/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/mysql_streamer/b88183ac00b88f5dff9c01ad87a46da9e3615d9e/replication_handler/testing_helper/__init__.py -------------------------------------------------------------------------------- /replication_handler/testing_helper/config_revamp.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

from contextlib import contextmanager

import staticconf
import staticconf.testing
from data_pipeline.config import namespace


@contextmanager
def reconfigure(ns=namespace, **kwargs):
    """Reconfigures the given kwargs, restoring the current configuration for
    only those kwargs when the contextmanager exits.

    Keys in ``kwargs`` that had no value before entry are removed again on
    exit; keys not named in ``kwargs`` keep whatever value they have at exit.

    Args:
        ns: Namespace of the conf
        **kwargs: configuration keys and the temporary values to apply
    """
    conf_namespace = staticconf.config.get_namespace(ns)
    # ``items()`` instead of the Python-2-only ``iteritems()``: same result
    # here, and the helper keeps working on Python 3.
    starting_config = {
        k: v for k, v in conf_namespace.get_config_values().items()
        if k in kwargs
    }
    staticconf.DictConfiguration(kwargs, namespace=ns)
    try:
        yield
    finally:
        # Rebuild the namespace from everything we did not touch plus the
        # values the overridden keys had on entry.
        final_config = {
            k: v for k, v in conf_namespace.get_config_values().items()
            if k not in kwargs
        }
        final_config.update(starting_config)
        staticconf.config.get_namespace(ns).clear()
        staticconf.DictConfiguration(final_config, namespace=ns)

# --- remainder of this dump line belongs to the next file; carried through verbatim ---
# -------------------------------------------------------------------------------- /replication_handler/util/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License.
# --- start of this dump line belongs to the previous file; carried through verbatim ---
# 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 16 | from __future__ import absolute_import 17 | from __future__ import unicode_literals -------------------------------------------------------------------------------- /replication_handler/util/change_log_message_builder.py: --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

import logging

from data_pipeline.message import UpdateMessage

from replication_handler.util.message_builder import MessageBuilder


log = logging.getLogger('replication_handler.parse_replication_stream')


class ChangeLogMessageBuilder(MessageBuilder):
    """ Converts a data event into its change-log message.

    The payload carries only the schema name, table name and the row's
    ``id`` value.

    Args:
        schema_info(SchemaInfo object): contain topic/schema_id.
        event(ReplicationHandlerEvent object): contains a create/update/delete data event and its position.
        transaction_id_schema_id(int): schema id for transaction id meta attribute.
        position(Position object): contains position information for this event in binlog.
        register_dry_run(boolean): whether a schema has to be registered for a message to be published.
    """

    def __init__(
        self, schema_info, event, transaction_id_schema_id, position, register_dry_run=True
    ):
        self.schema_info = schema_info
        self.event = event
        self.transaction_id_schema_id = transaction_id_schema_id
        self.position = position
        self.register_dry_run = register_dry_run

    def _create_payload(self, data):
        """Build the change-log payload dict from a row's values (``data['id']`` is required)."""
        return {
            "table_schema": self.event.schema,
            "table_name": self.event.table,
            "id": data['id'],
        }

    def build_message(self, source_cluster_name):
        """Instantiate ``self.event.message_type`` for this event.

        For ``UpdateMessage`` events the payload built from the row's
        ``before_values`` is included as ``previous_payload_data``.
        """
        # Values are computed in the same order as the keys appear below.
        message_params = {}
        message_params["upstream_position_info"] = {
            "position": self.position.to_dict(),
            "cluster_name": source_cluster_name,
            "database_name": self.event.schema,
            "table_name": self.event.table,
        }
        message_params["schema_id"] = self.schema_info.schema_id
        current_values = self._get_values(self.event.row)
        message_params["payload_data"] = self._create_payload(current_values)
        message_params["dry_run"] = self.register_dry_run
        message_params["timestamp"] = self.event.timestamp
        transaction_id = self.position.get_transaction_id(
            self.transaction_id_schema_id,
            source_cluster_name
        )
        message_params["meta"] = [transaction_id]

        if self.event.message_type == UpdateMessage:
            previous_values = self.event.row["before_values"]
            message_params["previous_payload_data"] = self._create_payload(previous_values)

        message_cls = self.event.message_type
        return message_cls(**message_params)

# --- remainder of this dump line belongs to the next file; carried through verbatim ---
# -------------------------------------------------------------------------------- /replication_handler/util/transaction_id.py: -------------------------------------------------------------------------------- 1 | #
# -*- coding: utf-8 -*-
# Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

from data_pipeline.meta_attribute import MetaAttribute

# The text type, spelled the same way as in servlib/clog_util.py: this is
# ``unicode`` on Python 2 and ``str`` on Python 3, so the checks below do not
# depend on the Python-2-only ``unicode`` builtin.
_TEXT_TYPE = type(u'')


def get_ltid_meta_attribute(transaction_id_schema_id, cluster_name, log_file, log_pos):
    """Log Transaction Id MetaAttribute is a MetaAttribute which allows us to
    reconstruct the order of messages in replication handler by specifying a
    statement's exact position in the binlog file. Its payload consists a dict
    of cluster name, log_file name and log_position.

    Args:
        transaction_id_schema_id (int): schema_id for transaction_id Meta Attribute
        cluster_name (unicode): Name of the cluster from where data was read.
        log_file (unicode): Binlog name.
        log_pos (int): Log position in the binlog.

    Returns:
        MetaAttribute: built from the schema id and the payload dict.

    Raises:
        TypeError: if ``cluster_name`` or ``log_file`` is not a unicode
            string, or ``log_pos`` is not an int.
    """
    if not isinstance(cluster_name, _TEXT_TYPE) or not isinstance(log_file, _TEXT_TYPE):
        raise TypeError('Cluster name and log file must be unicode strings')
    if not isinstance(log_pos, int):
        raise TypeError('Log position must be an integer')

    return MetaAttribute(
        schema_id=transaction_id_schema_id,
        payload_data={
            'cluster_name': cluster_name,
            'log_file': log_file,
            'log_pos': log_pos
        }
    )


def get_gtid_meta_attribute(transaction_id_schema_id, cluster_name, gtid):
    """Global Transaction Id MetaAttribute is a MetaAttribute which allows us
    to reconstruct the order of messages in replication handler by specifying a
    statement's exact position in the binlog file. Its payload consists a dict of
    cluster name and GTID.

    Args:
        transaction_id_schema_id (int): schema_id for transaction_id Meta Attribute
        cluster_name (unicode): Name of the cluster from where data was read.
        gtid (unicode): MySQL GTID.

    Returns:
        MetaAttribute: built from the schema id and the payload dict.

    Raises:
        TypeError: if ``cluster_name`` or ``gtid`` is not a unicode string.
    """
    if not isinstance(cluster_name, _TEXT_TYPE) or not isinstance(gtid, _TEXT_TYPE):
        raise TypeError('Cluster name and gtid must be unicode strings')
    return MetaAttribute(
        schema_id=transaction_id_schema_id,
        payload_data={
            'cluster_name': cluster_name,
            'gtid': gtid
        }
    )

# --- remainder of this dump line belongs to the next files; carried through verbatim ---
# -------------------------------------------------------------------------------- /replication_handler_testing/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/mysql_streamer/b88183ac00b88f5dff9c01ad87a46da9e3615d9e/replication_handler_testing/__init__.py -------------------------------------------------------------------------------- /replication_handler_testing/db_sandbox.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

import atexit
import contextlib
from glob import glob

import testing.mysqld
from cached_property import cached_property
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker as sessionmaker_sa

from replication_handler import config
from replication_handler.models.database import get_connection


class PerProcessMySQLDaemon(object):
    """Starts a throw-away mysqld, creates the ``replication_handler``
    database in it and loads every ``schema/tables/*.sql`` fixture.

    The daemon is stopped via an ``atexit`` hook registered in ``__init__``.
    """

    # Generate Mysqld class which shares the generated database
    Mysqld = testing.mysqld.MysqldFactory(cache_initialized_db=True)

    _db_name = 'replication_handler'

    def __init__(self):
        self._mysql_daemon = self.Mysqld()
        self._create_database()
        self._create_tables()

        atexit.register(self.clean_up)

    def _create_tables(self):
        """Execute every SQL fixture against the sandbox database."""
        # NOTE(review): the glob is relative, so this presumably has to run
        # from the repository root - confirm against the test runner.
        fixtures = glob('schema/tables/*.sql')
        with self.engine.connect() as conn:
            conn.execute('use {db}'.format(db=self._db_name))
            for fixture in fixtures:
                with open(fixture, 'r') as fh:
                    conn.execute(fh.read())

    def truncate_all_tables(self):
        """Truncate every table that currently holds at least one row."""
        self._session.execute('begin')
        for table in self._all_tables:
            # Empty tables are skipped; only tables with rows are truncated.
            was_modified = self._session.execute(
                "select count(*) from `%s` limit 1" % table
            ).scalar()
            if was_modified:
                self._session.execute('truncate table `%s`' % table)
        self._session.execute('commit')

    def clean_up(self):
        """Stop the mysqld process."""
        self._mysql_daemon.stop()

    @cached_property
    def engine(self):
        """Engine bound to the sandbox database (created once, then cached)."""
        return create_engine(self._url)

    @cached_property
    def _make_session(self):
        # regular sqlalchemy session maker
        return sessionmaker_sa(bind=self.engine)

    def _create_database(self):
        """Create the sandbox database using a connection without a default db."""
        conn = self._engine_without_db.connect()
        conn.execute('create database ' + self._db_name)
        conn.close()

    @cached_property
    def _session(self):
        return self._make_session()

    @property
    def _url(self):
        return self._mysql_daemon.url(db=self._db_name)

    @property
    def _engine_without_db(self):
        return create_engine(self._url_without_db)

    @property
    def _url_without_db(self):
        return self._mysql_daemon.url()

    @property
    def _all_tables(self):
        return self.engine.table_names()


@contextlib.contextmanager
def database_sandbox_session():
    """Yield the state session temporarily bound to a sandbox mysqld.

    The session's previous engine is restored when the block exits, also
    when the block raises.
    """
    db_connections = get_connection(
        config.env_config.topology_path,
        config.env_config.rbr_source_cluster,
        config.env_config.schema_tracker_cluster,
        config.env_config.rbr_state_cluster
    )
    _per_process_mysql_daemon = launch_mysql_daemon()
    _session_prev_engine = db_connections.state_session.bind

    db_connections.state_session.bind = _per_process_mysql_daemon.engine
    # NOTE(review): as in the original, ``enforce_read_only`` is not restored
    # on exit - confirm whether that is intentional.
    db_connections.state_session.enforce_read_only = False
    try:
        yield db_connections.state_session
    finally:
        # Restore the real engine even if the caller's block failed, so a
        # failing test cannot leave the session pointing at the sandbox.
        db_connections.state_session.bind = _session_prev_engine


def launch_mysql_daemon(max_retries=3):
    """Start a ``PerProcessMySQLDaemon``, retrying on ``RuntimeError``.

    Up to ``max_retries`` retries are made after the first attempt; the
    last ``RuntimeError`` is re-raised once they are exhausted.
    """
    retries = 0
    while True:
        # It takes time for mysqld to launch, so we attempt it a few times
        try:
            return PerProcessMySQLDaemon()
        except RuntimeError:
            retries += 1
            if retries > max_retries:
                raise

# --- remainder of this dump line belongs to the next files; carried through verbatim ---
# -------------------------------------------------------------------------------- /requirements-opensource.txt: -------------------------------------------------------------------------------- 1 | # Need this because mysql-replication on internal pypi is old. 2 | # Since it is a hit, no fallback to python's pypi happens 3 | -e . 4 | MySQL-python==1.2.5 5 | PyMySQL==0.6.7 6 | PyStaticConfiguration==0.10.2 7 | PyYAML==3.11 8 | SQLAlchemy==1.0.12 9 | boto==2.36.0 10 | contextdecorator==0.10.0 11 | data_pipeline==0.9.1 12 | dateglob==0.1 13 | enum34==1.0.4 14 | future==0.14.3 15 | futures==3.0.5 16 | ipaddress==1.0.16 17 | iso8601==0.1.10 18 | kafka-python==0.9.5 19 | kazoo==2.2 20 | mccabe==0.5.0 21 | mrjob==0.4.3 22 | -e git://github.com/kdparker/python-mysql-replication.git#egg=mysql-replication 23 | pep8==1.5.7 24 | ply==3.4 25 | py==1.4.29 26 | pycrypto==2.6.1 27 | pyflakes==0.8.1 28 | pysubnettree==0.23 29 | python-dateutil==2.4.2 30 | pytz==2016.4 31 | requests==2.7.0 32 | send-nsca==0.1.4.1 33 | setproctitle==1.1.8 34 | simplejson==3.6.5 35 | six==1.9.0 36 | -e git://github.com/Yelp/sqlparse.git@4fbab9c395752294225f60393cc0a19d6fb62d54#egg=sqlparse 37 | testing.mysqld 38 | thriftpy==0.1.15 39 | vmprof==0.2.7 40 | wheel==0.24.0 41 | wsgiref==0.1.2 42 | yelp-clog==2.2.10 43 | -------------------------------------------------------------------------------- /requirements.d/dev.txt: -------------------------------------------------------------------------------- 1 | # If there are tools that a developer needs to work on your project. 2 | # Unit testing tools, mocking frameworks, debuggers, proxies, ... 3 | # These will be used for tox and other testing environments.
4 | pytest-cov 5 | docker-compose==1.6.2 6 | flake8 7 | mock 8 | pytest 9 | -------------------------------------------------------------------------------- /requirements.d/docs.txt: -------------------------------------------------------------------------------- 1 | sphinx 2 | -------------------------------------------------------------------------------- /requirements.d/pre_commit.txt: -------------------------------------------------------------------------------- 1 | pre-commit>=0.4.2 -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Need this because mysql-replication on internal pypi is old. 2 | # Since it is a hit, no fallback to python's pypi happens 3 | --extra-index-url https://pypi.python.org/simple 4 | --extra-index-url https://pypi.python.org/pypi 5 | -e . 6 | # TODO(DATAPIPE-1509|abrar) : internal.txt has to be unlinked from requirements.txt 7 | # internal.txt should be built only for internal users 8 | MySQL-python==1.2.5 9 | PyMySQL==0.6.7 10 | PyStaticConfiguration==0.10.2 11 | PyYAML==3.11 12 | SQLAlchemy==1.0.12 13 | boto==2.36.0 14 | contextdecorator==0.10.0 15 | data_pipeline[internal]==0.9.1 16 | dateglob==0.1 17 | enum34==1.0.4 18 | future==0.14.3 19 | futures==3.0.5 20 | geogrid==1.0.8 21 | ipaddress==1.0.16 22 | iso8601==0.1.10 23 | kafka-python==0.9.5.post6 24 | kazoo==2.2 25 | mccabe==0.5.0 26 | mrjob==0.4.3.post7 27 | mysql-replication==0.10.0 28 | pep8==1.5.7 29 | pii_generator==0.1.5 30 | ply==3.4 31 | py==1.4.29 32 | pycrypto==2.6.1 33 | pyflakes==0.8.1 34 | pyramid-yelp-conn==0.4.1 35 | pysensu_yelp==0.2.3 36 | pysubnettree==0.23 37 | python-dateutil==2.4.2 38 | pytz==2016.4 39 | requests==2.7.0 40 | send-nsca==0.1.4.1 41 | setproctitle==1.1.8 42 | simplejson==3.6.5 43 | six==1.9.0 44 | -e git://github.com/Yelp/sqlparse.git@4fbab9c395752294225f60393cc0a19d6fb62d54#egg=sqlparse 45 | testing.mysqld 
46 | thriftpy==0.1.15 47 | vmprof==0.2.7 48 | wheel==0.24.0 49 | wsgiref==0.1.2 50 | yelp_batch==0.27.1 51 | yelp-cgeom==1.3.1 52 | yelp-clog==2.5.2 53 | yelp-conn==10.0.2 54 | yelp-logging==1.1.2 55 | yelp_kafka==5.1.1 56 | -------------------------------------------------------------------------------- /schema/avro_schema/global_transaction_id_v1.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "namespace": "yelp.replication_handler", 4 | "name": "global_transaction_id", 5 | "doc": "Serializes Mysql GTID information.", 6 | "fields": [ 7 | { 8 | "name": "cluster_name", 9 | "type": "string", 10 | "doc": "Name of the database cluster." 11 | }, 12 | { 13 | "name": "gtid", 14 | "type": "string", 15 | "doc": "GTID retrieved from MySQL." 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /schema/avro_schema/log_transaction_id_v1.avsc: -------------------------------------------------------------------------------- 1 | { 2 | "type": "record", 3 | "namespace": "yelp.replication_handler", 4 | "name": "transaction_id", 5 | "doc": "Serializes a Log TransactionId object.", 6 | "fields": [ 7 | { 8 | "name": "cluster_name", 9 | "type": "string", 10 | "doc": "Name of the database cluster." 11 | }, 12 | { 13 | "name": "log_file", 14 | "type": "string", 15 | "doc": "Name of the binlog file." 16 | }, 17 | { 18 | "name": "log_pos", 19 | "type": "int", 20 | "doc": "Position of the statement withing the binlog file." 21 | } 22 | ] 23 | } 24 | -------------------------------------------------------------------------------- /schema/migrations/data_event_checkpoint.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | [2016-10-20] Change kafka_offset column type to bigint. 
49 | 50 | 51 | -------------------------------------------------------------------------------- /schema/migrations/global_event_state.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 57 | 61 | 62 | 63 | -------------------------------------------------------------------------------- /schema/migrations/master.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /schema/migrations/mysql_dumps.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | -------------------------------------------------------------------------------- /schema/migrations/schema_event_state.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /schema/migrations/tools/liquibase.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/mysql_streamer/b88183ac00b88f5dff9c01ad87a46da9e3615d9e/schema/migrations/tools/liquibase.jar -------------------------------------------------------------------------------- /schema/migrations/tools/mysql-connector-java-5.1.29-bin.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Yelp/mysql_streamer/b88183ac00b88f5dff9c01ad87a46da9e3615d9e/schema/migrations/tools/mysql-connector-java-5.1.29-bin.jar -------------------------------------------------------------------------------- /schema/tables/data_event_checkpoint.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `data_event_checkpoint` ( 2 | `id` int(11) NOT NULL AUTO_INCREMENT, 3 | `kafka_topic` varchar(255) NOT NULL, 4 | `kafka_offset` bigint(20) NOT NULL, 5 | `cluster_name` varchar(255) NOT NULL, 6 | `time_created` int(11) NOT NULL, 7 | `time_updated` int(11) NOT NULL, 8 | PRIMARY KEY (`id`) 9 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; 10 | -------------------------------------------------------------------------------- /schema/tables/global_event_state.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `global_event_state` ( 2 | `id` int(11) NOT NULL AUTO_INCREMENT, 3 | `position` text NOT NULL, 4 | `is_clean_shutdown` tinyint(1) DEFAULT 0 NOT NULL, 5 | `event_type` varchar(20) NOT NULL, 6 | `cluster_name` varchar(255) NOT NULL, 7 | `database_name` varchar(255) NOT NULL, 8 | `table_name` varchar(255) NOT NULL, 9 | `time_updated` int(11) NOT NULL, 10 | PRIMARY KEY (`id`) 11 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; 12 | -------------------------------------------------------------------------------- /schema/tables/mysql_dumps.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `mysql_dumps` ( 2 | `cluster_name` varchar(255) NOT NULL, 3 | `database_dump` longtext NOT NULL, 4 | PRIMARY KEY (`cluster_name`) 5 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; -------------------------------------------------------------------------------- /schema/tables/schema_event_state.sql: 
-------------------------------------------------------------------------------- 1 | CREATE TABLE `schema_event_state` ( 2 | `id` int(11) NOT NULL AUTO_INCREMENT, 3 | `position` text NOT NULL, 4 | `status` varchar(20) NOT NULL DEFAULT 'Pending', 5 | `query` text NOT NULL, 6 | `create_table_statement` text NOT NULL, 7 | `cluster_name` varchar(255) NOT NULL, 8 | `database_name` varchar(255) NOT NULL, 9 | `table_name` varchar(255) NOT NULL, 10 | `time_created` int(11) NOT NULL, 11 | `time_updated` int(11) NOT NULL, 12 | PRIMARY KEY (`id`) 13 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COLLATE=utf8_unicode_ci; 14 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | # Copyright 2016 Yelp Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, 12 | # software distributed under the License is distributed on an 13 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | # KIND, either express or implied. See the License for the 15 | # specific language governing permissions and limitations 16 | # under the License. 
from __future__ import absolute_import
from __future__ import unicode_literals

from setuptools import find_packages
from setuptools import setup

# Packaging metadata for the replication handler distribution.
setup(
    name='replication_handler',
    version='0.1.0',
    description='',
    author='BAM',
    author_email='bam@yelp.com',
    url='https://github.com/Yelp/mysql_streamer',
    # Excluding only 'tests' still lets find_packages() return its
    # subpackages (tests.batch, tests.components, ...); the wildcard entry
    # keeps the whole test tree out of the built distribution.
    packages=find_packages(exclude=['tests', 'tests.*']),
    setup_requires=['setuptools'],
    # Left unpinned on purpose: the requirements files carry the exact
    # versions used for deployment and testing.
    install_requires=[
        'mysql-replication',
        'MySQL-python',
        'PyMySQL',
        'sqlparse',
        'vmprof',
    ],
    # Kept in agreement with the Apache 2.0 license header of this file.
    license='Apache License 2.0'
)
class TestParseReplicationStream(BaseParseReplicationStreamTest):
    """Run the shared replication-stream tests against ParseReplicationStream."""

    def _get_parse_replication_stream(self):
        """Return the batch instance that the inherited tests exercise."""
        stream = ParseReplicationStream()
        return stream
class TestBaseEventHandler(object):
    """Tests for the behaviour that BaseEventHandler provides on its own."""

    @pytest.fixture
    def producer(self):
        """Return a mock restricted to the attributes of ``Producer``."""
        # ``autospec`` is an argument of ``mock.patch``; passed to ``Mock`` it
        # merely creates an attribute called ``autospec``.  ``spec`` is what
        # actually constrains the mock to the Producer interface.
        return mock.Mock(spec=Producer)

    @pytest.fixture
    def mock_schematizer_client(self):
        """Return an unconstrained stand-in for the schematizer client."""
        return mock.Mock()

    @pytest.fixture
    def schema_wrapper(self, mock_db_connections, mock_schematizer_client):
        """Build a SchemaWrapper wired to the mocked collaborators."""
        return SchemaWrapper(
            db_connections=mock_db_connections,
            schematizer_client=mock_schematizer_client
        )

    @pytest.fixture
    def stats_counter(self, request):
        """Return a mocked StatsCounter, or None when it cannot be imported."""
        # Only the import is guarded: an ImportError means the optional
        # meteorite wrappers are not installed in this environment.
        try:
            from data_pipeline.tools.meteorite_wrappers import StatsCounter
        except ImportError:
            return None
        return mock.Mock(spec=StatsCounter)

    @pytest.fixture
    def base_event_handler(
        self, mock_db_connections, producer, schema_wrapper, stats_counter
    ):
        """Build the handler under test from the fixtures above."""
        return BaseEventHandler(
            mock_db_connections, producer, schema_wrapper, stats_counter
        )

    @pytest.yield_fixture
    def patch_config(self):
        """Patch ``DatabaseConfig.cluster_name`` to report "yelp_main"."""
        with mock.patch.object(
            config.DatabaseConfig,
            'cluster_name',
            new_callable=mock.PropertyMock
        ) as mock_cluster_name:
            mock_cluster_name.return_value = "yelp_main"
            yield mock_cluster_name

    def test_handle_event_not_implemented(self, base_event_handler):
        """The base class leaves ``handle_event`` for subclasses to implement."""
        with pytest.raises(NotImplementedError):
            base_event_handler.handle_event(mock.Mock(), mock.Mock())
class TestChangeLogDataEventHandler(object):
    """Tests for ChangeLogDataEventHandler built on mocked collaborators."""

    @pytest.fixture
    def event_handler(self, mock_db_connections):
        """Return a handler whose producer, schema wrapper and stats are mocks."""
        handler = ChangeLogDataEventHandler(
            db_connections=mock_db_connections,
            producer=mock.MagicMock(),
            schema_wrapper=mock.MagicMock(),
            stats_counter=mock.MagicMock(),
            register_dry_run=False,
            gtid_enabled=False
        )
        return handler

    def test_get_schema_id(self, mock_db_connections):
        """The schema id comes from registering the schema file's JSON."""
        schema_wrapper = mock.MagicMock()
        schematizer_client = schema_wrapper.schematizer_client
        registered_schema = mock.MagicMock(schema_id=42)
        schematizer_client.register_schema_from_schema_json.return_value = (
            registered_schema
        )

        # ``open`` is replaced inside the handler's module so that the schema
        # file read yields the small JSON document below.
        with mock.patch(
            "replication_handler.components.change_log_data_event_handler.open"
        ) as mock_open:
            mock_open.return_value = mock.MagicMock(spec=file)
            opened_file = mock_open.return_value.__enter__.return_value
            opened_file.read.return_value = (
                '{"namespace": "foo", "name": "bar"}'
            )

            event_handler = ChangeLogDataEventHandler(
                db_connections=mock_db_connections,
                producer=mock.MagicMock(),
                schema_wrapper=schema_wrapper,
                stats_counter=mock.MagicMock(),
                register_dry_run=False,
                gtid_enabled=False
            )

            assert 42 == event_handler.schema_id
            schematizer_client.register_schema_from_schema_json.assert_called_once_with(
                contains_pii=False,
                namespace='foo',
                schema_json={'namespace': 'foo', 'name': 'bar'},
                source=u'bar',
                source_owner_email=u'distsys-data+changelog@yelp.com'
            )

    @mock.patch.object(ChangeLogDataEventHandler, '_handle_row')
    def test_handle_event(self, mock_row, event_handler):
        """``handle_event`` forwards the event and position to ``_handle_row``."""
        incoming_event = mock.MagicMock(schema="schema")
        event_handler.handle_event(incoming_event, "position")
        mock_row.assert_called_once_with(
            event_handler.schema_wrapper_entry,
            incoming_event,
            "position"
        )

    @mock.patch(
        'replication_handler.components.change_log_data_event_handler.ChangeLogMessageBuilder',
        autospec=True
    )
    def test_handle_row_calls_ChangeLogMessageBuilder(
        self,
        ChangeLogMessageBuilder,
        event_handler,
        fake_transaction_id_schema_id
    ):
        """``_handle_row`` constructs exactly one ChangeLogMessageBuilder."""
        row_event = mock.MagicMock(table="table")
        event_handler._handle_row("schema_wrapper_entry", row_event, "position")
        ChangeLogMessageBuilder.assert_called_once_with(
            "schema_wrapper_entry",
            row_event,
            fake_transaction_id_schema_id,
            "position",
            False
        )
class TestPositionFinder(object):
    """Tests for deriving the resume position from the saved global state."""

    @pytest.fixture
    def create_table_statement(self):
        """Placeholder CREATE TABLE text."""
        statement = "CREATE TABLE STATEMENT"
        return statement

    @pytest.fixture
    def position_dict(self):
        """Serialized form of the saved position."""
        return {"gtid": "sid:12"}

    @pytest.fixture
    def schema_event_position(self):
        """Position object the finder is expected to reproduce."""
        return GtidPosition(gtid="sid:12")

    def test_get_position_to_resume_tailing(
        self,
        schema_event_position,
        position_dict,
        gtid_enabled
    ):
        """A saved schema-event state resolves to the matching position."""
        saved_state = mock.Mock(
            event_type=EventType.SCHEMA_EVENT,
            position=position_dict,
        )
        finder = PositionFinder(
            gtid_enabled=gtid_enabled,
            global_event_state=saved_state,
        )
        resumed_position = finder.get_position_to_resume_tailing_from()

        actual = resumed_position.to_dict()
        expected = schema_event_position.to_dict()
        assert actual == expected
class TestSchemaTracker(object):
    """Tests for the queries SchemaTracker issues on the tracker cursor."""

    @pytest.fixture
    def base_schema_tracker(self, mock_db_connections):
        """Tracker under test, backed by the mocked connections."""
        tracker = SchemaTracker(mock_db_connections)
        return tracker

    @pytest.fixture
    def test_table(self):
        return "fake_table"

    @pytest.fixture
    def test_schema(self):
        return "fake_schema"

    @pytest.fixture
    def test_cluster(self):
        return "yelp_main"

    @pytest.fixture
    def show_create_query(self, test_table, test_schema):
        """SHOW CREATE TABLE statement for the fake schema and table."""
        template = "SHOW CREATE TABLE `{0}`.`{1}`"
        return template.format(test_schema, test_table)

    @pytest.fixture
    def table_with_schema_changes(self, test_cluster, test_schema, test_table):
        """Table descriptor handed to the tracker."""
        return Table(
            cluster_name=test_cluster,
            database_name=test_schema,
            table_name=test_table
        )

    @pytest.fixture
    def mock_tracker_cursor(self, test_table, show_create_query):
        """Cursor mock whose ``fetchone`` yields a [table, statement] row."""
        cursor = mock.Mock(spec=Cursor)
        cursor.fetchone.return_value = [test_table, show_create_query]
        return cursor

    def test_get_show_create_table_statement(
        self,
        mock_tracker_cursor,
        base_schema_tracker,
        show_create_query,
        test_table,
        table_with_schema_changes,
    ):
        """Three statements are executed, in order, and two rows are fetched."""
        base_schema_tracker.get_show_create_statement(table_with_schema_changes)

        use_statement = "USE {0}".format(table_with_schema_changes.database_name)
        show_tables_statement = "SHOW TABLES LIKE \'{0}\'".format(
            table_with_schema_changes.table_name
        )
        expected_calls = [
            mock.call(use_statement),
            mock.call(show_tables_statement),
            mock.call(show_create_query)
        ]

        assert mock_tracker_cursor.execute.call_count == 3
        assert mock_tracker_cursor.execute.call_args_list == expected_calls
        assert mock_tracker_cursor.fetchone.call_count == 2
@pytest.mark.itest
@pytest.mark.skipif(
    is_envvar_set('OPEN_SOURCE_MODE'),
    reason="skip this in open source mode."
)
def test_change_log_messages(
    containers,
    rbrsource,
    create_table_query,
    schematizer,
    namespace,
    source,
    rbr_source_session,
    gtid_enabled
):
    """Insert two rows, update one and delete the other, then verify that
    the four matching change log messages are published.
    """
    def changelog_payload(row_id):
        # Every expected payload carries only the row id plus the table
        # name and schema; the row's other columns are not part of it.
        return {'id': row_id, 'table_name': source, 'table_schema': 'yelp'}

    if not gtid_enabled:
        increment_heartbeat(containers, rbrsource)

    create_statement = create_table_query.format(table_name=source)
    execute_query_get_one_row(containers, rbrsource, create_statement)

    BasicModel = _generate_basic_model(source)
    kept_row = BasicModel(id=1, name='insert')
    dropped_row = BasicModel(id=2, name='insert')
    for row in (kept_row, dropped_row):
        rbr_source_session.add(row)
    rbr_source_session.commit()

    # Second transaction: one update and one delete.
    kept_row.name = 'update'
    rbr_source_session.delete(dropped_row)
    rbr_source_session.commit()

    messages = _fetch_messages(
        containers,
        schematizer,
        namespace,
        source,
        4
    )

    expected_messages = [
        {
            'message_type': MessageType.create,
            'payload_data': changelog_payload(1)
        },
        {
            'message_type': MessageType.create,
            'payload_data': changelog_payload(2)
        },
        {
            'message_type': MessageType.update,
            'payload_data': changelog_payload(1),
            'previous_payload_data': changelog_payload(1)
        },
        {
            'message_type': MessageType.delete,
            'payload_data': changelog_payload(2)
        },
    ]
    _verify_messages(messages, expected_messages)
@pytest.yield_fixture(scope='module')
def sandbox_session():
    """Yield one sandbox database session shared by the tests of a module."""
    with sandbox.database_sandbox_session() as session:
        yield session
@pytest.mark.itest
@pytest.mark.itest_db
class BaseConnectionTest(object):
    """Shared fixtures for the connection tests.

    Subclasses supply a ``connection`` fixture built from the topology file
    written here.
    """

    @pytest.yield_fixture(scope='module')
    def database_sandbox_session(self):
        """Launch one MySQL daemon per module and yield what it returns."""
        yield launch_mysql_daemon()

    @pytest.fixture
    def simple_topology_file(
        self,
        tmpdir,
        database_sandbox_session,
        mock_source_cluster_name,
        mock_tracker_cluster_name,
        mock_state_cluster_name
    ):
        """Write a topology YAML file and return its path.

        The source, tracker and state clusters are all pointed at the same
        sandbox database.
        """
        mysql_url = database_sandbox_session.engine.url
        local = tmpdir.mkdir("dummy").join("dummy_topology.yaml")
        local.write('''
            topology:
            - cluster: {source_cluster_name}
              replica: 'master'
              entries:
                - charset: utf8
                  host: '{host}'
                  db: '{db}'
                  user: '{user}'
                  passwd: '{passwd}'
                  port: {port}
                  use_unicode: true
            - cluster: {tracker_cluster_name}
              replica: 'master'
              entries:
                - charset: utf8
                  host: '{host}'
                  db: '{db}'
                  user: '{user}'
                  passwd: '{passwd}'
                  port: {port}
                  use_unicode: true
            - cluster: {state_cluster_name}
              replica: 'master'
              entries:
                - charset: utf8
                  host: '{host}'
                  db: '{db}'
                  user: '{user}'
                  passwd: '{passwd}'
                  port: {port}
                  use_unicode: true
        '''.format(
            source_cluster_name=mock_source_cluster_name,
            tracker_cluster_name=mock_tracker_cluster_name,
            state_cluster_name=mock_state_cluster_name,
            host=mysql_url.host or 'localhost',
            db=mysql_url.database,
            user=mysql_url.username or '',
            # Apply the default before converting: int(None) raises
            # TypeError, so ``int(port) or 3306`` could never fall back
            # when the URL carries no port.
            port=int(mysql_url.port or 3306),
            passwd=mysql_url.password or ''
        ))
        return local.strpath
@pytest.mark.itest
@pytest.mark.itest_db
class TestRHConnection(BaseConnectionTest):
    """Exercise the sessions and cursors exposed by RHConnection."""

    @pytest.fixture
    def connection(
        self,
        simple_topology_file,
        mock_source_cluster_name,
        mock_tracker_cluster_name,
        mock_state_cluster_name
    ):
        """Build an RHConnection from the generated topology file."""
        # Imported inside the fixture, as in the original, rather than at
        # module import time.
        from replication_handler.models.connections.rh_connection import RHConnection
        rh_connection = RHConnection(
            simple_topology_file,
            mock_source_cluster_name,
            mock_tracker_cluster_name,
            mock_state_cluster_name
        )
        return rh_connection

    def test_source_session(self, connection):
        with connection.source_session.connect_begin() as session:
            row = session.execute('SELECT 1;').fetchone()
            assert len(row) == 1

    def test_tracker_session(self, connection):
        with connection.tracker_session.connect_begin() as session:
            row = session.execute('SELECT 1;').fetchone()
            assert len(row) == 1

    def test_state_session(self, connection):
        with connection.state_session.connect_begin() as session:
            row = session.execute('SELECT 1;').fetchone()
            assert len(row) == 1

    def test_cursors(self, connection):
        """Each cursor factory yields a cursor that can run a trivial query."""
        with connection.get_source_cursor() as source_cursor:
            source_cursor.execute('SELECT 1;')
            source_row = source_cursor.fetchone()
            assert len(source_row) == 1

        with connection.get_tracker_cursor() as tracker_cursor:
            tracker_cursor.execute('SELECT 1;')
            tracker_row = tracker_cursor.fetchone()
            assert len(tracker_row) == 1

        with connection.get_state_cursor() as state_cursor:
            state_cursor.execute('SELECT 1;')
            state_row = state_cursor.fetchone()
            assert len(state_row) == 1
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

import pytest

from replication_handler.environment_configs import is_envvar_set
from tests.models.connections.base_connection_test import BaseConnectionTest


@pytest.mark.itest
@pytest.mark.itest_db
@pytest.mark.skipif(
    is_envvar_set('OPEN_SOURCE_MODE'),
    reason="skip this in open source mode."
)
class TestYelpConnConnection(BaseConnectionTest):
    """Tests for ``YelpConnConnection``, marked ``itest`` and ``itest_db``.

    The whole class is skipped when the ``OPEN_SOURCE_MODE`` environment
    variable is set.
    """

    @pytest.fixture
    def connection(
        self,
        simple_topology_file,
        mock_source_cluster_name,
        mock_tracker_cluster_name,
        mock_state_cluster_name
    ):
        """Return a ``YelpConnConnection`` built from the topology file and
        the source, tracker and state cluster names (in that order).
        """
        # Imported inside the fixture rather than at module level.
        from replication_handler.models.connections.yelp_conn_connection import YelpConnConnection
        return YelpConnConnection(
            simple_topology_file,
            mock_source_cluster_name,
            mock_tracker_cluster_name,
            mock_state_cluster_name
        )

    def test_source_session(self, connection):
        """A read-only source session can be opened and closed."""
        # Smoke test: passing means connect_begin() entered and exited
        # without raising.
        with connection.source_session.connect_begin(ro=True):
            assert True

    def test_tracker_session(self, connection):
        """A read-write tracker session can be opened and closed."""
        with connection.tracker_session.connect_begin(ro=False):
            assert True

    def test_state_session(self, connection):
        """The state session can be opened read-write and then read-only."""
        with connection.state_session.connect_begin(ro=False):
            assert True

        with connection.state_session.connect_begin(ro=True):
            assert True

    def test_cursors(self, connection):
        """The source, tracker and state cursors each execute a trivial query."""
        with connection.get_source_cursor() as cursor:
            cursor.execute('SELECT 1;')
            assert len(cursor.fetchone()) == 1

        with connection.get_tracker_cursor() as cursor:
            cursor.execute('SELECT 1;')
            assert len(cursor.fetchone()) == 1

        with connection.get_state_cursor() as cursor:
            cursor.execute('SELECT 1;')
            assert len(cursor.fetchone()) == 1

    def test_tracker_cursor_regression(self, connection):
        """The tracker cursor can be opened and used 1000 times in a row."""
        # This test is named for the tracker cursor, so it must loop over
        # get_tracker_cursor(); the source cursor is already covered by
        # test_cursors.
        for _ in range(1000):
            with connection.get_tracker_cursor() as cursor:
                cursor.execute('SELECT 1;')
                assert len(cursor.fetchone()) == 1
# --------------------------------------------------------------------------------
# /tests/models/global_event_state_test.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

import pytest

from replication_handler.models.global_event_state import EventType
from replication_handler.models.global_event_state import GlobalEventState


@pytest.mark.itest
@pytest.mark.itest_db
class TestGlobalEventState(object):
    """Tests for ``GlobalEventState.upsert`` / ``GlobalEventState.get``,
    marked ``itest`` and ``itest_db``.
    """

    @pytest.fixture
    def cluster_name(self):
        """Cluster name used for every row in this test class."""
        return "yelp_main"

    @pytest.fixture
    def database_name(self):
        """Database name stored with the event state."""
        return "yelp"

    @pytest.fixture
    def table_name(self):
        """Table name stored with the event state."""
        return 'user'

    @pytest.fixture
    def gtid_position(self):
        """Position dict holding a GTID."""
        return {"gtid": "gtid1"}

    @pytest.fixture
    def binlog_position(self):
        """Position dict holding a binlog file name and offset."""
        return {"log_pos": 343, "log_file": "binlog.001"}

    @pytest.yield_fixture
    def starting_global_event_state(
        self,
        sandbox_session,
        cluster_name,
        database_name,
        table_name,
        gtid_position,
        binlog_position
    ):
        """Upsert an initial DATA_EVENT state, yield it, then delete it.

        Checks that no state exists for the cluster before setup and again
        after teardown.
        """
        # Precondition: nothing is stored for this cluster yet.
        existing_state = GlobalEventState.get(sandbox_session, cluster_name)
        assert existing_state is None

        initial_state = GlobalEventState.upsert(
            session=sandbox_session,
            cluster_name=cluster_name,
            database_name=database_name,
            table_name=table_name,
            position=gtid_position,
            event_type=EventType.DATA_EVENT,
            is_clean_shutdown=0
        )
        sandbox_session.flush()

        # The upsert must have created the single row for this cluster.
        stored_state = GlobalEventState.get(sandbox_session, cluster_name)
        assert stored_state == initial_state

        yield initial_state

        # Teardown: remove every row for this cluster and commit the delete.
        cluster_rows = sandbox_session.query(GlobalEventState).filter(
            GlobalEventState.cluster_name == cluster_name,
        )
        cluster_rows.delete()
        sandbox_session.commit()
        assert GlobalEventState.get(sandbox_session, cluster_name) is None

    def test_upsert_global_event_state(
        self,
        sandbox_session,
        cluster_name,
        database_name,
        table_name,
        gtid_position,
        binlog_position,
        starting_global_event_state
    ):
        """A second upsert for the same cluster is what ``get`` returns."""
        updated_state = GlobalEventState.upsert(
            session=sandbox_session,
            cluster_name=cluster_name,
            database_name=database_name,
            table_name=table_name,
            position=binlog_position,
            event_type=EventType.SCHEMA_EVENT,
            is_clean_shutdown=1,
        )
        sandbox_session.flush()

        # The existing row is updated rather than a new one being returned.
        stored_state = GlobalEventState.get(sandbox_session, cluster_name)
        assert stored_state == updated_state
# --------------------------------------------------------------------------------
# /tests/models/mysql_dumps_test.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import
from __future__ import unicode_literals

import pytest

from replication_handler.models.mysql_dumps import MySQLDumps


@pytest.mark.itest
@pytest.mark.itest_db
class TestMySQLDumps(object):
    """Tests for the ``MySQLDumps`` model, marked ``itest`` and ``itest_db``."""

    @pytest.fixture
    def cluster_name(self):
        """Cluster name the dumps are stored under."""
        return 'yelp_main'

    @pytest.fixture
    def test_dump(self):
        """Content of the dump stored by ``initialize_dump``."""
        return 'This is a test dump'

    @pytest.yield_fixture
    def initialize_dump(
        self,
        sandbox_session,
        cluster_name,
        test_dump
    ):
        """Store ``test_dump`` for the cluster and yield the stored object.

        Checks that no dump exists beforehand and that one exists after the
        session is flushed.
        """
        had_dump = MySQLDumps.dump_exists(sandbox_session, cluster_name)
        assert had_dump is False

        stored_dump = MySQLDumps.update_mysql_dump(
            session=sandbox_session,
            database_dump=test_dump,
            cluster_name=cluster_name
        )
        sandbox_session.flush()

        has_dump = MySQLDumps.dump_exists(sandbox_session, cluster_name)
        assert has_dump is True
        yield stored_dump

    def test_get_latest_mysql_dump(
        self,
        initialize_dump,
        cluster_name,
        test_dump,
        sandbox_session
    ):
        """The latest dump follows updates and disappears after deletion."""
        # 1. The dump stored by the fixture is the latest one.
        latest = MySQLDumps.get_latest_mysql_dump(
            session=sandbox_session,
            cluster_name=cluster_name
        )
        assert latest == test_dump

        # 2. Updating the dump changes what is reported as the latest.
        replacement = 'This is a new dump'
        MySQLDumps.update_mysql_dump(
            session=sandbox_session,
            database_dump=replacement,
            cluster_name=cluster_name
        )
        latest = MySQLDumps.get_latest_mysql_dump(
            session=sandbox_session,
            cluster_name=cluster_name
        )
        assert latest == replacement

        # 3. After deletion no dump exists for the cluster.
        MySQLDumps.delete_mysql_dump(
            session=sandbox_session,
            cluster_name=cluster_name
        )
        assert not MySQLDumps.dump_exists(
            session=sandbox_session,
            cluster_name=cluster_name
        )
# --------------------------------------------------------------------------------
# /tests/util/__init__.py:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Yelp/mysql_streamer/b88183ac00b88f5dff9c01ad87a46da9e3615d9e/tests/util/__init__.py -------------------------------------------------------------------------------- /tests/util/change_log_message_builder_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 
from __future__ import absolute_import
from __future__ import unicode_literals

import mock

from replication_handler.util.change_log_message_builder import ChangeLogMessageBuilder


@mock.patch('replication_handler.util.change_log_message_builder.UpdateMessage')
def test_build_message_builds_proper_message(
    update_mock,
    fake_transaction_id_schema_id,
    mock_source_cluster_name
):
    """``build_message`` calls the event's message type exactly once with
    the expected change-log payloads, meta and upstream position info.
    """
    # Inputs: a row update that changes ``id`` from 41 to 42.
    row = {'before_values': {'id': 41}, 'after_values': {'id': 42}}
    event = mock.MagicMock(
        schema="schema",
        table="table_name",
        timestamp=42,
        row=row,
        message_type=update_mock
    )
    schema_info = mock.MagicMock(topic="topic", schema_id=42)

    position = mock.MagicMock()
    position.get_transaction_id.return_value = 'txn_id'
    position.to_dict.return_value = {"foo_pos": 42}

    # Expected keyword arguments of the single message-type call.
    expected_payload = {
        'id': 42,
        'table_schema': 'schema',
        'table_name': 'table_name'
    }
    expected_previous_payload = {
        'id': 41,
        'table_schema': 'schema',
        'table_name': 'table_name'
    }
    expected_upstream_position_info = {
        'database_name': 'schema',
        'position': {
            'foo_pos': 42
        },
        'table_name': 'table_name',
        'cluster_name': 'refresh_primary'
    }

    ChangeLogMessageBuilder(
        schema_info, event, fake_transaction_id_schema_id, position
    ).build_message(mock_source_cluster_name)

    update_mock.assert_called_once_with(
        dry_run=True,
        meta=['txn_id'],
        payload_data=expected_payload,
        previous_payload_data=expected_previous_payload,
        schema_id=42,
        timestamp=42,
        upstream_position_info=expected_upstream_position_info
    )
# --------------------------------------------------------------------------------
# /tests/util/message_builder_test.py:
# --------------------------------------------------------------------------------
# -*- coding: utf-8 -*-
# Copyright 2016 Yelp Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 16 | from __future__ import absolute_import 17 | from __future__ import unicode_literals 18 | 19 | import datetime 20 | 21 | import mock 22 | import pytest 23 | import pytz 24 | 25 | from replication_handler.util.message_builder import MessageBuilder 26 | from replication_handler.util.misc import transform_timedelta_to_number_of_microseconds 27 | 28 | 29 | class TestMessageBuilder(object): 30 | 31 | @pytest.fixture 32 | def event_row(self): 33 | return {'values': { 34 | 'test_int': 100, 35 | 'test_set': set(['ONE']), 36 | 'test_timestamp': datetime.datetime(2015, 12, 31, 0, 59, 59, 999999), 37 | 'test_datetime': datetime.datetime(2015, 12, 31, 0, 59, 59, 999999), 38 | 'test_time': datetime.timedelta(0, 59 * 60 + 59, 999999) 39 | }} 40 | 41 | @pytest.fixture 42 | def expected_payload(self): 43 | return { 44 | 'test_int': 100, 45 | 'test_set': ['ONE'], 46 | 'test_timestamp': datetime.datetime( 47 | 2015, 12, 31, 0, 59, 59, 999999, tzinfo=pytz.utc 48 | ), 49 | 'test_datetime': '2015-12-31T00:59:59.999999', 50 | 'test_time': transform_timedelta_to_number_of_microseconds( 51 | datetime.timedelta(0, 59 * 60 + 59, 999999) 52 | ), 53 | } 54 | 55 | def test_build_message_builds_proper_message( 56 | self, 57 | event_row, 58 | expected_payload, 59 | fake_transaction_id_schema_id, 60 | mock_source_cluster_name 61 | ): 62 | schema_info = mock.MagicMock( 63 | schema_id=42, 64 | transformation_map={ 65 | 'test_set': 'set', 66 | 'test_timestamp': 'timestamp(6)', 67 | 'test_datetime': 'datetime(6)', 68 | 'test_time': 'time(6)' 69 | } 
70 | ) 71 | with mock.patch( 72 | 'data_pipeline.message.CreateMessage' 73 | ) as create_mock_with_set_datatype: 74 | event = mock.MagicMock( 75 | schema="schema", 76 | table="table_name", 77 | timestamp=42, 78 | row=event_row, 79 | message_type=create_mock_with_set_datatype 80 | ) 81 | position = mock.MagicMock() 82 | position.to_dict.return_value = {"foo_pos": 42} 83 | position.get_transaction_id.return_value = 'txn_id' 84 | builder = MessageBuilder( 85 | schema_info, event, fake_transaction_id_schema_id, position 86 | ) 87 | builder.build_message(mock_source_cluster_name) 88 | create_mock_with_set_datatype.assert_called_once_with( 89 | dry_run=True, 90 | meta=['txn_id'], 91 | payload_data=expected_payload, 92 | schema_id=42, 93 | timestamp=42, 94 | upstream_position_info={ 95 | 'database_name': 'schema', 96 | 'position': { 97 | 'foo_pos': 42 98 | }, 99 | 'table_name': 'table_name', 100 | 'cluster_name': 'refresh_primary' 101 | }) 102 | -------------------------------------------------------------------------------- /tests/util/transaction_id_test.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Copyright 2016 Yelp Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, 11 | # software distributed under the License is distributed on an 12 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | # KIND, either express or implied. See the License for the 14 | # specific language governing permissions and limitations 15 | # under the License. 
16 | from __future__ import absolute_import 17 | from __future__ import unicode_literals 18 | 19 | import pytest 20 | 21 | from replication_handler.util.transaction_id import get_gtid_meta_attribute 22 | from replication_handler.util.transaction_id import get_ltid_meta_attribute 23 | 24 | 25 | class TestLogTransactionId(object): 26 | 27 | @pytest.fixture(params=[ 28 | [str('cluster1'), 'bin_log1', 10], 29 | ['cluster1', str('bin_log1'), 10], 30 | ['cluster1', 'bin_log1', '10'], 31 | ]) 32 | def invalid_params(self, request): 33 | return request.param 34 | 35 | def test_transaction_id_rejects_invalid_params( 36 | self, fake_transaction_id_schema_id, invalid_params 37 | ): 38 | invalid_params = [fake_transaction_id_schema_id] + invalid_params 39 | with pytest.raises(TypeError): 40 | get_ltid_meta_attribute(*invalid_params) 41 | 42 | @pytest.fixture(params=[ 43 | ['cluster1', 'bin_log1', 10], 44 | ]) 45 | def valid_params(self, request, fake_transaction_id_schema_id): 46 | params = [fake_transaction_id_schema_id] + request.param 47 | return params 48 | 49 | @pytest.fixture 50 | def transaction_id(self, valid_params): 51 | return get_ltid_meta_attribute(*valid_params) 52 | 53 | @pytest.fixture(params=[ 54 | {'cluster_name': 'cluster1', 'log_file': 'bin_log1', 'log_pos': 10}, 55 | ]) 56 | def expected_to_dict(self, request): 57 | return request.param 58 | 59 | def test_transaction_id_payload_data(self, transaction_id, expected_to_dict): 60 | assert transaction_id.payload_data == expected_to_dict 61 | 62 | 63 | class TestGlobalTransactionId(object): 64 | 65 | @pytest.fixture(params=[ 66 | [str('cluster1'), 'bin_log1'], 67 | ['cluster1', str('bin_log1')], 68 | ]) 69 | def invalid_params(self, request): 70 | return request.param 71 | 72 | def test_transaction_id_rejects_invalid_params( 73 | self, 74 | fake_transaction_id_schema_id, 75 | invalid_params 76 | ): 77 | invalid_params = [fake_transaction_id_schema_id] + invalid_params 78 | with pytest.raises(TypeError): 79 | 
get_gtid_meta_attribute(*invalid_params) 80 | 81 | @pytest.fixture(params=[ 82 | ['cluster1', 'bin_log1'], 83 | ]) 84 | def valid_params(self, request, fake_transaction_id_schema_id): 85 | params = [fake_transaction_id_schema_id] + request.param 86 | return params 87 | 88 | @pytest.fixture 89 | def transaction_id(self, valid_params): 90 | return get_gtid_meta_attribute(*valid_params) 91 | 92 | @pytest.fixture(params=[ 93 | {'cluster_name': 'cluster1', 'gtid': 'bin_log1'}, 94 | ]) 95 | def expected_to_dict(self, request): 96 | return request.param 97 | 98 | def test_transaction_id_payload_data(self, transaction_id, expected_to_dict): 99 | assert transaction_id.payload_data == expected_to_dict 100 | -------------------------------------------------------------------------------- /tox-opensource.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, docs, pre-commit 3 | basepython = python2.7 4 | skipsdist = true 5 | indexserver = 6 | default = https://pypi.python.org/simple 7 | 8 | [testenv] 9 | basepython = python2.7 10 | envdir = venv/py27 11 | setenv = 12 | PIP_INDEX_URL = https://pypi.python.org/simple 13 | venv_update = {toxinidir}/bin/venv-update venv= {envdir} install= 14 | commands = 15 | {[testenv]venv_update} -r {toxinidir}/requirements-opensource.txt 16 | 17 | [testenv:py27] 18 | commands = 19 | {[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt -r {toxinidir}/requirements-opensource.txt 20 | py.test -c tox-opensource.ini --cov-config .coveragerc-opensource --cov=replication_handler --maxfail=3 tests/ 21 | 22 | [testenv:docs] 23 | envdir = venv/docs 24 | commands = 25 | {[testenv]venv_update} -r {toxinidir}/requirements.d/docs.txt -r {toxinidir}/requirements-opensource.txt 26 | sphinx-build -b html -d docs/build/doctrees docs/source docs/build/html 27 | 28 | [flake8] 29 | ignore = E125, E302, C901 30 | max-line-length = 160 31 | max-complexity = 10 32 | exclude = 
.svn,CVS,.bzr,.hg,.git,__pycache__,.tox,./build,docs,virtualenv_run

[testenv:devenv]
commands =
    {[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt -r {toxinidir}/requirements-opensource.txt

[testenv:devenv-command]
commands =
    {[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt -r {toxinidir}/requirements-opensource.txt
    {posargs}

[testenv:pre-commit]
basepython = python2.7
envdir = venv/pre-commit
commands =
    {[testenv]venv_update} -r {toxinidir}/requirements.d/pre_commit.txt
    pre-commit {posargs}

[testenv:itest]
commands =
    {[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt -r {toxinidir}/requirements-opensource.txt
    py.test -c tox-opensource.ini -m "itest" --ignore=setup.py -vv {posargs} tests/

[testenv:itest_db]
commands =
    {[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt -r {toxinidir}/requirements-opensource.txt
    py.test -c tox-opensource.ini -m "itest_db" --ignore=setup.py -vv tests/

[pytest]
norecursedirs = tests/batch/internal
addopts = -m "not itest" --ignore=setup.py --doctest-glob=*.rst -vv

[pep8]
# E265 deals with spacing inside of comments - breaks human formatting
# E309 puts a blank line after class declarations - doesn't work well with docstrings
# E501 reformats lines to fit in --max-line-length poorly
# E265 block comment should start with '# '
# E402 module level import not at top of file
# The list below previously repeated E265 and left out E402, although E402
# is documented above as ignored.
ignore = E265,E309,E501,E402
# --------------------------------------------------------------------------------
# /tox.ini:
# --------------------------------------------------------------------------------
[tox]
envlist = py27, docs, pre-commit
basepython = python2.7
skipsdist = true
indexserver =
    default = https://pypi.yelpcorp.com/simple/

[testenv]
basepython = python2.7
envdir = venv/py27
setenv =
    PIP_INDEX_URL = https://pypi.yelpcorp.com/simple
venv_update = {toxinidir}/bin/venv-update venv= {envdir} install=
commands =
    {[testenv]venv_update} -r {toxinidir}/requirements.txt

[testenv:py27]
commands =
    {[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt -r {toxinidir}/requirements.txt
    py.test -c tox.ini --cov-config .coveragerc --cov=replication_handler --maxfail=3 tests/

[testenv:docs]
envdir = venv/docs
commands =
    {[testenv]venv_update} -r {toxinidir}/requirements.d/docs.txt -r {toxinidir}/requirements.txt
    sphinx-build -b html -d docs/build/doctrees docs/source docs/build/html

[flake8]
ignore = E125, E302, C901
max-line-length = 160
max-complexity = 10
exclude = .svn,CVS,.bzr,.hg,.git,__pycache__,.tox,./build,docs,virtualenv_run

[testenv:devenv]
commands =
    {[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt -r {toxinidir}/requirements.txt

[testenv:devenv-command]
commands =
    {[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt -r {toxinidir}/requirements.txt
    {posargs}

[testenv:pre-commit]
basepython = python2.7
envdir = venv/pre-commit
commands =
    {[testenv]venv_update} -r {toxinidir}/requirements.d/pre_commit.txt
    pre-commit {posargs}

[testenv:itest]
commands =
    {[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt -r {toxinidir}/requirements.txt
    py.test -m "itest" --ignore=setup.py -vv {posargs} tests/

[testenv:itest_db]
commands =
    {[testenv]venv_update} -r {toxinidir}/requirements.d/dev.txt -r {toxinidir}/requirements.txt
    py.test -m "itest_db" --ignore=setup.py -vv tests/

[pytest]
addopts = -m "not itest" --ignore=setup.py --doctest-glob=*.rst -vv

[pep8]
# E265 deals with spacing inside of comments - breaks human formatting
# E309 puts a blank line after class declarations - doesn't work well with docstrings
# E501 reformats lines to fit in --max-line-length poorly
# E265 block comment should start with '# '
# E402 module level import not at top of file
# The list below previously repeated E265 and left out E402, although E402
# is documented above as ignored.
ignore = E265,E309,E501,E402
# --------------------------------------------------------------------------------
# /yelp_conn_generic.yaml:
# --------------------------------------------------------------------------------
topology: "topology.yaml"
# --------------------------------------------------------------------------------