├── README.md ├── docker-compose.yml ├── example ├── __init__.py ├── config.yaml └── run.py ├── pymyelarepl-desc.png ├── pymyelarepl.Dockerfile ├── pymyelarepl ├── __init__.py └── pymyelarepl.py ├── setup.py └── test ├── __init__.py ├── config.yaml └── test_basic.py /README.md: -------------------------------------------------------------------------------- 1 | ## Description 2 | 3 | 4 | 5 | This project replicates Mysql to Elasticsearch, reading binlog events and converting them to python objects to jsons. 6 | 7 | Note that the project does not supoort HTTPS when migrating. 8 | 9 | The project is a modified version of [py-mysql-elasticsearch-sync](https://github.com/zhongbiaodev/py-mysql-elasticsearch-sync), using library named [python-mysql-replication](https://github.com/julien-duponchelle/python-mysql-replication). 10 | 11 | 12 |
13 | 14 | ## Give it a try! 15 | 16 | make containers by: 17 | 18 | ``` 19 | docker-compose up 20 | ``` 21 | 22 | 23 |
24 | 25 | stop the elasticsearch container and modify /usr/share/elasticsearch/config/elasticsearch.yml as: 26 | 27 | ``` 28 | (edit true to false) xpack.security.enabled: false 29 | 30 | (at the end of the file add this) action.destructive_requires_name: false 31 | ``` 32 | 33 | 34 |
35 | 36 | restart the elasticsearch container, and in the pymyelarepl container, 37 | 38 | ``` 39 | (for test) cd pymyelarepl && python test/test_basic.py 40 | 41 | 42 | (after executing the following sql in the mysql container as root without password, for example) cd pymyelarepl && python example/run.py 43 | 44 | CREATE DATABASE test; 45 | use test; 46 | CREATE TABLE test4 (id int NOT NULL AUTO_INCREMENT, data VARCHAR(255), data2 VARCHAR(255), PRIMARY KEY(id)); 47 | INSERT INTO test4 (data, data2) VALUES ("Hello", "World"); 48 | UPDATE test4 SET data="World", data2="Hello" WHERE id=1; 49 | DELETE FROM test4 WHERE id=1; 50 | ``` 51 | 52 | 53 |
54 | 55 | the results of the two cases are similar to the below, which means data is replicated successfully: 56 | 57 | ``` 58 | {'errors': False, 'took': 12, 'items': [{'create': {'_index': 'basic_replication', '_id': '1', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 6, '_primary_term': 1, 'status': 201}}, {'create': {'_index': 'basic_replication', '_id': '2', '_version': 1, 'result': 'created', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 7, '_primary_term': 1, 'status': 201}}]} 59 | {'errors': False, 'took': 11, 'items': [{'update': {'_index': 'basic_replication', '_id': '1', '_version': 2, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 8, '_primary_term': 1, 'status': 200}}, {'update': {'_index': 'basic_replication', '_id': '2', '_version': 2, 'result': 'updated', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 9, '_primary_term': 1, 'status': 200}}]} 60 | {'errors': False, 'took': 2, 'items': [{'delete': {'_index': 'basic_replication', '_id': '1', '_version': 3, 'result': 'deleted', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 10, '_primary_term': 1, 'status': 200}}, {'delete': {'_index': 'basic_replication', '_id': '2', '_version': 3, 'result': 'deleted', '_shards': {'total': 2, 'successful': 1, 'failed': 0}, '_seq_no': 11, '_primary_term': 1, 'status': 200}}]} 61 | 62 | (blocked for the example) 63 | ``` 64 | 65 | 66 |
67 | 68 | ## License 69 | 70 | MIT -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | mysql-8.1.0: 5 | image: mysql:8.1.0 6 | ports: 7 | - "3307:3306" 8 | environment: 9 | MYSQL_ALLOW_EMPTY_PASSWORD: true 10 | command: > 11 | mysqld 12 | --log-bin=mysql-bin.log 13 | --server-id 1 14 | --binlog-format=row 15 | --gtid_mode=on 16 | --enforce-gtid-consistency=on 17 | networks: 18 | network: 19 | ipv4_address: 172.0.0.2 20 | 21 | elasticsearch-8.10.2: 22 | image: elasticsearch:8.10.2 23 | ports: 24 | - "9201:9200" 25 | networks: 26 | network: 27 | ipv4_address: 172.0.0.3 28 | 29 | pymyelarepl: 30 | build: 31 | context: . 32 | dockerfile: pymyelarepl.Dockerfile 33 | args: 34 | BASE_IMAGE: python:3.12.0rc3-bookworm 35 | command: sleep infinity 36 | ports: 37 | - "3001:3000" 38 | networks: 39 | network: 40 | ipv4_address: 172.0.0.4 41 | 42 | networks: 43 | network: 44 | driver: bridge 45 | ipam: 46 | config: 47 | - subnet: 172.0.0.0/24 48 | gateway: 172.0.0.1 -------------------------------------------------------------------------------- /example/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaehyeonpy/py-mysql-elasticsearch-sync/df99b9f823ced328f6123ae78faa1afe4719269b/example/__init__.py -------------------------------------------------------------------------------- /example/config.yaml: -------------------------------------------------------------------------------- 1 | # all fields must be filled. 2 | 3 | mysql: 4 | host: '172.0.0.2' # or '127.0.0.1' 5 | port: 3306 # or 3307 6 | user: 'root' 7 | password: '' 8 | server_id: 2 # this should be unique. this may be different depending on environment. 9 | log_file: 'mysql-bin.000001' # this may be different depending on environment. 10 | log_pos: 0 11 | blocking: True # if true, mysql waits for new incoming event to send after pymyelarepl reads all the previous events. 12 | 13 | es: 14 | host: '172.0.0.3' # or '127.0.0.1' 15 | port: 9200 # or 9201 -------------------------------------------------------------------------------- /example/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from pymyelarepl import PyMyElaRepl 4 | 5 | 6 | config_path = os.path.join(os.path.dirname(__file__), 'config.yaml') 7 | pymyelarepl = PyMyElaRepl(config_path) 8 | pymyelarepl.run() -------------------------------------------------------------------------------- /pymyelarepl-desc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaehyeonpy/py-mysql-elasticsearch-sync/df99b9f823ced328f6123ae78faa1afe4719269b/pymyelarepl-desc.png -------------------------------------------------------------------------------- /pymyelarepl.Dockerfile: -------------------------------------------------------------------------------- 1 | ARG BASE_IMAGE 2 | FROM ${BASE_IMAGE} 3 | 4 | COPY pymyelarepl pymyelarepl/pymyelarepl 5 | COPY example pymyelarepl/example 6 | COPY test pymyelarepl/test 7 | COPY setup.py pymyelarepl/setup.py 8 | RUN cd pymyelarepl && pip install . -------------------------------------------------------------------------------- /pymyelarepl/__init__.py: -------------------------------------------------------------------------------- 1 | from .pymyelarepl import PyMyElaRepl -------------------------------------------------------------------------------- /pymyelarepl/pymyelarepl.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import decimal 3 | import json 4 | 5 | import requests 6 | import yaml 7 | 8 | from pymysqlreplication import BinLogStreamReader 9 | from pymysqlreplication.event import XidEvent 10 | from pymysqlreplication.row_event import DeleteRowsEvent, UpdateRowsEvent, WriteRowsEvent 11 | 12 | 13 | class PyMyElaRepl: 14 | def get_config_from_file(self, config_path): 15 | try: 16 | with open(config_path) as f: 17 | self.config = yaml.load(f, Loader=yaml.FullLoader) 18 | except IndexError: 19 | raise IndexError('Must specify config file') 20 | except FileNotFoundError: 21 | raise FileNotFoundError('Could not find the config file') 22 | 23 | def __init__(self, config_path): 24 | self.get_config_from_file(config_path) 25 | 26 | self.es_endpoint = 'http://{host}:{port}/_bulk'.format( 27 | host=self.config['es']['host'], 28 | port=self.config['es']['port'] 29 | ) 30 | 31 | self.mysql_conf = dict( 32 | [(key, self.config['mysql'][key]) for key in ['host', 'port', 'user', 'password']] 33 | ) 34 | 35 | self.binlog_stream_reader = BinLogStreamReader( 36 | connection_settings=self.mysql_conf, 37 | server_id=self.config['mysql']['server_id'], 38 | only_events=[DeleteRowsEvent, WriteRowsEvent, UpdateRowsEvent, XidEvent], 39 | log_file=self.config['mysql']['log_file'], 40 | log_pos=self.config['mysql']['log_pos'], 41 | resume_stream=True if self.config['mysql']['log_pos'] != 0 else False, 42 | blocking=self.config['mysql']['blocking'] 43 | ) 44 | 45 | self.if_error = [] 46 | 47 | def send_to_es(self, converted): 48 | resp = requests.post( 49 | url=self.es_endpoint, 50 | data=converted, 51 | verify=False, 52 | headers={'content-type': 'application/json'} 53 | ) 54 | 55 | self.if_error.append(resp.json()['errors']) 56 | print(resp.json()) 57 | 58 | def serialize_not_serializable(self, obj): 59 | if isinstance(obj, datetime.datetime) or isinstance(obj, datetime.date): 60 | return obj.isoformat() 61 | elif isinstance(obj, decimal.Decimal): 62 | return str(obj) 63 | raise TypeError('Type not serializable for obj {obj}'.format(obj=obj)) 64 | 65 | def convert_event_to_valid_es_data_format(self, event): 66 | converted = '' 67 | 68 | for e in event: 69 | meta = json.dumps({e['action']: {'_index': e['index'], '_id': e['id']}}) 70 | 71 | if e['action'] == 'delete': 72 | converted += ''.join([meta, '\n']) 73 | elif e['action'] == 'update': 74 | body = json.dumps({'doc': e['doc']}, default=self.serialize_not_serializable) 75 | converted += ''.join([meta, '\n', body, '\n']) 76 | elif e['action'] == 'create': 77 | body = json.dumps(e['doc'], default=self.serialize_not_serializable) 78 | converted += ''.join([meta, '\n', body, '\n']) 79 | 80 | return converted 81 | 82 | def get_binlog_event(self): 83 | extracted_collection = [] 84 | 85 | for event in self.binlog_stream_reader: 86 | if isinstance(event, XidEvent): 87 | yield extracted_collection 88 | 89 | extracted_collection = [] 90 | continue 91 | 92 | for row in event.rows: 93 | if isinstance(event, DeleteRowsEvent): 94 | extracted = { 95 | 'index': event.table, 96 | 'id': row['values'][event.primary_key], 97 | 'action': 'delete' 98 | } 99 | elif isinstance(event, UpdateRowsEvent): 100 | extracted = { 101 | 'index': event.table, 102 | 'id': row['after_values'][event.primary_key], 103 | 'action': 'update', 104 | 'doc': {k: v for k, v in row['after_values'].items() if k != event.primary_key} 105 | } 106 | elif isinstance(event, WriteRowsEvent): 107 | extracted = { 108 | 'index': event.table, 109 | 'id': row['values'][event.primary_key], 110 | 'action': 'create', 111 | 'doc': {k: v for k, v in row['values'].items() if k != event.primary_key} 112 | } 113 | 114 | extracted_collection.append(extracted) 115 | 116 | self.binlog_stream_reader.close() 117 | print('Info: Mysql connection closed successfully after reading all binlog events.') 118 | 119 | def run(self): 120 | for event in self.get_binlog_event(): 121 | converted = self.convert_event_to_valid_es_data_format(event) 122 | self.send_to_es(converted) -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages, setup 2 | 3 | setup( 4 | name='pymyelarepl', 5 | version='0.2', 6 | packages=find_packages( 7 | include=[ 8 | 'pymyelarepl' 9 | ]), 10 | install_requires = [ 11 | 'certifi==2023.7.22', 12 | 'cffi==1.15.1', 13 | 'charset-normalizer==3.2.0', 14 | 'cryptography==41.0.4', 15 | 'idna==3.4', 16 | 'mysql-replication==0.43.0', 17 | 'pycparser==2.21', 18 | 'PyMySQL==1.1.0', 19 | 'PyYAML==6.0.1', 20 | 'requests==2.31.0', 21 | 'urllib3==2.0.4' 22 | ] 23 | ) -------------------------------------------------------------------------------- /test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jaehyeonpy/py-mysql-elasticsearch-sync/df99b9f823ced328f6123ae78faa1afe4719269b/test/__init__.py -------------------------------------------------------------------------------- /test/config.yaml: -------------------------------------------------------------------------------- 1 | # all fields must be filled. 2 | 3 | mysql: 4 | host: '172.0.0.2' # or '127.0.0.1' 5 | port: 3306 # or 3307 6 | user: 'root' 7 | password: '' 8 | db: 'test_db_for_pymyelarepl' 9 | server_id: 3 # this should be unique. this may be different depending on environment. 10 | log_file: 'mysql-bin.000001' # this shoule be different depending on environment. 11 | log_pos: 0 12 | blocking: False # if true, mysql waits for new incoming event to send after pymyelarepl reads all the previous events. 13 | 14 | es: 15 | host: '172.0.0.3' # or '127.0.0.1' 16 | port: 9200 # or 9201 -------------------------------------------------------------------------------- /test/test_basic.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pymysql 4 | import requests 5 | import unittest 6 | import yaml 7 | 8 | from pymyelarepl import PyMyElaRepl 9 | 10 | 11 | class BasicTestCase(unittest.TestCase): 12 | def execute(self, query): 13 | cursor = self.conn_control.cursor() 14 | cursor.execute(query) 15 | return cursor 16 | 17 | def setUp(self): 18 | config_path = os.path.join(os.path.dirname(__file__), 'config.yaml') 19 | 20 | with open(config_path) as f: 21 | self.config = yaml.load(f, Loader=yaml.FullLoader) 22 | 23 | mysql_config = { 24 | "host": self.config['mysql']['host'], 25 | "user": self.config['mysql']['user'], 26 | "passwd": self.config['mysql']['password'], 27 | "port": self.config['mysql']['port'], 28 | "use_unicode": True, 29 | "charset": "utf8", # regarded as utf8mb4 30 | } 31 | 32 | self.conn_control = pymysql.connect(**mysql_config) 33 | self.execute("DROP DATABASE IF EXISTS {db}".format(db=self.config['mysql']['db'])) 34 | self.execute("CREATE DATABASE {db}".format(db=self.config['mysql']['db'])) 35 | self.execute("USE {db}".format(db=self.config['mysql']['db'])) 36 | self.execute("RESET MASTER") 37 | 38 | self.es_url_for_all_data = 'http://{host}:{port}/_all'.format( 39 | host=self.config['es']['host'], 40 | port=self.config['es']['port'] 41 | ) 42 | 43 | self.pymyelarepl = PyMyElaRepl(config_path) 44 | 45 | def test_basic_replication(self): 46 | self.execute( 47 | """ 48 | CREATE TABLE basic_replication( 49 | id INT PRIMARY KEY AUTO_INCREMENT, 50 | f FLOAT, 51 | t TIMESTAMP) 52 | """ 53 | ) 54 | 55 | self.execute("INSERT INTO basic_replication(id, f, t) VALUES(1, 12.34, '2023-09-25 00:00:00')") 56 | self.execute("INSERT INTO basic_replication(id, f, t) VALUES(2, 12.34, '2023-09-25 00:00:00')") 57 | self.conn_control.commit() 58 | 59 | self.execute("UPDATE basic_replication SET f=56.78 WHERE id=1") 60 | self.execute("UPDATE basic_replication SET f=56.78 WHERE id=2") 61 | self.conn_control.commit() 62 | 63 | self.execute("DELETE FROM basic_replication WHERE id=1") 64 | self.execute("DELETE FROM basic_replication WHERE id=2") 65 | self.conn_control.commit() 66 | 67 | self.pymyelarepl.run() 68 | if_error = True if True in self.pymyelarepl.if_error else False 69 | self.assertEqual(if_error, False) 70 | 71 | def tearDown(self): 72 | self.execute("DROP DATABASE IF EXISTS {db}".format(db=self.config['mysql']['db'])) 73 | self.execute("RESET MASTER") 74 | self.conn_control.close() 75 | requests.delete(self.es_url_for_all_data) 76 | 77 | unittest.main() --------------------------------------------------------------------------------