├── mysql_cluster_manager ├── src │ ├── mcm │ │ ├── __init__.py │ │ ├── utils.py │ │ ├── minio.py │ │ ├── proxysql.py │ │ ├── actions.py │ │ ├── consul.py │ │ └── mysql.py │ └── mysql_cluster_manager.py ├── requirements.txt └── pylintrc ├── .gitignore ├── docs ├── images │ ├── architecture.odg │ ├── architecture.png │ └── architecture.svg ├── deployment-kubernetes.md └── deployment-docker-swarm.md ├── entry-point.sh ├── .github └── workflows │ ├── build.yml │ └── codeql.yml ├── CONTRIBUTING.md ├── deployment ├── mysql-docker-swarm.yml └── mysql-kubernetes-iscsi.yml ├── Dockerfile-mysql ├── CODE_OF_CONDUCT.md ├── README.md └── LICENSE /mysql_cluster_manager/src/mcm/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | .env 3 | settings.json 4 | -------------------------------------------------------------------------------- /docs/images/architecture.odg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jnidzwetzki/mysql-ha-cloud/HEAD/docs/images/architecture.odg -------------------------------------------------------------------------------- /docs/images/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jnidzwetzki/mysql-ha-cloud/HEAD/docs/images/architecture.png -------------------------------------------------------------------------------- /mysql_cluster_manager/requirements.txt: -------------------------------------------------------------------------------- 1 | mysql-connector-python==8.0.22 2 | netifaces==0.10.9 3 | pylint==2.5.3 4 | python-consul2==0.1.4 5 | -------------------------------------------------------------------------------- /entry-point.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # 3 | # Start the MySQL cluster manager 4 | # 5 | ######################## 6 | 7 | # Exit on error 8 | set -e 9 | 10 | ./mysql_cluster_manager.py join_or_bootstrap 11 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Build project 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | compile: 7 | runs-on: ubuntu-22.04 8 | 9 | steps: 10 | - name: Checkout Source Code 11 | uses: actions/checkout@v2 12 | - name: Setup python 3.10 13 | uses: actions/setup-python@v2 14 | with: 15 | python-version: '3.10' 16 | - name: Install dependencies 17 | run: | 18 | python -m pip install -r mysql_cluster_manager/requirements.txt 19 | cd mysql_cluster_manager 20 | pylint src 21 | src/mysql_cluster_manager.py --help 22 | 23 | - name: Build docker image 24 | run: docker build -t jnidzwetzki/mysql-ha-cloud:latest -f Dockerfile-mysql . 25 | 26 | - name: Login at docker hub 27 | run: | 28 | docker login -u ${{secrets.DOCKER_USER}} -p ${{secrets.DOCKER_PASSWORD}} 29 | 30 | - name: Push image 31 | if: github.ref == 'refs/heads/main' 32 | run: | 33 | docker push jnidzwetzki/mysql-ha-cloud:latest 34 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to this project 2 | First of all, thank you so much for being interested in contributing. At the moment, you can contribute to this project by: 3 | 4 | * Finding and reporting issues 5 | * Creating new examples 6 | * Requesting new features 7 | * Open pull requests 8 | 9 | ## New examples 10 | The provided examples for the BBoxDB-Client are very limited at the moment. Feel free to write new examples and open a pull request. 
class Utils:
    """
    Generic helper utilities for the cluster manager.
    """

    @staticmethod
    def get_local_ip_address():
        """
        Return the IPv4 address of the local bind interface.

        The interface name is read from the MCM_BIND_INTERFACE
        environment variable and defaults to 'eth0'.
        """

        interface = os.getenv('MCM_BIND_INTERFACE', "eth0")
        return netifaces.ifaddresses(interface)[netifaces.AF_INET][0]["addr"]

    @staticmethod
    def is_refresh_needed(last_execution, max_timedelta):
        """
        Return True if a new execution is needed, based on the time delta
        since the last execution.

        A last_execution of None (never executed) always needs a refresh.
        """
        if last_execution is None:
            return True

        return datetime.now() - last_execution > max_timedelta

    @staticmethod
    def wait_for_backup_exists(consul):
        """
        Wait for a backup to occur.

        Polls the S3 bucket up to 100 times and returns True as soon as
        a backup exists, False if none appeared after all retries.
        """

        Minio.setup_connection()

        retry_counter = 100

        for _ in range(retry_counter):
            backup_exists = Minio.does_backup_exists()

            if backup_exists:
                return True

            # Keep consul sessions alive while we wait
            consul.refresh_sessions()

            # Bugfix: time.sleep() takes SECONDS, not milliseconds. The
            # previous value of 5000 paused ~83 minutes per retry, letting
            # the consul sessions expire despite the refresh above.
            time.sleep(5)

        return False
#!/usr/bin/env python3

"""This file is part of the MySQL cluster manager.

Command line entry point: parses the requested operation, validates the
required environment variables and dispatches to the matching action.
"""

import os
import sys
import logging
import argparse

from mcm.actions import Actions
from mcm.consul import Consul
from mcm.mysql import Mysql
from mcm.proxysql import Proxysql

parser = argparse.ArgumentParser(
    description="MySQL cluster manager",
    epilog="For more info, please see: https://github.com/jnidzwetzki/mysql-ha-cloud")

# Keep this list in sync with the dispatch at the bottom of the file
# (mysql_autobackup and proxysql_init were missing from the help text).
AVAILABLE_OPERATIONS = ("(join_or_bootstrap, mysql_backup, mysql_restore, mysql_start, "
                        "mysql_stop, mysql_autobackup, proxysql_init)")
parser.add_argument('operation', metavar='operation',
                    help=f'Operation to be executed {AVAILABLE_OPERATIONS}')

log_levels = ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
parser.add_argument('--log-level', default='INFO', choices=log_levels)

# Parse args
args = parser.parse_args()

# Configure logging
logging.basicConfig(level=args.log_level,
                    format='%(asctime)-15s %(levelname)s %(name)s %(message)s')

# Check for all needed env vars; fail fast before any action runs
required_envvars = ['CONSUL_BIND_INTERFACE', 'CONSUL_BOOTSTRAP_SERVER',
                    'MINIO_ACCESS_KEY', 'MINIO_SECRET_KEY', 'MINIO_URL',
                    'MYSQL_ROOT_PASSWORD', 'MYSQL_BACKUP_USER', 'MYSQL_BACKUP_PASSWORD',
                    'MYSQL_REPLICATION_USER', 'MYSQL_REPLICATION_PASSWORD']

for required_var in required_envvars:
    if required_var not in os.environ:
        logging.error("Required environment %s not found, exiting", required_var)
        sys.exit(1)

# Perform operations
if args.operation == 'join_or_bootstrap':
    Actions.join_or_bootstrap()
elif args.operation == 'mysql_backup':
    Mysql.backup_data()
elif args.operation == 'mysql_restore':
    Mysql.restore_backup()
elif args.operation == 'mysql_start':
    Mysql.server_start()
elif args.operation == 'mysql_stop':
    Mysql.server_stop()
elif args.operation == 'mysql_autobackup':
    Mysql.create_backup_if_needed()
elif args.operation == 'proxysql_init':
    Proxysql.inital_setup()
    nodes = Consul.get_instance().get_all_registered_nodes()
    Proxysql.set_mysql_server(nodes)
else:
    # Bugfix: pass the raw value to the lazy %-formatter. The previous
    # code passed the set literal {args.operation}, which logged
    # "{'foo'}" instead of "foo".
    logging.error("Unknown operation: %s", args.operation)
    sys.exit(1)
16 | 17 | RUN \ 18 | # \ 19 | # Install GPG Key \ 20 | # \ 21 | apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 467B942D3A79BD29 && \ 22 | # \ 23 | # Pin MySQL to 8.0.21 due to: https://jira.percona.com/browse/PXB-2315 \ 24 | # \ 25 | apt-mark hold mysql-common mysql-community-client mysql-community-client-core mysql-community-server-core && \ 26 | # \ 27 | # Run System Upgrade \ 28 | # \ 29 | apt-get update && \ 30 | apt-get upgrade -y && \ 31 | # \ 32 | # Install system basics \ 33 | # \ 34 | apt-get install -y unzip curl wget gnupg2 lsb-release procps && \ 35 | # \ 36 | # Install percona XtraBackup \ 37 | # \ 38 | apt-get install -y libdbd-mysql-perl libcurl4-openssl-dev rsync libev4 && \ 39 | wget https://www.percona.com/downloads/Percona-XtraBackup-LATEST/Percona-XtraBackup-8.0.14/binary/debian/buster/x86_64/percona-xtrabackup-80_8.0.14-1.buster_amd64.deb -O /tmp/xtrabackup.deb && \ 40 | dpkg -i /tmp/xtrabackup.deb && \ 41 | rm /tmp/xtrabackup.deb && \ 42 | # \ 43 | # Install consul \ 44 | # \ 45 | wget https://releases.hashicorp.com/consul/1.8.4/consul_1.8.4_linux_amd64.zip -O /tmp/consul.zip && \ 46 | echo "220b0af8e439d2fe3fc7e1ca07bdbda1f3ee5b2fa889983c04e7004d99ade5ece005b45e1288bfcbe2bf847f23d35684845bd6edbf59fe4220be8e9e83f05439 /tmp/consul.zip" | sha512sum -c && \ 47 | unzip /tmp/consul.zip -d /usr/local/bin && \ 48 | rm /tmp/consul.zip && \ 49 | # \ 50 | # Install minIO client \ 51 | # \ 52 | wget https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && \ 53 | chmod +x /usr/local/bin/mc && \ 54 | # \ 55 | # Install mysql cluster manager \ 56 | # \ 57 | apt-get install -y python3.7 python3.7-dev python3-pip && \ 58 | pip3 install -r requirements.txt && \ 59 | # \ 60 | # Install ProxySQL \ 61 | # \ 62 | wget https://github.com/sysown/proxysql/releases/download/v2.0.15/proxysql_2.0.15-debian10_amd64.deb && \ 63 | dpkg -i proxysql_2.0.15-debian10_amd64.deb && \ 64 | rm proxysql_2.0.15-debian10_amd64.deb 65 | 66 | CMD 
["bash", "entry-point.sh"] 67 | EXPOSE 6032/tcp 68 | -------------------------------------------------------------------------------- /.github/workflows/codeql.yml: -------------------------------------------------------------------------------- 1 | # For most projects, this workflow file will not need changing; you simply need 2 | # to commit it to your repository. 3 | # 4 | # You may wish to alter this file to override the set of languages analyzed, 5 | # or to provide custom queries or build logic. 6 | # 7 | # ******** NOTE ******** 8 | # We have attempted to detect the languages in your repository. Please check 9 | # the `language` matrix defined below to confirm you have the correct set of 10 | # supported CodeQL languages. 11 | # 12 | name: "CodeQL" 13 | 14 | on: 15 | push: 16 | branches: [ "main" ] 17 | pull_request: 18 | # The branches below must be a subset of the branches above 19 | branches: [ "main" ] 20 | schedule: 21 | - cron: '23 3 * * 5' 22 | 23 | jobs: 24 | analyze: 25 | name: Analyze 26 | runs-on: ubuntu-latest 27 | permissions: 28 | actions: read 29 | contents: read 30 | security-events: write 31 | 32 | strategy: 33 | fail-fast: false 34 | matrix: 35 | language: [ 'python' ] 36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ] 37 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support 38 | 39 | steps: 40 | - name: Checkout repository 41 | uses: actions/checkout@v3 42 | 43 | # Initializes the CodeQL tools for scanning. 44 | - name: Initialize CodeQL 45 | uses: github/codeql-action/init@v2 46 | with: 47 | languages: ${{ matrix.language }} 48 | # If you wish to specify custom queries, you can do so here or in a config file. 49 | # By default, queries listed here will override any specified in a config file. 50 | # Prefix the list here with "+" to use these queries and those in the config file. 
51 | 52 | # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 53 | # queries: security-extended,security-and-quality 54 | 55 | 56 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java). 57 | # If this step fails, then you should remove it and run the build manually (see below) 58 | - name: Autobuild 59 | uses: github/codeql-action/autobuild@v2 60 | 61 | # ℹ️ Command-line programs to run using the OS shell. 62 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 63 | 64 | # If the Autobuild fails above, remove it and uncomment the following three lines. 65 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. 66 | 67 | # - run: | 68 | # echo "Run, Build Application using script" 69 | # ./location_of_script_within_repo/buildscript.sh 70 | 71 | - name: Perform CodeQL Analysis 72 | uses: github/codeql-action/analyze@v2 73 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
class Minio:
    """
    This class encapsulates all MinIO related things.
    """

    # Path of the MinIO command line client inside the container
    minio_binary = "/usr/local/bin/mc"

    @staticmethod
    def setup_connection():
        """
        Set up the MinIO client.

        Registers the 'backup' alias, creates the backup bucket (idempotent)
        and attaches a 7-day expiry lifecycle rule to it.
        """

        logging.info("Setup MinIO agent")

        minio_url = os.environ.get("MINIO_URL")
        minio_access_key = os.environ.get("MINIO_ACCESS_KEY")
        minio_secret_key = os.environ.get("MINIO_SECRET_KEY")

        bucket_name = "backup/mysqlbackup"

        # Register server
        mc_args = [Minio.minio_binary, "alias", "set", "backup",
                   minio_url, minio_access_key, minio_secret_key]
        subprocess.run(mc_args, check=True)

        # Create bucket (-p: do not fail if the bucket already exists)
        mc_create_bucket = [Minio.minio_binary, "mb", bucket_name, "-p"]
        subprocess.run(mc_create_bucket, check=True)

        # Set expire policy on bucket. Consistency fix: use the
        # double-dash long-option form (--expiry-days), matching the
        # --id=expire_rule option on the same command.
        mc_set_policy_bucket = [Minio.minio_binary, "ilm", "edit", "--id=expire_rule",
                                "--expiry-days=7", bucket_name]
        subprocess.run(mc_set_policy_bucket, check=True)

    @staticmethod
    def get_backup_info():
        """
        Return the raw backup listing of the bucket.

        Each element is a bytes line of the form
        b'<change date> UTC # <filename>'.
        """
        # Call Setup to ensure bucket and connection do exist
        Minio.setup_connection()

        logging.debug("Searching for latest MySQL Backup")
        mc_search = [Minio.minio_binary, "find", "backup/mysqlbackup/", "--name",
                     "mysql*.tgz", "-print", "{time} # {base}"]

        # Example output:
        # mc find backup/mysqlbackup/ --name "mysql*.tgz" -print '{time} # {base}'
        # 2020-11-08 08:42:12 UTC # mysql_backup_1604824911.437146.tgz
        # 2020-11-08 08:50:53 UTC # mysql_backup_1604825437.6691067.tgz
        # 2020-11-08 08:55:03 UTC # mysql_backup_1604825684.9835322.tgz

        process = subprocess.run(mc_search, check=True, capture_output=True)
        files = process.stdout.splitlines()

        return files

    @staticmethod
    def does_backup_exists():
        """
        Return True if at least one backup exists in the bucket.
        """
        files = Minio.get_backup_info()

        if not files:
            logging.debug("S3 Bucket is empty")
            return False

        return True

    @staticmethod
    def get_latest_backup():
        """
        Return (filename, change_date) of the newest backup in the bucket.

        Both tuple elements are None when the bucket is empty.
        """
        files = Minio.get_backup_info()

        newest_changedate = None
        newest_file = None

        # Take the newest file; each line is '<change date> # <filename>'
        for element in files:
            changedate_text, filename = element.decode().split("#")

            # Strip the padding spaces around the '#' separator
            changedate = datetime.datetime.strptime(changedate_text.strip(),
                                                    '%Y-%m-%d %H:%M:%S UTC')

            if (newest_changedate is None) or (changedate > newest_changedate):
                newest_changedate = changedate
                newest_file = filename.strip()

        logging.debug("Newest backup file '%s', date '%s'", newest_file, newest_changedate)

        return (newest_file, newest_changedate)
class Proxysql:
    """
    This class encapsulates all ProxySQL related things.

    All statements are sent to the SQLite-based ProxySQL admin
    interface (port 6032) via the Mysql helper.
    """

    def __init__(self):
        """
        Init the instance.
        """
        # Currently configured MySQL backends (sorted); starts empty so
        # the first update_mysql_server_if_needed() call always configures
        self.configured_mysql_hosts = ()


    @staticmethod
    def _escape_sql_literal(value):
        """
        Escape a value for embedding into a single-quoted SQL string
        literal. The ProxySQL admin interface is SQLite based; single
        quotes are escaped by doubling them. None renders as 'None',
        matching the previous f-string behavior.
        """
        return str(value).replace("'", "''")

    @staticmethod
    def inital_setup():
        """
        Initial setup of ProxySQL: monitoring user, replication
        hostgroups, read/write-split query rules and the application user.

        NOTE: the method name keeps the historic 'inital' spelling
        because external callers dispatch on it.
        """
        logging.info("Performing initial ProxySQL setup")

        # Setup Monitoring User. Security fix: escape env-derived values
        # before embedding them into SQL literals — a password containing
        # a single quote previously broke (or altered) the statement.
        replication_user = Proxysql._escape_sql_literal(
            os.environ.get("MYSQL_REPLICATION_USER"))
        replication_password = Proxysql._escape_sql_literal(
            os.environ.get("MYSQL_REPLICATION_PASSWORD"))

        Proxysql.perform_sql_query(f"UPDATE global_variables SET variable_value='{replication_user}' "
                                   "WHERE variable_name='mysql-monitor_username'")
        Proxysql.perform_sql_query(f"UPDATE global_variables SET variable_value='{replication_password}' "
                                   "WHERE variable_name='mysql-monitor_password'")

        # Configure read write hostgroup (writer = 1, reader = 2)
        Proxysql.perform_sql_query("DELETE FROM mysql_replication_hostgroups")
        Proxysql.perform_sql_query("INSERT INTO mysql_replication_hostgroups "
                                   "(writer_hostgroup, reader_hostgroup,comment) VALUES (1, 2, 'cluster1')")

        # Configure read write split (plain SELECTs go to the readers,
        # SELECT ... FOR UPDATE must hit the writer)
        Proxysql.perform_sql_query("INSERT INTO mysql_query_rules (active, match_digest, "
                                   "destination_hostgroup, apply) VALUES (1, '^SELECT.*', 2, 0)")
        Proxysql.perform_sql_query("INSERT INTO mysql_query_rules (active, match_digest, "
                                   "destination_hostgroup, apply) VALUES (1, '^SELECT.*FOR UPDATE', 1, 1)")

        # Configure Application User (escaped, see above)
        application_user = Proxysql._escape_sql_literal(
            os.environ.get("MYSQL_APPLICATION_USER"))
        application_password = Proxysql._escape_sql_literal(
            os.environ.get("MYSQL_APPLICATION_PASSWORD"))

        Proxysql.perform_sql_query("DELETE FROM mysql_users")
        Proxysql.perform_sql_query("INSERT INTO mysql_users(username, password, default_hostgroup) "
                                   f"VALUES ('{application_user}', '{application_password}', 1)")

        # Persist and activate config
        Proxysql.persist_and_activate_config()

    @staticmethod
    def persist_and_activate_config():
        """
        Activate the in-memory ProxySQL configuration (LOAD ... TO
        RUNTIME) and persist it to disk (SAVE ... TO DISK).
        """
        Proxysql.perform_sql_query("LOAD MYSQL VARIABLES TO RUNTIME")
        Proxysql.perform_sql_query("LOAD MYSQL SERVERS TO RUNTIME")
        Proxysql.perform_sql_query("LOAD MYSQL USERS TO RUNTIME")
        Proxysql.perform_sql_query("LOAD MYSQL QUERY RULES TO RUNTIME")

        Proxysql.perform_sql_query("SAVE MYSQL VARIABLES TO DISK")
        Proxysql.perform_sql_query("SAVE MYSQL SERVERS TO DISK")
        Proxysql.perform_sql_query("SAVE MYSQL USERS TO DISK")
        Proxysql.perform_sql_query("SAVE MYSQL QUERY RULES TO DISK")

    @staticmethod
    def set_mysql_server(mysql_servers):
        """
        Replace the configured backend MySQL servers with the given list
        of hostnames (all placed into the writer hostgroup 1).
        """
        logging.info("Removing all old backend MySQL Server")
        Proxysql.perform_sql_query("DELETE FROM mysql_servers")

        for mysql_server in mysql_servers:
            logging.info("Adding %s as backend MySQL Server", mysql_server)
            # Escape the hostname before embedding it into the SQL literal
            hostname = Proxysql._escape_sql_literal(mysql_server)
            Proxysql.perform_sql_query("INSERT INTO mysql_servers(hostgroup_id, hostname, port) "
                                       f"VALUES (1, '{hostname}', 3306)")

        Proxysql.perform_sql_query("LOAD MYSQL SERVERS TO RUNTIME")
        Proxysql.perform_sql_query("SAVE MYSQL SERVERS TO DISK")

    def update_mysql_server_if_needed(self, current_mysql_servers):
        """
        Reconfigure the MySQL backends only when the (sorted) server list
        has changed. Returns True if a reconfiguration happened.
        """
        current_mysql_servers.sort()

        if self.configured_mysql_hosts != current_mysql_servers:
            logging.info("MySQL backend has changed (old=%s, new=%s), reconfiguring",
                         self.configured_mysql_hosts, current_mysql_servers)
            Proxysql.set_mysql_server(current_mysql_servers)
            self.configured_mysql_hosts = current_mysql_servers
            return True

        return False

    @staticmethod
    def perform_sql_query(sql):
        """
        Execute a statement against the ProxySQL admin interface.

        NOTE(review): admin/admin are ProxySQL's default credentials —
        they should be changed in /etc/proxysql.cnf and made
        configurable here.
        """
        Mysql.execute_statement_or_exit(sql=sql, username="admin", password="admin", database="", port=6032)

    @staticmethod
    def start_proxysql():
        """
        Start the ProxySQL daemon (initial run, reading /etc/proxysql.cnf).
        """

        # Init proxysql
        proxysql_init = ["/usr/bin/proxysql", "--idle-threads", "-c", "/etc/proxysql.cnf", "--initial"]
        subprocess.run(proxysql_init, check=True)

        # Start the proxysql
        # proxysql = ["/usr/bin/proxysql", "--idle-threads", "-c", "/etc/proxysql.cnf"]
        # subprocess.run(proxysql, check=True)

        return True
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MySQL-HA-Cloud - A Highly-Available Self-Hosted MySQL Cloud Container Orchestrator 2 | 3 | Build Status 4 | 5 | 6 | 7 | 8 | Join the chat at https://gitter.im/mysql-ha-cloud/Lobby 9 | 10 | 11 | 12 |
13 | This project provides a container image for a highly-available MySQL installation that can be deployed to Kubernetes or Docker Swarm environments. 14 |
15 |
16 | 17 | 18 | **Project state:** Beta version available 19 | 20 | ## Architecture 21 | 22 | 23 | The `mysql-ha-cloud` container image contains a [MySQL 8.0 Server](https://dev.mysql.com/doc/relnotes/mysql/8.0/en/), [Consul](https://www.hashicorp.com/products/consul) for the service discovery, health checks of the nodes, and the MySQL replication leader election. [ProxySQL](https://proxysql.com/) provides the entry point for the client; the software forwards the connections of the client to the MySQL nodes. Write requests are send to the replication leader, and read requests are sent to the replication follower. In addition, [MinIO](https://min.io/) is used as backup storage and to bootstrap the replication follower. Backups are created by using [XtraBackup](https://www.percona.com/software/mysql-database/percona-xtrabackup) without creating table locks. 24 | 25 | Container Orchestrators like [Kubernetes](https://kubernetes.io/) or [Docker Swarm](https://docs.docker.com/get-started/swarm-deploy/) can be used to deploy the provided [container image](https://hub.docker.com/repository/docker/jnidzwetzki/mysql-ha-cloud). 26 | 27 | The complete architecture is highly-available; failing and newly started containers are handled automatically. A new replication leader is automatically elected if the current leader fails. ProxySQL redirects database connections transparently to the nodes; the complete distribution and fail-over logic are hidden from the client applications. The solution is also horizontal scalable, new MySQL replication follower nodes can be added, and the query load is automatically distributed to these nodes. 
28 | 29 | ## Features 30 | 31 | * ✅ Automatic (non locking) backups on S3 Buckets using Xtrabackup and MinIO 32 | * ✅ Automatic MySQL replication leader election 33 | * ✅ Automatic MySQL replication leader configuration and fail-over 34 | * ✅ Automatic MySQL replication follower configuration 35 | * ✅ Automatic MySQL provisioning 36 | * ✅ Transparent connection routing for read-/write-splits using ProxySQL 37 | * ✅ Horizontal scalable 38 | * ✅ Compatible with Kubernetes and Docker Swarm 39 | 40 | ## What is The Main Focus of This Project? 41 | 42 | This project provides a robust, tested, and easy to deploy container image for self-hosted MySQL cloud installations. The goal is that everybody can deploy highly-available and scalable MySQL installations and eliminate the DBMS as a single point of failure in his architecture. 43 | 44 | ## Why Do I Need MySQL-HA-Cloud? 45 | 46 | In today's software development, robust applications are often developed as stateless cloud-native containers. Such containers can be easily moved between hosts, automatically restarted on failures, and replicated to handle increasing workloads. On the other hand, data are stored in relational database systems (RDBMS), which are often running on bare-metal hardware. Relational databases are stateful applications that are hard to scale, and they are often a single point of failure; high availability (HA) is rarely implemented. 47 | 48 | ## Are NoSQL Databases a Solution? 49 | 50 | NoSQL databases are mostly cloud-native applications; however, they leak of the support of a full flagged relational database. Features such as transactions, complex data models, or consistency are omitted to make these systems horizontal scalable and fault-tolerant. However, simple tasks that can easily be implemented by using a relational database (e.g., an increasing counter, secondary indexes, isolation of uncommitted data, or joins) can be hard to implement. 
Therefore, relational databases are still used by modern applications. 51 | 52 | ## Deployment and Usage Examples 53 | * Deployment using [Docker Swarm](docs/deployment-docker-swarm.md) 54 | * Deployment using [Kubernetes](docs/deployment-kubernetes.md)
70 | -------------------------------------------------------------------------------- /deployment/mysql-kubernetes-iscsi.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: chap-secret 5 | type: "kubernetes.io/iscsi-chap" 6 | data: 7 | node.session.auth.username_in: bXl1c2VyLW91dGdvaW5n 8 | node.session.auth.password_in: bXlwYXNzMg== 9 | node.session.auth.username: bXl1c2VyLWluY29taW5n 10 | node.session.auth.password: bXlwYXNzMQ== 11 | 12 | --- 13 | 14 | apiVersion: apps/v1 15 | kind: Deployment 16 | metadata: 17 | name: minio 18 | spec: 19 | selector: 20 | matchLabels: 21 | app: minio # has to match .spec.template.metadata.labels 22 | strategy: 23 | type: Recreate 24 | template: 25 | metadata: 26 | labels: 27 | app: minio #This label is used as a selector in Service definition 28 | spec: 29 | containers: 30 | - name: minio 31 | image: minio/minio:RELEASE.2020-10-18T21-54-12Z 32 | args: 33 | - server 34 | - /data 35 | env: 36 | - name: MINIO_ACCESS_KEY 37 | value: "minio" 38 | - name: MINIO_SECRET_KEY 39 | value: "minio123" 40 | ports: 41 | - containerPort: 9000 42 | volumeMounts: 43 | - mountPath: "/data" 44 | name: iscsivol 45 | tolerations: 46 | - key: "node.kubernetes.io/unreachable" 47 | operator: "Exists" 48 | effect: "NoExecute" 49 | tolerationSeconds: 30 50 | - key: "node.kubernetes.io/not-ready" 51 | operator: "Exists" 52 | effect: "NoExecute" 53 | tolerationSeconds: 30 54 | 55 | volumes: 56 | - name: iscsivol 57 | iscsi: 58 | targetPortal: 192.168.178.199 59 | iqn: iqn.2020-12.block-storage:lun1 60 | lun: 1 61 | fsType: ext4 62 | readOnly: false 63 | chapAuthDiscovery: false 64 | chapAuthSession: true 65 | secretRef: 66 | name: chap-secret 67 | --- 68 | 69 | apiVersion: v1 70 | kind: Service 71 | metadata: 72 | name: minio 73 | spec: 74 | type: NodePort 75 | ports: 76 | - port: 9000 77 | targetPort: 9000 78 | nodePort: 30013 79 | protocol: TCP 80 | selector: 81 | 
# Looks for labels `app:minio` in the namespace and applies the spec 82 | app: minio 83 | 84 | --- 85 | 86 | apiVersion: v1 87 | kind: Service 88 | metadata: 89 | name: consul 90 | labels: 91 | app: consul 92 | spec: 93 | type: NodePort 94 | ports: 95 | - port: 8500 96 | targetPort: 8500 97 | nodePort: 30014 98 | protocol: TCP 99 | selector: 100 | # Looks for labels `app:consul` in the namespace and applies the spec 101 | app: consul 102 | 103 | --- 104 | 105 | apiVersion: apps/v1 106 | kind: StatefulSet 107 | metadata: 108 | name: consul 109 | spec: 110 | selector: 111 | matchLabels: 112 | app: consul # has to match .spec.template.metadata.labels 113 | serviceName: "consul" 114 | replicas: 3 115 | template: 116 | metadata: 117 | labels: 118 | app: consul # has to match .spec.selector.matchLabels 119 | spec: 120 | terminationGracePeriodSeconds: 10 121 | containers: 122 | - name: consul 123 | image: consul:1.9 124 | args: 125 | - agent 126 | - -ui 127 | - -data-dir 128 | - /consul/data 129 | - -server 130 | - -client 131 | - 0.0.0.0 132 | - -retry-join 133 | - consul-0.consul 134 | - -retry-join 135 | - consul-1.consul 136 | - -retry-join 137 | - consul-2.consul 138 | - -bootstrap-expect=3 139 | ports: 140 | - containerPort: 8500 141 | name: web 142 | --- 143 | 144 | apiVersion: apps/v1 145 | kind: StatefulSet 146 | metadata: 147 | name: mysql 148 | spec: 149 | selector: 150 | matchLabels: 151 | app: mysql # has to match .spec.template.metadata.labels 152 | serviceName: "mysql" 153 | replicas: 3 154 | template: 155 | metadata: 156 | labels: 157 | app: mysql # has to match .spec.selector.matchLabels 158 | spec: 159 | terminationGracePeriodSeconds: 10 160 | containers: 161 | - name: mysql 162 | image: jnidzwetzki/mysql-ha-cloud:latest 163 | ports: 164 | - containerPort: 3306 165 | name: mysql 166 | - containerPort: 6032 167 | name: sqlproxy 168 | env: 169 | - name: CONSUL_BIND_INTERFACE 170 | value: eth0 171 | - name: CONSUL_BOOTSTRAP_SERVER 172 | value: 
"consul-0.consul" 173 | - name: MINIO_ACCESS_KEY 174 | value: minio 175 | - name: MINIO_SECRET_KEY 176 | value: minio123 177 | - name: MINIO_URL 178 | value: http://minio:9000 179 | - name: MCM_BIND_INTERFACE 180 | value: eth0 181 | - name: MYSQL_ROOT_PASSWORD 182 | value: verysecret123 183 | - name: MYSQL_BACKUP_USER 184 | value: backup_user 185 | - name: MYSQL_BACKUP_PASSWORD 186 | value: backup_secret 187 | - name: MYSQL_REPLICATION_USER 188 | value: replication_user 189 | - name: MYSQL_REPLICATION_PASSWORD 190 | value: replication_secret 191 | - name: MYSQL_APPLICATION_USER 192 | value: mysql_user 193 | - name: MYSQL_APPLICATION_PASSWORD 194 | value: mysql_secret 195 | 196 | --- 197 | 198 | apiVersion: v1 199 | kind: Service 200 | metadata: 201 | name: mysql 202 | labels: 203 | app: mysql 204 | spec: 205 | type: NodePort 206 | ports: 207 | - port: 3306 208 | targetPort: 3306 209 | nodePort: 30015 210 | protocol: TCP 211 | name: mysql 212 | - port: 6032 213 | targetPort: 6032 214 | nodePort: 30016 215 | protocol: TCP 216 | name: mysqlproxy 217 | selector: 218 | # Looks for labels `app:mysql` in the namespace and applies the spec 219 | app: mysql 220 | -------------------------------------------------------------------------------- /mysql_cluster_manager/src/mcm/actions.py: -------------------------------------------------------------------------------- 1 | """This file contains the actions of the cluster manager""" 2 | 3 | import sys 4 | import time 5 | import logging 6 | 7 | from datetime import timedelta, datetime 8 | 9 | from mcm.consul import Consul 10 | from mcm.minio import Minio 11 | from mcm.mysql import Mysql 12 | from mcm.proxysql import Proxysql 13 | from mcm.utils import Utils 14 | 15 | class Actions: 16 | """The actions of the application""" 17 | 18 | @staticmethod 19 | def join_or_bootstrap(): 20 | """ 21 | Join the existing cluster or bootstrap a new cluster 22 | """ 23 | 24 | # Start the local consul agent 25 | consul_process = 
Consul.agent_start() 26 | 27 | # Check if we have an existing backup to restore 28 | # Use this backup if exists, or init a new MySQL database 29 | Minio.setup_connection() 30 | backup_exists = Minio.does_backup_exists() 31 | 32 | # Test for unstable environment (other nodes are present and no leader is present) 33 | # We don't want to become the new leader on the restored backup directly 34 | # 35 | # Needs be be checked before Consul.get_instance().register_node() is called 36 | # 37 | while Consul.get_instance().get_replication_leader_ip() is None: 38 | nodes = Consul.get_instance().get_all_registered_nodes() 39 | if len(nodes) == 0: 40 | break 41 | 42 | logging.warning("Other nodes (%s) detected but no leader, waiting", nodes) 43 | time.sleep(5) 44 | 45 | # Try to become session leader (needed to decide if we can create a database) 46 | replication_leader = Consul.get_instance().try_to_become_replication_leader() 47 | 48 | # Keep session alive until we start the main loop 49 | Consul.get_instance().start_session_auto_refresh_thread() 50 | 51 | logging.info("Init local node (leader=%s, backup=%s)", 52 | replication_leader, backup_exists) 53 | 54 | if replication_leader and not backup_exists: 55 | Mysql.init_database_if_needed() 56 | elif replication_leader and backup_exists: 57 | Mysql.restore_backup_or_exit() 58 | elif not replication_leader and backup_exists: 59 | Mysql.restore_backup_or_exit() 60 | elif not replication_leader and not backup_exists: 61 | logging.info("We are not the replication leader, waiting for backups") 62 | backup_exists = Utils.wait_for_backup_exists(Consul.get_instance()) 63 | 64 | if not backup_exists: 65 | logging.error("No backups to restore available, please check master logs, exiting") 66 | sys.exit(1) 67 | 68 | Mysql.restore_backup_or_exit() 69 | 70 | else: 71 | logging.error("This case should not happen (leader=%s, backup=%s)", 72 | replication_leader, backup_exists) 73 | sys.exit(1) 74 | 75 | # Start ProxySQL 76 | 
Proxysql.start_proxysql() 77 | 78 | # Start MySQL 79 | mysql_process = Mysql.server_start() 80 | 81 | # Configure ProxySQL 82 | Proxysql.inital_setup() 83 | 84 | # Get data from MySQL 85 | mysql_version = Mysql.execute_query_as_root("SELECT version()")[0]['version()'] 86 | server_id = Mysql.execute_query_as_root("SELECT @@GLOBAL.server_id")[0]['@@GLOBAL.server_id'] 87 | 88 | Consul.get_instance().register_node(mysql_version=mysql_version, 89 | server_id=server_id) 90 | 91 | # Remove the old replication configuration (e.g., from backup) 92 | Mysql.delete_replication_config() 93 | 94 | # Register service as leader or follower 95 | Consul.get_instance().register_service(replication_leader) 96 | 97 | # Session keep alive will be handled by the main event loop 98 | Consul.get_instance().stop_session_auto_refresh_thread() 99 | 100 | # Run the main event loop 101 | Actions.join_main_event_loop(consul_process, mysql_process) 102 | 103 | @staticmethod 104 | def join_main_event_loop(consul_process, mysql_process): 105 | """ 106 | The main event loop for the join_or_bootstrap action 107 | """ 108 | 109 | last_backup_check = None 110 | last_session_refresh = None 111 | last_replication_leader_check = None 112 | able_to_become_leader = False 113 | 114 | proxysql = Proxysql() 115 | 116 | # Main Loop, heavy operations needs to be dispatched 117 | # to an extra thread. The loop needs to refresh the 118 | # Consul sessions every few seconds. 
119 | while True: 120 | consul_process.poll() 121 | mysql_process.poll() 122 | 123 | # Try to replace a failed replication leader 124 | if Utils.is_refresh_needed(last_replication_leader_check, timedelta(seconds=5)): 125 | last_replication_leader_check = datetime.now() 126 | 127 | # Update ProxySQL nodes 128 | mysql_nodes = Consul.get_instance().get_all_registered_nodes() 129 | proxysql.update_mysql_server_if_needed(mysql_nodes) 130 | 131 | # Are the replication data completely processed 132 | # (i.e., the data from the leader is stored locally and we 133 | # can become the new leader?) 134 | if not able_to_become_leader: 135 | if Mysql.is_repliation_data_processed(): 136 | logging.info("All replication data are read, node can become replication leader") 137 | able_to_become_leader = True 138 | 139 | replication_leader = Consul.get_instance().is_replication_leader() 140 | 141 | # Try to become new leader 142 | if not replication_leader and able_to_become_leader: 143 | promotion = Consul.get_instance().try_to_become_replication_leader() 144 | 145 | # Are we the new leader? 
146 | if promotion: 147 | Mysql.delete_replication_config() 148 | Consul.get_instance().register_service(True) 149 | replication_leader = True 150 | 151 | # Check for correct replication leader 152 | if not replication_leader: 153 | real_leader = Consul.get_instance().get_replication_leader_ip() 154 | configured_leader = Mysql.get_replication_leader_ip() 155 | 156 | if real_leader != configured_leader: 157 | logging.info("Replication leader change (old=%s, new=%s)", configured_leader, real_leader) 158 | Mysql.change_to_replication_client(real_leader) 159 | 160 | # Keep Consul sessions alive 161 | if Utils.is_refresh_needed(last_session_refresh, timedelta(seconds=5)): 162 | Consul.get_instance().refresh_sessions() 163 | last_session_refresh = datetime.now() 164 | 165 | # Create MySQL Backups (using extra thread for backup) 166 | if Utils.is_refresh_needed(last_backup_check, timedelta(minutes=5)): 167 | Consul.get_instance().start_session_auto_refresh_thread() 168 | Mysql.create_backup_if_needed() 169 | last_backup_check = datetime.now() 170 | Consul.get_instance().stop_session_auto_refresh_thread() 171 | 172 | time.sleep(1) 173 | -------------------------------------------------------------------------------- /docs/deployment-kubernetes.md: -------------------------------------------------------------------------------- 1 | # Example - Using Kubernetes 2 | 3 | To reproduce this example, you need a Kubernetes cluster with at least three worker nodes. The following services are deployed to the cluster: 4 | 5 | * Three Consul instances, they are used for the election of the primary MySQL server, for service discovery, and for providing additional information about the state of the cluster. 6 | * One of the MinIO object storage to store MySQL backups. These backups are used to bootstrap new MySQL replicas automatically. MinIO needs at least to provide four nodes / volumes to provide highly available. Therefore, a persistent iSCSI volume is used in this example. 
On this volume, you can also store a MySQL backup that is used to bootstrap the cluster. However, the persistent volume is not necessary. The solution also works without this volume. If the MinIO pod is started on another node, a new backup is created and uploaded automatically. 7 | * One primary MySQL server (read/write) and two read-only MySQL replicas. 8 | * An instance of [ProxySQL](https://github.com/sysown/proxysql) is available on every MySQL-Server. ProxySQL is used to access the MySQL installations. Write requests (e.g., `INSERT` or `UPDATE`) are automatically send to the replication leader, and read requests (e.g., `SELECT`) are sent to the replication follower. 9 | 10 | __Note:__ If you don't have a local Kubernetes installation, you can use [kubeadm](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm/) to setup such a cluster locally. In addition, you find a proper Ansible Playbook [here](https://github.com/jnidzwetzki/ansible-playbooks/tree/main/playbooks) to create such a cluster with `Docker` or `Container.io` as runtime. 11 | 12 | ### Step 1 - Check your Kubernetes installation 13 | 14 | Execute the command `kubectl get nodes` to check the state of your Kubernetes cluster. 15 | 16 | ```bash 17 | $ kubectl get nodes 18 | NAME STATUS ROLES AGE VERSION 19 | debian10-k8s-vm1 Ready master 3d3h v1.19.4 20 | debian10-k8s-vm2 Ready 3d3h v1.19.4 21 | debian10-k8s-vm3 Ready 3d2h v1.19.4 22 | debian10-k8s-vm4 Ready 24h v1.19.4 23 | ``` 24 | 25 | In this example, the node `debian10-k8s-vm1` is the contol node for the cluster. The nodes `debian10-k8s-vm2`, `debian10-k8s-vm3`, `debian10-k8s-vm4` are the worker nodes of the cluster. 26 | 27 | ### Step 2 - Deploy the Services 28 | 29 | Please download the [configuration](https://raw.githubusercontent.com/jnidzwetzki/mysql-ha-cloud/main/deployment/mysql-kubernetes-iscsi.yml) for Kubernetes and adjust the configuration according to your local settings. 
For example, when you use the persistent iSCSI volume, the iSCSI target settings need to be adjusted. 30 | 31 | ```bash 32 | $ curl https://raw.githubusercontent.com/jnidzwetzki/mysql-ha-cloud/main/deployment/mysql-kubernetes-iscsi.yml --output mysql-kubernetes-iscsi.yml 33 | $ kubectl create -f mysql-kubernetes-iscsi.yml 34 | secret/chap-secret created 35 | deployment.apps/minio created 36 | service/minio created 37 | service/consul created 38 | statefulset.apps/consul created 39 | statefulset.apps/mysql created 40 | service/mysql created 41 | ``` 42 | 43 | After the deployment is done, the available pods should look as follows: 44 | 45 | ```bash 46 | $ kubectl get pods 47 | NAME READY STATUS RESTARTS AGE 48 | consul-0 1/1 Running 0 3h49m 49 | consul-1 1/1 Running 0 2m43s 50 | consul-2 1/1 Running 0 2m41s 51 | minio-567b86887c-wlpdn 1/1 Running 0 3h49m 52 | mysql-0 1/1 Running 0 3h49m 53 | mysql-1 1/1 Running 0 88s 54 | mysql-2 1/1 Running 0 13s 55 | ``` 56 | 57 | In addition, the following services should be available: 58 | 59 | ```bash 60 | $ kubectl get services 61 | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE 62 | consul NodePort 10.108.236.59 8500:30014/TCP 3h50m 63 | minio NodePort 10.100.165.38 9000:30013/TCP 3h50m 64 | mysql NodePort 10.103.124.5 3306:30015/TCP,6032:30016/TCP 3h50m 65 | ``` 66 | 67 | Consul tries to bootstrap a new cluster in the background and the Consul agents on the MySQL pods also try to join this cluster. 
The status of the Consul cluster can be checked with the following command:
103 | 2020-12-07 19:02:47,501 INFO root MySQL backend has changed (old=['10.244.1.29', '10.244.3.21'], new=['10.244.1.29', '10.244.2.28', '10.244.3.21']), reconfiguring 104 | 2020-12-07 19:02:47,501 INFO root Removing all old backend MySQL Server 105 | 2020-12-07 19:02:47,503 INFO root Adding 10.244.1.29 as backend MySQL Server 106 | 2020-12-07 19:02:47,505 INFO root Adding 10.244.2.28 as backend MySQL Server 107 | 2020-12-07 19:02:47,506 INFO root Adding 10.244.3.21 as backend MySQL Server 108 | ``` 109 | 110 | In addition, you can list the available backups of the database: 111 | 112 | ```bash 113 | $ kubectl exec mysql-0 -- mc ls backup/mysqlbackup 114 | [2020-12-06 21:23:55 UTC] 1.6MiB mysql_backup_1607289823.6914027.tgz 115 | [2020-12-07 19:00:21 UTC] 1.6MiB mysql_backup_1607367611.8148804.tgz 116 | ``` 117 | 118 | You can use also your browser to check the Consul installation and the MinIO setup: 119 | 120 | * At the URL [http://Kubernetes-Node:30013](http://Kubernetes-Node:30013) is the MinIO webinterface available. Please use the value of the variables `MINIO_ACCESS_KEY` and `MINIO_SECRET_KEY` from the deployment description for the login. 121 | * At the URL [http://Kubernetes-Node:30014](http://Kubernetes-Node:30014) is the Consul webinterface available. 122 | 123 | ### Step 4 - Use the highly-available MySQL-Server 124 | 125 | On port `30015/tcp` on all Kubernetes nodes, you can now reach the highly-available MySQL-Server. As user use `MYSQL_APPLICATION_USER` and the `MYSQL_APPLICATION_PASSWORD` from the docker-swarm file. 126 | 127 | For example: 128 | 129 | ```bash 130 | mysql -u mysql_user -pmysql_secret -h -P30015 131 | ``` 132 | 133 | While you work on the MySQL-Shell you can restart the Kubernetes worker nodes. Kubernetes will restart the missing pods on other nodes and the MySQL orchestrator will reconfigure the replication setup in MySQL. The MySQL-Shell is usable all the time for read- and write-requests. 
134 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 
40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [2020] [Jan Nidzwetzki] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /mysql_cluster_manager/src/mcm/consul.py: -------------------------------------------------------------------------------- 1 | """This file is part of the MySQL cluster manager""" 2 | 3 | import os 4 | import time 5 | import json 6 | import logging 7 | import threading 8 | import subprocess 9 | 10 | import consul as pyconsul 11 | 12 | from mcm.utils import Utils 13 | 14 | class Consul: 15 | 16 | """ 17 | This class encapsulates all Consul related things 18 | """ 19 | 20 | # The signeton instance 21 | __instance = None 22 | 23 | # Retry counter for operations 24 | retry_counter = 100 25 | 26 | # KV prefix 27 | kv_prefix = "mcm/" 28 | 29 | # Server ID key 30 | kv_server_id = kv_prefix + "server_id" 31 | 32 | # Instances ID key 33 | instances_path = kv_prefix + "instances/" 34 | 35 | # Instances session key 36 | instances_session_key = kv_prefix + "instances" 37 | 38 | # Replication leader path 39 | replication_leader_path = kv_prefix + "replication_leader" 40 | 41 | def __init__(self): 42 | """ 43 | Init the Consul client 44 | """ 45 | if 
Consul.__instance is not None: 46 | raise Exception("This class is a singleton!") 47 | 48 | Consul.__instance = self 49 | logging.info("Register Consul connection") 50 | self.client = pyconsul.Consul(host="localhost") 51 | self.active_sessions = [] 52 | self.node_health_session = self.create_node_health_session() 53 | 54 | # The session auto refresh thread 55 | self.auto_refresh_thread = None 56 | self.run_auto_refresh_thread = False 57 | 58 | @staticmethod 59 | def get_instance(): 60 | """ Static access method. """ 61 | if Consul.__instance is None: 62 | Consul() 63 | return Consul.__instance 64 | 65 | def start_session_auto_refresh_thread(self): 66 | """ 67 | Start the session auto refresh thread 68 | """ 69 | logging.info("Starting the Consul session auto refresh thread") 70 | self.run_auto_refresh_thread = True 71 | self.auto_refresh_thread = threading.Thread(target=self.auto_refresh_sessions, args=()) 72 | self.auto_refresh_thread.start() 73 | 74 | def auto_refresh_sessions(self): 75 | """ 76 | Auto refresh the active sessions 77 | """ 78 | while self.run_auto_refresh_thread: 79 | logging.debug("Refreshing active consul sessions from auto refresh thread") 80 | self.refresh_sessions() 81 | time.sleep(2) 82 | 83 | def stop_session_auto_refresh_thread(self): 84 | """ 85 | Stop the session auto refresh thread 86 | """ 87 | logging.info("Stopping the Consul session auto refresh thread") 88 | self.run_auto_refresh_thread = False 89 | if self.auto_refresh_thread is not None: 90 | self.auto_refresh_thread.join() 91 | self.auto_refresh_thread = None 92 | logging.info("Consul session auto refresh thread is stopped") 93 | 94 | def create_node_health_session(self): 95 | """ 96 | Create the node health session 97 | all created KV entries automatically removed 98 | on session destory. 
99 | """ 100 | 101 | return self.create_session( 102 | name=Consul.instances_session_key, 103 | behavior='delete', ttl=15, lock_delay=0) 104 | 105 | def get_all_registered_nodes(self): 106 | """ 107 | Get all registered MySQL nodes 108 | """ 109 | mysql_nodes = [] 110 | result = self.client.kv.get(Consul.instances_path, recurse=True) 111 | 112 | if result[1] is not None: 113 | for node in result[1]: 114 | node_value = node['Value'] 115 | node_data = json.loads(node_value) 116 | 117 | if not "ip_address" in node_data: 118 | logging.error("ip_address missing in %s", node) 119 | continue 120 | 121 | ip_address = node_data["ip_address"] 122 | mysql_nodes.append(ip_address) 123 | 124 | return mysql_nodes 125 | 126 | def get_mysql_server_id(self): 127 | """ 128 | Get the MySQL server id from consul 129 | 130 | Try to get existing value and update to +1 131 | * If Update fails, retry 132 | * If Key not exists, try to create 133 | """ 134 | for _ in range(Consul.retry_counter): 135 | result = self.client.kv.get(Consul.kv_server_id) 136 | 137 | # Create new key 138 | if result[1] is None: 139 | logging.debug("Old serverkey %s not found, preparing new one", 140 | Consul.kv_server_id) 141 | 142 | json_string = json.dumps({'last_used_id': 1}) 143 | 144 | # Try to create 145 | put_result = self.client.kv.put(Consul.kv_server_id, json_string, cas=0) 146 | if put_result is True: 147 | logging.debug("Created new key, started new server counter") 148 | return 1 149 | 150 | logging.debug("New key could not be created, retrying") 151 | continue 152 | 153 | # Updating existing key 154 | logging.debug("Updating existing key %s", result) 155 | json_string = result[1]['Value'] 156 | version = result[1]['ModifyIndex'] 157 | server_data = json.loads(json_string) 158 | 159 | if not "last_used_id" in server_data: 160 | logging.error("Invalid JSON returned (missing last_used_id) %s", 161 | json_string) 162 | 163 | server_data['last_used_id'] = server_data['last_used_id'] + 1 164 | json_string 
= json.dumps(server_data) 165 | put_result = self.client.kv.put(Consul.kv_server_id, json_string, cas=version) 166 | 167 | if put_result is True: 168 | logging.debug("Successfully updated consul value %s, new server_id is %i", 169 | put_result, server_data['last_used_id']) 170 | return server_data['last_used_id'] 171 | 172 | logging.debug("Unable to update consul value, retrying %s", put_result) 173 | time.sleep(10) 174 | 175 | raise Exception("Unable to determine server id") 176 | 177 | def is_replication_leader(self): 178 | """ 179 | Test if this is the MySQL replication leader or not 180 | """ 181 | 182 | result = self.client.kv.get(Consul.replication_leader_path) 183 | 184 | if result[1] is None: 185 | logging.debug("No replication leader node available") 186 | return False 187 | 188 | leader_session = result[1]['Session'] 189 | 190 | logging.debug("Replication leader is %s, we are %s", 191 | leader_session, self.node_health_session) 192 | 193 | return leader_session == self.node_health_session 194 | 195 | def get_replication_leader_ip(self): 196 | """ 197 | Get the IP of the current replication ledear 198 | """ 199 | result = self.client.kv.get(Consul.replication_leader_path) 200 | 201 | if result[1] is None: 202 | return None 203 | 204 | json_string = result[1]['Value'] 205 | server_data = json.loads(json_string) 206 | 207 | if not "ip_address" in server_data: 208 | logging.error("Invalid JSON returned from replication ledader (missing server_id) %s", 209 | json_string) 210 | 211 | return server_data['ip_address'] 212 | 213 | def try_to_become_replication_leader(self): 214 | """ 215 | Try to get the new replication leader 216 | """ 217 | 218 | result = self.client.kv.get(Consul.replication_leader_path) 219 | 220 | if result[1] is None: 221 | logging.debug("Register MySQL instance in Consul") 222 | ip_address = Utils.get_local_ip_address() 223 | 224 | json_string = json.dumps({ 225 | 'ip_address': ip_address 226 | }) 227 | 228 | put_result = 
self.client.kv.put(Consul.replication_leader_path, 229 | json_string, 230 | acquire=self.node_health_session) 231 | 232 | if put_result: 233 | logging.info("We are the new replication leader") 234 | else: 235 | logging.debug("Unable to become replication leader, retry") 236 | 237 | return put_result 238 | 239 | return False 240 | 241 | 242 | def register_service(self, leader=False, port=3306): 243 | """ 244 | Register the MySQL primary service 245 | """ 246 | ip_address = Utils.get_local_ip_address() 247 | 248 | tags = [] 249 | service_id = f"mysql_{ip_address}" 250 | 251 | if leader: 252 | tags.append("leader") 253 | else: 254 | tags.append("follower") 255 | 256 | # Unrregister old service 257 | all_services = self.client.agent.services() 258 | 259 | if service_id in all_services: 260 | logging.debug("Unregister old service %s (%s)", service_id, all_services) 261 | self.client.agent.service.deregister(service_id) 262 | 263 | # Register new service 264 | logging.info("Register new service_id=%s, tags=%s", service_id, tags) 265 | self.client.agent.service.register("mysql", service_id=service_id, port=port, tags=tags) 266 | 267 | def register_node(self, mysql_version=None, server_id=None): 268 | """ 269 | Register the node in Consul 270 | """ 271 | logging.debug("Register MySQL instance in Consul") 272 | ip_address = Utils.get_local_ip_address() 273 | 274 | json_string = json.dumps({ 275 | 'ip_address': ip_address, 276 | 'server_id': server_id, 277 | 'mysql_version': mysql_version 278 | }) 279 | 280 | path = f"{Consul.instances_path}{ip_address}" 281 | logging.debug("Consul: Path %s, value %s (session %s)", 282 | path, json_string, self.node_health_session) 283 | 284 | put_result = self.client.kv.put(path, json_string, acquire=self.node_health_session) 285 | 286 | if not put_result: 287 | logging.error("Unable to create %s", path) 288 | return False 289 | 290 | return True 291 | 292 | def refresh_sessions(self): 293 | """ 294 | Refresh the active sessions 295 | """ 
296 | logging.debug("Keeping Consul sessions alive") 297 | 298 | for session in self.active_sessions: 299 | logging.debug("Refreshing session %s", session) 300 | self.client.session.renew(session) 301 | 302 | def create_session(self, name, behavior='release', ttl=None, lock_delay=15): 303 | """ 304 | Create a new session. 305 | 306 | Keep in mind that the real invalidation is around 2*ttl 307 | see https://github.com/hashicorp/consul/issues/1172 308 | """ 309 | 310 | session_id = self.client.session.create(name=name, 311 | behavior=behavior, 312 | ttl=ttl, 313 | lock_delay=lock_delay) 314 | 315 | # Keep session for auto refresh 316 | self.active_sessions.append(session_id) 317 | 318 | logging.debug("Created new session on node %s named %s", name, session_id) 319 | 320 | return session_id 321 | 322 | 323 | def destroy_session(self, session_id): 324 | """ 325 | Destory a previosly registered session 326 | """ 327 | 328 | if not session_id in self.active_sessions: 329 | return False 330 | 331 | self.active_sessions.remove(session_id) 332 | self.client.session.destroy(session_id) 333 | 334 | return True 335 | 336 | @staticmethod 337 | def agent_start(): 338 | """ 339 | Start the local Consul agent. 
340 | """ 341 | 342 | logging.info("Starting Consul Agent") 343 | consul_args = ["consul"] 344 | consul_args.append("agent") 345 | consul_args.append("--data-dir") 346 | consul_args.append("/tmp/consul") 347 | 348 | consul_interface = os.environ.get("CONSUL_BIND_INTERFACE") 349 | 350 | if consul_interface is not None: 351 | consul_args.append("--bind") 352 | consul_args.append(f'{{{{ GetInterfaceIP "{consul_interface}" }}}}') 353 | 354 | consul_seed = os.environ.get("CONSUL_BOOTSTRAP_SERVER") 355 | 356 | if consul_seed is not None: 357 | consul_args.append("--join") 358 | consul_args.append(consul_seed) 359 | 360 | # Run process in background 361 | consul_process = subprocess.Popen(consul_args) 362 | 363 | return consul_process 364 | -------------------------------------------------------------------------------- /mysql_cluster_manager/pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | 3 | # A comma-separated list of package or module names from where C extensions may 4 | # be loaded. Extensions are loading into the active Python interpreter and may 5 | # run arbitrary code. 6 | extension-pkg-whitelist=netifaces 7 | 8 | # Specify a score threshold to be exceeded before program exits with error. 9 | fail-under=10 10 | 11 | # Add files or directories to the blacklist. They should be base names, not 12 | # paths. 13 | ignore=CVS, compound 14 | 15 | # Add files or directories matching the regex patterns to the blacklist. The 16 | # regex matches against base names, not paths. 17 | ignore-patterns= 18 | 19 | # Python code to execute, usually for sys.path manipulation such as 20 | # pygtk.require(). 21 | #init-hook= 22 | 23 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the 24 | # number of processors available to use. 25 | jobs=0 26 | 27 | # Control the amount of potential inferred values when inferring a single 28 | # object. 
This can help the performance when dealing with large functions or 29 | # complex, nested conditions. 30 | limit-inference-results=100 31 | 32 | # List of plugins (as comma separated values of python module names) to load, 33 | # usually to register additional checkers. 34 | load-plugins= 35 | 36 | # Pickle collected data for later comparisons. 37 | persistent=yes 38 | 39 | # When enabled, pylint would attempt to guess common misconfiguration and emit 40 | # user-friendly hints instead of false-positive error messages. 41 | suggestion-mode=yes 42 | 43 | # Allow loading of arbitrary C extensions. Extensions are imported into the 44 | # active Python interpreter and may run arbitrary code. 45 | unsafe-load-any-extension=no 46 | 47 | 48 | [MESSAGES CONTROL] 49 | 50 | # Only show warnings with the listed confidence levels. Leave empty to show 51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED. 52 | confidence= 53 | 54 | # Disable the message, report, category or checker with the given id(s). You 55 | # can either give multiple identifiers separated by comma (,) or put this 56 | # option multiple times (only on the command line, not in the configuration 57 | # file where it should appear only once). You can also use "--disable=all" to 58 | # disable everything first and then reenable specific checks. For example, if 59 | # you want to run only the similarities checker, you can use "--disable=all 60 | # --enable=similarities". If you want to run only the classes checker, but have 61 | # no Warning level messages displayed, use "--disable=all --enable=classes 62 | # --disable=W". 63 | disable= 64 | fixme, 65 | too-many-return-statements, 66 | too-many-instance-attributes, 67 | too-many-locals, 68 | too-many-arguments, 69 | too-many-public-methods, 70 | too-few-public-methods 71 | 72 | # Enable the message, report, category or checker with the given id(s). 
You can 73 | # either give multiple identifier separated by comma (,) or put this option 74 | # multiple time (only on the command line, not in the configuration file where 75 | # it should appear only once). See also the "--disable" option for examples. 76 | enable= 77 | 78 | 79 | [REPORTS] 80 | 81 | # Python expression which should return a score less than or equal to 10. You 82 | # have access to the variables 'error', 'warning', 'refactor', and 'convention' 83 | # which contain the number of messages in each category, as well as 'statement' 84 | # which is the total number of statements analyzed. This score is used by the 85 | # global evaluation report (RP0004). 86 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) 87 | 88 | # Template used to display messages. This is a python new-style format string 89 | # used to format the message information. See doc for all details. 90 | #msg-template= 91 | 92 | # Set the output format. Available formats are text, parseable, colorized, json 93 | # and msvs (visual studio). You can also give a reporter class, e.g. 94 | # mypackage.mymodule.MyReporterClass. 95 | output-format=text 96 | 97 | # Tells whether to display a full report or only the messages. 98 | reports=no 99 | 100 | # Activate the evaluation score. 101 | score=yes 102 | 103 | 104 | [REFACTORING] 105 | 106 | # Maximum number of nested blocks for function / method body 107 | max-nested-blocks=5 108 | 109 | # Complete name of functions that never returns. When checking for 110 | # inconsistent-return-statements if a never returning function is called then 111 | # it will be considered as an explicit return statement and no message will be 112 | # printed. 113 | never-returning-functions=sys.exit 114 | 115 | 116 | [MISCELLANEOUS] 117 | 118 | # List of note tags to take in consideration, separated by a comma. 119 | notes=FIXME, 120 | XXX, 121 | TODO 122 | 123 | # Regular expression of note tags to take in consideration. 
124 | #notes-rgx= 125 | 126 | 127 | [TYPECHECK] 128 | 129 | # List of decorators that produce context managers, such as 130 | # contextlib.contextmanager. Add to this list to register other decorators that 131 | # produce valid context managers. 132 | contextmanager-decorators=contextlib.contextmanager 133 | 134 | # List of members which are set dynamically and missed by pylint inference 135 | # system, and so shouldn't trigger E1101 when accessed. Python regular 136 | # expressions are accepted. 137 | generated-members= 138 | 139 | # Tells whether missing members accessed in mixin class should be ignored. A 140 | # mixin class is detected if its name ends with "mixin" (case insensitive). 141 | ignore-mixin-members=yes 142 | 143 | # Tells whether to warn about missing members when the owner of the attribute 144 | # is inferred to be None. 145 | ignore-none=yes 146 | 147 | # This flag controls whether pylint should warn about no-member and similar 148 | # checks whenever an opaque object is returned when inferring. The inference 149 | # can return multiple potential results while evaluating a Python object, but 150 | # some branches might not be evaluated, which results in partial inference. In 151 | # that case, it might be useful to still emit no-member and other checks for 152 | # the rest of the inferred objects. 153 | ignore-on-opaque-inference=yes 154 | 155 | # List of class names for which member attributes should not be checked (useful 156 | # for classes with dynamically set attributes). This supports the use of 157 | # qualified names. 158 | ignored-classes=optparse.Values,thread._local,_thread._local 159 | 160 | # List of module names for which member attributes should not be checked 161 | # (useful for modules/projects where namespaces are manipulated during runtime 162 | # and thus existing member attributes cannot be deduced by static analysis). It 163 | # supports qualified module names, as well as Unix pattern matching. 
164 | ignored-modules= 165 | 166 | # Show a hint with possible names when a member name was not found. The aspect 167 | # of finding the hint is based on edit distance. 168 | missing-member-hint=yes 169 | 170 | # The minimum edit distance a name should have in order to be considered a 171 | # similar match for a missing member name. 172 | missing-member-hint-distance=1 173 | 174 | # The total number of similar names that should be taken in consideration when 175 | # showing a hint for a missing member. 176 | missing-member-max-choices=1 177 | 178 | # List of decorators that change the signature of a decorated function. 179 | signature-mutators= 180 | 181 | 182 | [FORMAT] 183 | 184 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF. 185 | expected-line-ending-format= 186 | 187 | # Regexp for a line that is allowed to be longer than the limit. 188 | ignore-long-lines=^\s*(# )??$ 189 | 190 | # Number of spaces of indent required inside a hanging or continued line. 191 | indent-after-paren=4 192 | 193 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 194 | # tab). 195 | indent-string=' ' 196 | 197 | # Maximum number of characters on a single line. 198 | max-line-length=250 199 | 200 | # Maximum number of lines in a module. 201 | max-module-lines=1000 202 | 203 | # List of optional constructs for which whitespace checking is disabled. `dict- 204 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}. 205 | # `trailing-comma` allows a space between comma and closing bracket: (a, ). 206 | # `empty-line` allows space-only lines. 207 | no-space-check=trailing-comma, 208 | dict-separator 209 | 210 | # Allow the body of a class to be on the same line as the declaration if body 211 | # contains single statement. 212 | single-line-class-stmt=no 213 | 214 | # Allow the body of an if to be on the same line as the test if there is no 215 | # else. 
216 | single-line-if-stmt=no 217 | 218 | 219 | [BASIC] 220 | 221 | # Naming style matching correct argument names. 222 | argument-naming-style=snake_case 223 | 224 | # Regular expression matching correct argument names. Overrides argument- 225 | # naming-style. 226 | #argument-rgx= 227 | 228 | # Naming style matching correct attribute names. 229 | attr-naming-style=snake_case 230 | 231 | # Regular expression matching correct attribute names. Overrides attr-naming- 232 | # style. 233 | #attr-rgx= 234 | 235 | # Bad variable names which should always be refused, separated by a comma. 236 | bad-names=foo, 237 | bar, 238 | baz, 239 | toto, 240 | tutu, 241 | tata 242 | 243 | # Bad variable names regexes, separated by a comma. If names match any regex, 244 | # they will always be refused 245 | bad-names-rgxs= 246 | 247 | # Naming style matching correct class attribute names. 248 | class-attribute-naming-style=any 249 | 250 | # Regular expression matching correct class attribute names. Overrides class- 251 | # attribute-naming-style. 252 | #class-attribute-rgx= 253 | 254 | # Naming style matching correct class names. 255 | class-naming-style=PascalCase 256 | 257 | # Regular expression matching correct class names. Overrides class-naming- 258 | # style. 259 | #class-rgx= 260 | 261 | # Naming style matching correct constant names. 262 | const-naming-style=UPPER_CASE 263 | 264 | # Regular expression matching correct constant names. Overrides const-naming- 265 | # style. 266 | #const-rgx= 267 | 268 | # Minimum line length for functions/classes that require docstrings, shorter 269 | # ones are exempt. 270 | docstring-min-length=-1 271 | 272 | # Naming style matching correct function names. 273 | function-naming-style=snake_case 274 | 275 | # Regular expression matching correct function names. Overrides function- 276 | # naming-style. 277 | #function-rgx= 278 | 279 | # Good variable names which should always be accepted, separated by a comma. 
280 | good-names=i, 281 | j, 282 | k, 283 | ex, 284 | Run, 285 | _ 286 | 287 | # Good variable names regexes, separated by a comma. If names match any regex, 288 | # they will always be accepted 289 | good-names-rgxs= 290 | 291 | # Include a hint for the correct naming format with invalid-name. 292 | include-naming-hint=no 293 | 294 | # Naming style matching correct inline iteration names. 295 | inlinevar-naming-style=any 296 | 297 | # Regular expression matching correct inline iteration names. Overrides 298 | # inlinevar-naming-style. 299 | #inlinevar-rgx= 300 | 301 | # Naming style matching correct method names. 302 | method-naming-style=snake_case 303 | 304 | # Regular expression matching correct method names. Overrides method-naming- 305 | # style. 306 | #method-rgx= 307 | 308 | # Naming style matching correct module names. 309 | module-naming-style=snake_case 310 | 311 | # Regular expression matching correct module names. Overrides module-naming- 312 | # style. 313 | #module-rgx= 314 | 315 | # Colon-delimited sets of names that determine each other's naming style when 316 | # the name regexes allow several styles. 317 | name-group= 318 | 319 | # Regular expression which should only match function or class names that do 320 | # not require a docstring. 321 | no-docstring-rgx=^_ 322 | 323 | # List of decorators that produce properties, such as abc.abstractproperty. Add 324 | # to this list to register other decorators that produce valid properties. 325 | # These decorators are taken in consideration only for invalid-name. 326 | property-classes=abc.abstractproperty 327 | 328 | # Naming style matching correct variable names. 329 | variable-naming-style=snake_case 330 | 331 | # Regular expression matching correct variable names. Overrides variable- 332 | # naming-style. 333 | #variable-rgx= 334 | 335 | 336 | [SIMILARITIES] 337 | 338 | # Ignore comments when computing similarities. 
339 | ignore-comments=yes 340 | 341 | # Ignore docstrings when computing similarities. 342 | ignore-docstrings=yes 343 | 344 | # Ignore imports when computing similarities. 345 | ignore-imports=no 346 | 347 | # Minimum lines number of a similarity. 348 | min-similarity-lines=4 349 | 350 | 351 | [STRING] 352 | 353 | # This flag controls whether inconsistent-quotes generates a warning when the 354 | # character used as a quote delimiter is used inconsistently within a module. 355 | check-quote-consistency=no 356 | 357 | # This flag controls whether the implicit-str-concat should generate a warning 358 | # on implicit string concatenation in sequences defined over several lines. 359 | check-str-concat-over-line-jumps=no 360 | 361 | 362 | [VARIABLES] 363 | 364 | # List of additional names supposed to be defined in builtins. Remember that 365 | # you should avoid defining new builtins when possible. 366 | additional-builtins= 367 | 368 | # Tells whether unused global variables should be treated as a violation. 369 | allow-global-unused-variables=yes 370 | 371 | # List of strings which can identify a callback function by name. A callback 372 | # name must start or end with one of those strings. 373 | callbacks=cb_, 374 | _cb 375 | 376 | # A regular expression matching the name of dummy variables (i.e. expected to 377 | # not be used). 378 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_ 379 | 380 | # Argument names that match this expression will be ignored. Default to name 381 | # with leading underscore. 382 | ignored-argument-names=_.*|^ignored_|^unused_ 383 | 384 | # Tells whether we should check for unused import in __init__ files. 385 | init-import=no 386 | 387 | # List of qualified module names which can have objects that can redefine 388 | # builtins. 389 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io 390 | 391 | 392 | [LOGGING] 393 | 394 | # The type of string formatting that logging methods do. 
`old` means using % 395 | # formatting, `new` is for `{}` formatting. 396 | logging-format-style=old 397 | 398 | # Logging modules to check that the string format arguments are in logging 399 | # function parameter format. 400 | logging-modules=logging 401 | 402 | 403 | [SPELLING] 404 | 405 | # Limits count of emitted suggestions for spelling mistakes. 406 | max-spelling-suggestions=4 407 | 408 | # Spelling dictionary name. Available dictionaries: none. To make it work, 409 | # install the python-enchant package. 410 | spelling-dict= 411 | 412 | # List of comma separated words that should not be checked. 413 | spelling-ignore-words= 414 | 415 | # A path to a file that contains the private dictionary; one word per line. 416 | spelling-private-dict-file= 417 | 418 | # Tells whether to store unknown words to the private dictionary (see the 419 | # --spelling-private-dict-file option) instead of raising a message. 420 | spelling-store-unknown-words=no 421 | 422 | 423 | [CLASSES] 424 | 425 | # List of method names used to declare (i.e. assign) instance attributes. 426 | defining-attr-methods=__init__, 427 | __new__, 428 | setUp, 429 | __post_init__ 430 | 431 | # List of member names, which should be excluded from the protected access 432 | # warning. 433 | exclude-protected=_asdict, 434 | _fields, 435 | _replace, 436 | _source, 437 | _make 438 | 439 | # List of valid names for the first argument in a class method. 440 | valid-classmethod-first-arg=cls 441 | 442 | # List of valid names for the first argument in a metaclass class method. 443 | valid-metaclass-classmethod-first-arg=cls 444 | 445 | 446 | [IMPORTS] 447 | 448 | # List of modules that can be imported at any level, not just the top level 449 | # one. 450 | allow-any-import-level= 451 | 452 | # Allow wildcard imports from modules that define __all__. 453 | allow-wildcard-with-all=no 454 | 455 | # Analyse import fallback blocks. 
This can be used to support both Python 2 and 456 | # 3 compatible code, which means that the block might have code that exists 457 | # only in one or another interpreter, leading to false positives when analysed. 458 | analyse-fallback-blocks=no 459 | 460 | # Deprecated modules which should not be used, separated by a comma. 461 | deprecated-modules=optparse,tkinter.tix 462 | 463 | # Create a graph of external dependencies in the given file (report RP0402 must 464 | # not be disabled). 465 | ext-import-graph= 466 | 467 | # Create a graph of every (i.e. internal and external) dependencies in the 468 | # given file (report RP0402 must not be disabled). 469 | import-graph= 470 | 471 | # Create a graph of internal dependencies in the given file (report RP0402 must 472 | # not be disabled). 473 | int-import-graph= 474 | 475 | # Force import order to recognize a module as part of the standard 476 | # compatibility libraries. 477 | known-standard-library= 478 | 479 | # Force import order to recognize a module as part of a third party library. 480 | known-third-party=enchant 481 | 482 | # Couples of modules and preferred modules, separated by a comma. 483 | preferred-modules= 484 | 485 | 486 | [DESIGN] 487 | 488 | # Maximum number of arguments for function / method. 489 | max-args=5 490 | 491 | # Maximum number of attributes for a class (see R0902). 492 | max-attributes=7 493 | 494 | # Maximum number of boolean expressions in an if statement (see R0916). 495 | max-bool-expr=5 496 | 497 | # Maximum number of branch for function / method body. 498 | max-branches=20 499 | 500 | # Maximum number of locals for function / method body. 501 | max-locals=15 502 | 503 | # Maximum number of parents for a class (see R0901). 504 | max-parents=7 505 | 506 | # Maximum number of public methods for a class (see R0904). 507 | max-public-methods=20 508 | 509 | # Maximum number of return / yield for function / method body. 
510 | max-returns=6 511 | 512 | # Maximum number of statements in function / method body. 513 | max-statements=60 514 | 515 | # Minimum number of public methods for a class (see R0903). 516 | min-public-methods=2 517 | 518 | 519 | [EXCEPTIONS] 520 | 521 | # Exceptions that will emit a warning when being caught. Defaults to 522 | # "BaseException, Exception". 523 | overgeneral-exceptions=BaseException, 524 | Exception 525 | -------------------------------------------------------------------------------- /mysql_cluster_manager/src/mcm/mysql.py: -------------------------------------------------------------------------------- 1 | """This file is part of the MySQL cluster manager""" 2 | 3 | import os 4 | import sys 5 | import time 6 | import shutil 7 | import logging 8 | import threading 9 | import subprocess 10 | 11 | from shutil import rmtree 12 | from datetime import timedelta 13 | 14 | import mysql.connector 15 | 16 | from mcm.consul import Consul 17 | from mcm.minio import Minio 18 | from mcm.utils import Utils 19 | 20 | class Mysql: 21 | 22 | """ 23 | This class encapsulates all MySQL related things 24 | """ 25 | 26 | xtrabackup_binary = "/usr/bin/xtrabackup" 27 | mysql_server_binary = "/usr/bin/mysqld_safe" 28 | mysqld_binary = "/usr/sbin/mysqld" 29 | mysql_datadir = "/var/lib/mysql" 30 | 31 | @staticmethod 32 | def init_database_if_needed(): 33 | """ 34 | Init a MySQL and configure permissions. 
35 | """ 36 | 37 | logging.info("Init MySQL database directory") 38 | 39 | if os.path.isfile(f"{Mysql.mysql_datadir}/ib_logfile0"): 40 | logging.info("MySQL is already initialized, skipping") 41 | return False 42 | 43 | mysql_init = [Mysql.mysqld_binary, "--initialize-insecure", "--user=mysql"] 44 | 45 | subprocess.run(mysql_init, check=True) 46 | 47 | # Start server the first time 48 | mysql_process = Mysql.server_start(use_root_password=False) 49 | 50 | # Create application user 51 | logging.debug("Creating MySQL user for the application") 52 | application_user = os.environ.get("MYSQL_APPLICATION_USER") 53 | appication_password = os.environ.get("MYSQL_APPLICATION_PASSWORD") 54 | 55 | # Password needs to be mysql_native_password for ProxySQL 56 | # See https://github.com/sysown/proxysql/issues/2580 57 | Mysql.execute_statement_or_exit(f"CREATE USER '{application_user}'@'localhost' " 58 | f"IDENTIFIED WITH mysql_native_password BY '{appication_password}'") 59 | Mysql.execute_statement_or_exit(f"GRANT ALL PRIVILEGES ON *.* TO '{application_user}'@'localhost'") 60 | Mysql.execute_statement_or_exit(f"CREATE USER '{application_user}'@'%' " 61 | f"IDENTIFIED WITH mysql_native_password BY '{appication_password}'") 62 | Mysql.execute_statement_or_exit(f"GRANT ALL PRIVILEGES ON *.* TO '{application_user}'@'%'") 63 | 64 | # Create backup user 65 | logging.debug("Creating MySQL user for backups") 66 | backup_user = os.environ.get("MYSQL_BACKUP_USER") 67 | backup_password = os.environ.get("MYSQL_BACKUP_PASSWORD") 68 | Mysql.execute_statement_or_exit(f"CREATE USER '{backup_user}'@'localhost' " 69 | f"IDENTIFIED BY '{backup_password}'") 70 | Mysql.execute_statement_or_exit("GRANT BACKUP_ADMIN, PROCESS, RELOAD, LOCK TABLES, " 71 | f"REPLICATION CLIENT ON *.* TO '{backup_user}'@'localhost'") 72 | Mysql.execute_statement_or_exit("GRANT SELECT ON performance_schema.log_status TO " 73 | f"'{backup_user}'@'localhost'") 74 | 75 | # Create replication user 76 | logging.debug("Creating 
replication user") 77 | replication_user = os.environ.get("MYSQL_REPLICATION_USER") 78 | replication_password = os.environ.get("MYSQL_REPLICATION_PASSWORD") 79 | Mysql.execute_statement_or_exit(f"CREATE USER '{replication_user}'@'%' " 80 | f"IDENTIFIED BY '{replication_password}'") 81 | Mysql.execute_statement_or_exit("GRANT REPLICATION SLAVE ON *.* TO " 82 | f"'{replication_user}'@'%'") 83 | 84 | # Change permissions for the root user 85 | logging.debug("Set permissions for the root user") 86 | root_password = os.environ.get("MYSQL_ROOT_PASSWORD") 87 | Mysql.execute_statement_or_exit(f"CREATE USER 'root'@'%' IDENTIFIED BY '{root_password}'") 88 | Mysql.execute_statement_or_exit("GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' " 89 | "WITH GRANT OPTION") 90 | Mysql.execute_statement_or_exit("ALTER USER 'root'@'localhost' " 91 | f"IDENTIFIED BY '{root_password}'") 92 | 93 | # Shutdown MySQL server 94 | logging.debug("Inital MySQL setup done, shutdown server..") 95 | Mysql.execute_statement_or_exit(sql="SHUTDOWN", username="root", password=root_password) 96 | mysql_process.wait() 97 | 98 | return True 99 | 100 | @staticmethod 101 | def build_configuration(): 102 | """ 103 | Build the MySQL server configuratuion. 
104 | """ 105 | consul = Consul.get_instance() 106 | server_id = consul.get_mysql_server_id() 107 | 108 | outfile = open("/etc/mysql/conf.d/zz_cluster.cnf", 'w') 109 | outfile.write("# DO NOT EDIT - This file was generated automatically\n") 110 | outfile.write("[mysqld]\n") 111 | outfile.write(f"server_id={server_id}\n") 112 | outfile.write("gtid_mode=ON\n") 113 | outfile.write("enforce-gtid-consistency=ON\n") 114 | outfile.close() 115 | 116 | @staticmethod 117 | def change_to_replication_client(leader_ip): 118 | """ 119 | Make the local MySQL installation to a replication follower 120 | """ 121 | 122 | logging.info("Setting up replication (leader=%s)", leader_ip) 123 | 124 | replication_user = os.environ.get("MYSQL_REPLICATION_USER") 125 | replication_password = os.environ.get("MYSQL_REPLICATION_PASSWORD") 126 | 127 | Mysql.execute_query_as_root("STOP SLAVE", discard_result=True) 128 | 129 | Mysql.execute_query_as_root(f"CHANGE MASTER TO MASTER_HOST = '{leader_ip}', " 130 | f"MASTER_PORT = 3306, MASTER_USER = '{replication_user}', " 131 | f"MASTER_PASSWORD = '{replication_password}', " 132 | "MASTER_AUTO_POSITION = 1, GET_MASTER_PUBLIC_KEY = 1" 133 | , discard_result=True) 134 | 135 | Mysql.execute_query_as_root("START SLAVE", discard_result=True) 136 | 137 | # Set replicia to read only 138 | logging.info("Set MySQL-Server mode to read-only") 139 | Mysql.execute_query_as_root("SET GLOBAL read_only = 1", discard_result=True) 140 | Mysql.execute_query_as_root("SET GLOBAL super_read_only = 1", discard_result=True) 141 | 142 | @staticmethod 143 | def delete_replication_config(): 144 | """ 145 | Stop the replication 146 | """ 147 | logging.debug("Removing old replication configuraion") 148 | Mysql.execute_query_as_root("STOP SLAVE", discard_result=True) 149 | Mysql.execute_query_as_root("RESET SLAVE ALL", discard_result=True) 150 | 151 | # Accept writes 152 | logging.info("Set MySQL-Server mode to read-write") 153 | Mysql.execute_query_as_root("SET GLOBAL 
super_read_only = 0", discard_result=True) 154 | Mysql.execute_query_as_root("SET GLOBAL read_only = 0", discard_result=True) 155 | 156 | @staticmethod 157 | def get_replication_leader_ip(): 158 | """ 159 | Get the current replication leader ip 160 | """ 161 | slave_status = Mysql.execute_query_as_root("SHOW SLAVE STATUS") 162 | 163 | if len(slave_status) != 1: 164 | return None 165 | 166 | if not 'Master_Host' in slave_status[0]: 167 | logging.error("Invalid output, master_host not found %s", slave_status) 168 | return None 169 | 170 | return slave_status[0]['Master_Host'] 171 | 172 | @staticmethod 173 | def is_repliation_data_processed(): 174 | """ 175 | Is the repliation log from the master completely processed 176 | """ 177 | 178 | slave_status = Mysql.execute_query_as_root("SHOW SLAVE STATUS") 179 | 180 | if len(slave_status) != 1: 181 | return False 182 | 183 | if not 'Slave_IO_State' in slave_status[0]: 184 | logging.error("Invalid output, Slave_IO_State not found %s", slave_status) 185 | return False 186 | 187 | # Leader is sending data 188 | io_state = slave_status[0]['Slave_IO_State'] 189 | logging.debug("Follower IO state is '%s'", io_state) 190 | if io_state != "Waiting for master to send event": 191 | return False 192 | 193 | if not 'Slave_SQL_Running_State' in slave_status[0]: 194 | logging.error("Invalid output, Slave_SQL_Running_State not found %s", slave_status) 195 | return False 196 | 197 | # Data is not completely proessed 198 | sql_state = slave_status[0]['Slave_SQL_Running_State'] 199 | logging.debug("Follower SQL state is '%s'", sql_state) 200 | if sql_state != "Slave has read all relay log; waiting for more updates": 201 | return False 202 | 203 | return True 204 | 205 | @staticmethod 206 | def server_start(use_root_password=True): 207 | """ 208 | Start the MySQL server and wait for ready to serve connections. 
209 | """ 210 | 211 | logging.info("Starting MySQL") 212 | 213 | Mysql.build_configuration() 214 | 215 | mysql_server = [Mysql.mysql_server_binary, "--user=mysql"] 216 | mysql_process = subprocess.Popen(mysql_server) 217 | 218 | # Use root password for the connection or not 219 | root_password = None 220 | if use_root_password: 221 | root_password = os.environ.get("MYSQL_ROOT_PASSWORD") 222 | 223 | Mysql.wait_for_connection(password=root_password) 224 | 225 | return mysql_process 226 | 227 | @staticmethod 228 | def server_stop(): 229 | """ 230 | Stop the MySQL server. 231 | """ 232 | logging.info("Stopping MySQL Server") 233 | 234 | # Try to shutdown the server without a password 235 | result = Mysql.execute_statement(sql="SHUTDOWN", log_error=False) 236 | 237 | # Try to shutdown the server using the root password 238 | if not result: 239 | root_password = os.environ.get("MYSQL_ROOT_PASSWORD") 240 | Mysql.execute_statement(sql="SHUTDOWN", password=root_password) 241 | 242 | @staticmethod 243 | def execute_query_as_root(sql, database='mysql', discard_result=False): 244 | """ 245 | Execute the SQL query and return result. 246 | """ 247 | 248 | root_password = os.environ.get("MYSQL_ROOT_PASSWORD") 249 | 250 | cnx = None 251 | 252 | try: 253 | cnx = mysql.connector.connect(user='root', password=root_password, 254 | database=database, 255 | unix_socket='/var/run/mysqld/mysqld.sock') 256 | 257 | 258 | cur = cnx.cursor(dictionary=True, buffered=True) 259 | cur.execute(sql) 260 | 261 | if discard_result: 262 | return None 263 | 264 | return cur.fetchall() 265 | finally: 266 | if cnx: 267 | cnx.close() 268 | 269 | @staticmethod 270 | def wait_for_connection(timeout=120, username='root', 271 | password=None, database='mysql'): 272 | 273 | """ 274 | Test connection via unix-socket. During first init 275 | MySQL start without network access. 
276 | """ 277 | elapsed_time = 0 278 | last_error = None 279 | 280 | while elapsed_time < timeout: 281 | try: 282 | cnx = mysql.connector.connect(user=username, password=password, 283 | database=database, 284 | unix_socket='/var/run/mysqld/mysqld.sock') 285 | cnx.close() 286 | logging.debug("MySQL connection successfully") 287 | return True 288 | except mysql.connector.Error as err: 289 | time.sleep(1) 290 | elapsed_time = elapsed_time + 1 291 | last_error = err 292 | 293 | logging.error("Unable to connect to MySQL (timeout=%i). %s", 294 | elapsed_time, last_error) 295 | sys.exit(1) 296 | 297 | return False 298 | 299 | @staticmethod 300 | def execute_statement_or_exit(sql=None, username='root', 301 | password=None, database='mysql', 302 | port=None): 303 | 304 | """ 305 | Execute the given SQL statement. 306 | """ 307 | result = Mysql.execute_statement(sql=sql, username=username, port=port, 308 | password=password, database=database) 309 | if not result: 310 | sys.exit(1) 311 | 312 | @staticmethod 313 | def execute_statement(sql=None, username='root', 314 | password=None, database='mysql', 315 | port=None, log_error=True): 316 | """ 317 | Execute the given SQL statement. 318 | """ 319 | try: 320 | if port is None: 321 | cnx = mysql.connector.connect(user=username, password=password, 322 | database=database, 323 | unix_socket='/var/run/mysqld/mysqld.sock') 324 | 325 | else: 326 | cnx = mysql.connector.connect(user=username, password=password, 327 | database=database, port=port) 328 | 329 | cursor = cnx.cursor() 330 | 331 | cursor.execute(sql) 332 | 333 | cnx.close() 334 | return True 335 | except mysql.connector.Error as err: 336 | if log_error: 337 | logging.error("Failed to execute SQL: %s", err) 338 | return False 339 | 340 | @staticmethod 341 | def backup_data(): 342 | """ 343 | Backup the local MySQL Server and upload 344 | the backup into a S3 bucket. 
345 | """ 346 | 347 | # Call Setup to ensure bucket and policies do exist 348 | Minio.setup_connection() 349 | 350 | # Backup directory 351 | current_time = time.time() 352 | backup_dir = f"/tmp/mysql_backup_{current_time}" 353 | backup_folder_name = "mysql" 354 | backup_dest = f"{backup_dir}/{backup_folder_name}" 355 | 356 | logging.info("Backing up MySQL into dir %s", backup_dest) 357 | if os.path.exists(backup_dir): 358 | logging.error("Backup path %s already exists, skipping backup run", backup_dest) 359 | 360 | # Crate backup dir 361 | os.makedirs(backup_dir) 362 | 363 | # Create mysql backup 364 | backup_user = os.environ.get("MYSQL_BACKUP_USER") 365 | backup_password = os.environ.get("MYSQL_BACKUP_PASSWORD") 366 | xtrabackup = [Mysql.xtrabackup_binary, f"--user={backup_user}", 367 | f"--password={backup_password}", "--backup", 368 | f"--target-dir={backup_dest}"] 369 | 370 | subprocess.run(xtrabackup, check=True) 371 | 372 | # Prepare backup 373 | xtrabackup_prepare = [Mysql.xtrabackup_binary, "--prepare", 374 | f"--target-dir={backup_dest}"] 375 | 376 | subprocess.run(xtrabackup_prepare, check=True) 377 | 378 | # Compress backup (structure in tar mysql/*) 379 | backup_file = f"/tmp/mysql_backup_{current_time}.tgz" 380 | tar = ["/bin/tar", "zcf", backup_file, "-C", backup_dir, backup_folder_name] 381 | subprocess.run(tar, check=True) 382 | 383 | # Upload Backup to S3 Bucket 384 | mc_args = [Minio.minio_binary, "cp", backup_file, "backup/mysqlbackup/"] 385 | subprocess.run(mc_args, check=True) 386 | 387 | # Remove old backup data 388 | rmtree(backup_dir) 389 | os.remove(backup_file) 390 | 391 | logging.info("Backup was successfully created") 392 | 393 | @staticmethod 394 | def create_backup_if_needed(maxage_seconds=60*60*6): 395 | """ 396 | Create a new backup if needed. 
Default age is 6h 397 | """ 398 | logging.debug("Checking for backups") 399 | 400 | consul_client = Consul.get_instance() 401 | if not consul_client.is_replication_leader(): 402 | logging.debug("We are not the replication master, skipping backup check") 403 | return False 404 | 405 | backup_name, backup_date = Minio.get_latest_backup() 406 | 407 | if Utils.is_refresh_needed(backup_date, timedelta(seconds=maxage_seconds)): 408 | logging.info("Old backup is outdated (%s, %s), creating new one", 409 | backup_name, backup_date) 410 | 411 | # Perform backup in extra thread to prevent Consul loop interruption 412 | backup_thread = threading.Thread(target=Mysql.backup_data) 413 | backup_thread.start() 414 | 415 | return True 416 | 417 | return False 418 | 419 | @staticmethod 420 | def restore_backup(): 421 | """ 422 | Restore the latest MySQL dump from the S3 Bucket 423 | """ 424 | logging.info("Restore MySQL Backup") 425 | current_time = time.time() 426 | 427 | if os.path.isfile(f"{Mysql.mysql_datadir}/ib_logfile0"): 428 | logging.info("MySQL is already initialized, cleaning up first") 429 | old_mysql_dir = f"{Mysql.mysql_datadir}_old_{current_time}" 430 | 431 | os.mkdir(old_mysql_dir, 0o700) 432 | 433 | # Renaming file per file, on some docker images 434 | # the complete directory can not be moved 435 | for entry in os.listdir(Mysql.mysql_datadir): 436 | source_name = f"{Mysql.mysql_datadir}/{entry}" 437 | dest_name = f"{old_mysql_dir}/{entry}" 438 | logging.debug("Moving %s to %s", source_name, dest_name) 439 | shutil.move(source_name, dest_name) 440 | 441 | logging.info("Old MySQL data moved to: %s", old_mysql_dir) 442 | 443 | 444 | backup_file, _ = Minio.get_latest_backup() 445 | 446 | if backup_file is None: 447 | logging.error("Unable to restore backup, no backup found in bucket") 448 | return False 449 | 450 | # Restore directory 451 | restore_dir = f"/tmp/mysql_restore_{current_time}" 452 | 453 | # Crate restore dir 454 | os.makedirs(restore_dir) 455 | 456 | # 
Download backup 457 | mc_download = [Minio.minio_binary, "cp", f"backup/mysqlbackup/{backup_file}", 458 | restore_dir] 459 | subprocess.run(mc_download, check=True) 460 | 461 | # Unpack backup 462 | tar = ["/bin/tar", "zxf", f"{restore_dir}/{backup_file}", "-C", restore_dir] 463 | subprocess.run(tar, check=True) 464 | 465 | # Ensure that this is a MySQL Backup 466 | if not os.path.isfile(f"{restore_dir}/mysql/ib_logfile0"): 467 | logging.error("Unpacked backup is not a MySQL backup") 468 | rmtree(restore_dir) 469 | return False 470 | 471 | # Restore backup 472 | xtrabackup = [Mysql.xtrabackup_binary, "--copy-back", 473 | f"--target-dir={restore_dir}/mysql"] 474 | subprocess.run(xtrabackup, check=True) 475 | 476 | # Change permissions of the restored data 477 | chown = ['chown', 'mysql.mysql', '-R', '/var/lib/mysql/'] 478 | subprocess.run(chown, check=True) 479 | 480 | # Remove old backup data 481 | rmtree(restore_dir) 482 | return True 483 | 484 | 485 | @staticmethod 486 | def restore_backup_or_exit(): 487 | """ 488 | Restore a backup or exit 489 | """ 490 | 491 | result = Mysql.restore_backup() 492 | 493 | if not result: 494 | logging.error("Unable to restore MySQL backup") 495 | sys.exit(1) 496 | -------------------------------------------------------------------------------- /docs/deployment-docker-swarm.md: -------------------------------------------------------------------------------- 1 | # Example - Using Docker Swarm 2 | 3 | In this example, a cluster consisting of five nodes running Debian 10 is used. The following services are deployed on the cluster: 4 | 5 | * Five Consul instances, they are used for election of the primary MySQL server, for service discovery, and for providing additional information about the state of the cluster. 6 | * One of the MinIO object storage to store MySQL backups. These backups are used to bootstrap new MySQL replicas automatically. MinIO needs at least to provide four nodes / volumes to provide highly available. 
In addition, deploying such a setup without labeling the Docker nodes and creating stateful volumes is hard. The data on the S3 Bucket is re-written periodically. Therefore, we don't deploy a highly available and replicated version of MinIO in this example. 7 | * One primary MySQL server (read/write) and two read-only MySQL replicas. 8 | * An instance of [ProxySQL](https://github.com/sysown/proxysql) is available on every MySQL-Server. ProxySQL is used to access the MySQL installations. Write requests (e.g., `INSERT` or `UPDATE`) are automatically sent to the replication leader, and read requests (e.g., `SELECT`) are sent to the replication followers. 9 | 10 | The Docker nodes should be running in different availability zones. Therefore, one Docker node or availability zone can fail, and the MySQL service is still available. 11 | 12 | When one Docker node fails, the aborted Docker containers are re-started on the remaining nodes. If the primary MySQL server fails, one of the replica MySQL servers is promoted to the new primary MySQL server, and a new replica server is started. If one of the replica MySQL servers fails, a new replica MySQL server is started, provisioned, and configured. 13 | 14 | ### Step 1 - Setup Docker 15 | 16 | Setup your [Docker Swarm](https://docs.docker.com/engine/swarm/). The following commands have to be executed on all nodes of the cluster. As an alternative, you can use the following [Ansible Playbook](https://github.com/jnidzwetzki/ansible-playbooks/tree/main/docker) to install Docker on the cluster.
17 | 18 | ```bash 19 | apt-get update 20 | apt-get install -y apt-transport-https ca-certificates curl gnupg2 software-properties-common sudo 21 | curl -fsSL https://download.docker.com/linux/debian/gpg | sudo apt-key add - 22 | add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/debian $(lsb_release -cs) stable" 23 | apt-get update 24 | apt-get install -y docker-ce docker-ce-cli containerd.io 25 | ``` 26 | 27 | ### Step 2 - Init the Docker Swarm 28 | 29 | On one of the nodes, execute the following commands to bootstrap the Docker Swarm: 30 | 31 | ```bash 32 | docker swarm init --advertise-addr 33 | ``` 34 | 35 | The command above will show how you can add further _worker nodes_ to the cluster. Worker nodes only run Docker containers and are __not__ part of the cluster management. The node that has initialized the cluster will be the only _manager node_ in the cluster. If this node becomes unavailable, the cluster runs into an unhealthy state. Therefore, you should at least have three _manager nodes_ in your cluster. 36 | 37 | To join a new node as _manager node_, execute the following command on a master node and execute the provided command on the new node: 38 | 39 | ```bash 40 | docker swarm join-token manager 41 | ``` 42 | The output of the command above should be executed on the worker nodes to join the cluster as managers.
43 | 44 | ```bash 45 | docker swarm join --token 46 | ``` 47 | 48 | After executing these commands, the status of the cluster should look as follows: 49 | 50 | ```bash 51 | $ docker node ls 52 | ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION 53 | cqshak7jcuh97oqtznbcorkjp * debian10-vm1 Ready Active Leader 19.03.13 54 | deihndvm1vwbym9q9x3fyksev debian10-vm2 Ready Active Reachable 19.03.13 55 | 3rqp1te4d66tm56b7a1zzlpr2 debian10-vm3 Ready Active Reachable 19.03.13 56 | 7l21f6mdy0dytmiy4oh70ttjo debian10-vm4 Ready Active Reachable 19.03.13 57 | uttuejl2q48hwizz3bya5engw debian10-vm5 Ready Active Reachable 19.03.13 58 | ``` 59 | 60 | __Note__: By default, manager nodes also execute Docker containers. This can lead to the situation that a manager node becomes unreliable if a heavy workload is processed; the node is detected as dead, and the workload is re-scheduled even if all nodes of the cluster are available. To avoid such situations, in a real-world setup, manager nodes should only act as manager nodes and not execute any workload. This can be done by executing `docker node update --availability drain ` for the manager nodes. 61 | 62 | ### Step 3 - Deploy the Services 63 | 64 | The deployment of the services to Docker Swarm is done with a [Compose file](https://github.com/jnidzwetzki/mysql-ha-cloud/tree/main/deployment). This file describes the services of the Docker Swarm cluster.
The file can be downloaded and deployed as follows: 65 | 66 | ```bash 67 | wget https://raw.githubusercontent.com/jnidzwetzki/mysql-ha-cloud/main/deployment/mysql-docker-swarm.yml 68 | docker stack deploy --compose-file mysql-docker-swarm.yml mysql 69 | ``` 70 | 71 | After the deployment is done, the stack should look as follows: 72 | 73 | ``` 74 | $ docker stack ps mysql 75 | ID NAME IMAGE NODE DESIRED STATE CURRENT STATE ERROR PORTS 76 | zywtlmvswfz1 mysql_minio.1 minio/minio:RELEASE.2020-10-18T21-54-12Z debian10-vm4 Running Running 53 seconds ago 77 | v8hks8xa6vub mysql_mysql.1 jnidzwetzki/mysql-ha-cloud:latest debian10-vm2 Running Preparing about a minute ago 78 | bhsvp0muev51 mysql_consul.1 consul:1.8 debian10-vm1 Running Running about a minute ago *:8500->8500/tcp 79 | 4no74auuqpv0 mysql_mysql.2 jnidzwetzki/mysql-ha-cloud:latest debian10-vm3 Running Preparing about a minute ago 80 | t1dan93zja0e mysql_consul.2 consul:1.8 debian10-vm2 Running Running about a minute ago *:8500->8500/tcp 81 | 0b3pyj32v5db mysql_mysql.3 jnidzwetzki/mysql-ha-cloud:latest debian10-vm1 Running Preparing about a minute ago 82 | gptp9fpmkw4r mysql_consul.3 consul:1.8 debian10-vm4 Running Running about a minute ago *:8500->8500/tcp 83 | i2egrq1cbieu mysql_consul.4 consul:1.8 debian10-vm5 Running Running 32 seconds ago *:8500->8500/tcp 84 | vvsf1wwb1zr2 mysql_consul.5 consul:1.8 debian10-vm3 Running Running about a minute ago *:8500->8500/tcp 85 | 86 | $ docker stack services mysql 87 | ID NAME MODE REPLICAS IMAGE PORTS 88 | 0v8qhwaaawx5 mysql_minio replicated 1/1 minio/minio:RELEASE.2020-10-18T21-54-12Z *:9000->9000/tcp 89 | pro64635i2j4 mysql_mysql replicated 3/3 (max 1 per node) jnidzwetzki/mysql-ha-cloud:latest 90 | ya9luugwcri4 mysql_consul replicated 5/5 (max 1 per node) consul:1.8 91 | ``` 92 | 93 | After the service is deployed, the state of the docker installation can be checked. 
On the Docker node, the following command can be excuted in one of the consul containers `a856acfc1635`: 94 | 95 | 96 | ```bash 97 | $ docker exec -t a856acfc1635 consul members 98 | Node Address Status Type Build Protocol DC Segment 99 | 234d94d9063f 10.0.3.3:8301 alive server 1.8.5 2 dc1 100 | 753784b1624a 10.0.3.5:8301 alive server 1.8.5 2 dc1 101 | cba13bbba731 10.0.3.2:8301 alive server 1.8.5 2 dc1 102 | f00780b002e8 10.0.3.6:8301 alive server 1.8.5 2 dc1 103 | f418f8ae1023 10.0.3.4:8301 alive server 1.8.5 2 dc1 104 | 0d744a098502 10.0.3.40:8301 alive client 1.8.4 2 dc1 105 | 72e398e0f1bc 10.0.3.41:8301 alive client 1.8.4 2 dc1 106 | 9e96a9596e76 10.0.3.42:8301 alive client 1.8.4 2 dc1 107 | ``` 108 | 109 | In the output above can be seen that the deployment of the Consul servers was successful. Three servers are deployed, and from the MySQL installations, three agents are started. 110 | 111 | ### Step 4 - Check Deployment 112 | 113 | After the deployment is done, you can check which MySQL nodes are avaialable and which node is the replication leader: 114 | 115 | ```bash 116 | $ docker exec -t a856acfc1635 consul kv get -recurse mcm/instances 117 | mcm/instances/10.0.3.40:{"ip_address": "10.0.3.40", "server_id": 44, "mysql_version": "8.0.21"} 118 | mcm/instances/10.0.3.41:{"ip_address": "10.0.3.41", "server_id": 45, "mysql_version": "8.0.21"} 119 | mcm/instances/10.0.3.42:{"ip_address": "10.0.3.42", "server_id": 46, "mysql_version": "8.0.21"} 120 | 121 | $ docker exec -t a856acfc1635 consul kv get mcm/replication_leader 122 | {"ip_address": "10.0.3.41"} 123 | ``` 124 | 125 | In addition, you can have a look at the MySQL replication configuration 126 | 127 | ```bash 128 | $ docker exec -t a856acfc1635 /bin/bash -c 'mysql -u root -p`echo $MYSQL_ROOT_PASSWORD` -e "SHOW SLAVE STATUS"' 129 | mysql: [Warning] Using a password on the command line interface can be insecure. 
130 | +----------------------------------+-------------+------------------+-------------+---------------+-----------------+---------------------+-------------------------------+---------------+-----------------------+------------------+-------------------+-----------------+---------------------+--------------------+------------------------+-------------------------+-----------------------------+------------+------------+--------------+---------------------+-----------------+-----------------+----------------+---------------+--------------------+--------------------+--------------------+-----------------+-------------------+----------------+-----------------------+-------------------------------+---------------+---------------+----------------+----------------+-----------------------------+------------------+--------------------------------------+-------------------------+-----------+---------------------+--------------------------------------------------------+--------------------+-------------+-------------------------+--------------------------+----------------+--------------------+--------------------+----------------------------------------------------------------------------------+---------------+----------------------+--------------+--------------------+------------------------+-----------------------+-------------------+ 131 | | Slave_IO_State | Master_Host | Master_User | Master_Port | Connect_Retry | Master_Log_File | Read_Master_Log_Pos | Relay_Log_File | Relay_Log_Pos | Relay_Master_Log_File | Slave_IO_Running | Slave_SQL_Running | Replicate_Do_DB | Replicate_Ignore_DB | Replicate_Do_Table | Replicate_Ignore_Table | Replicate_Wild_Do_Table | Replicate_Wild_Ignore_Table | Last_Errno | Last_Error | Skip_Counter | Exec_Master_Log_Pos | Relay_Log_Space | Until_Condition | Until_Log_File | Until_Log_Pos | Master_SSL_Allowed | Master_SSL_CA_File | Master_SSL_CA_Path | Master_SSL_Cert | Master_SSL_Cipher | Master_SSL_Key | Seconds_Behind_Master | 
Master_SSL_Verify_Server_Cert | Last_IO_Errno | Last_IO_Error | Last_SQL_Errno | Last_SQL_Error | Replicate_Ignore_Server_Ids | Master_Server_Id | Master_UUID | Master_Info_File | SQL_Delay | SQL_Remaining_Delay | Slave_SQL_Running_State | Master_Retry_Count | Master_Bind | Last_IO_Error_Timestamp | Last_SQL_Error_Timestamp | Master_SSL_Crl | Master_SSL_Crlpath | Retrieved_Gtid_Set | Executed_Gtid_Set | Auto_Position | Replicate_Rewrite_DB | Channel_Name | Master_TLS_Version | Master_public_key_path | Get_master_public_key | Network_Namespace | 132 | +----------------------------------+-------------+------------------+-------------+---------------+-----------------+---------------------+-------------------------------+---------------+-----------------------+------------------+-------------------+-----------------+---------------------+--------------------+------------------------+-------------------------+-----------------------------+------------+------------+--------------+---------------------+-----------------+-----------------+----------------+---------------+--------------------+--------------------+--------------------+-----------------+-------------------+----------------+-----------------------+-------------------------------+---------------+---------------+----------------+----------------+-----------------------------+------------------+--------------------------------------+-------------------------+-----------+---------------------+--------------------------------------------------------+--------------------+-------------+-------------------------+--------------------------+----------------+--------------------+--------------------+----------------------------------------------------------------------------------+---------------+----------------------+--------------+--------------------+------------------------+-----------------------+-------------------+ 133 | | Waiting for master to send event | 10.0.3.41 | replication_user | 3306 | 60 | 
binlog.000024 | 196 | 82df8cfe97e2-relay-bin.000002 | 365 | binlog.000024 | Yes | Yes | | | | | | | 0 | | 0 | 196 | 581 | None | | 0 | No | | | | | | 0 | No | 0 | | 0 | | | 45 | f2260821-2ced-11eb-89ef-02420a000329 | mysql.slave_master_info | 0 | NULL | Slave has read all relay log; waiting for more updates | 86400 | | | | | | | 1256e020-2cfe-11eb-a273-02420a00032a:1, 4aa0562f-28ac-11eb-93fa-02420a000305:1-8 | 1 | | | | | 1 | | 134 | +----------------------------------+-------------+------------------+-------------+---------------+-----------------+---------------------+-------------------------------+---------------+-----------------------+------------------+-------------------+-----------------+---------------------+--------------------+------------------------+-------------------------+-----------------------------+------------+------------+--------------+---------------------+-----------------+-----------------+----------------+---------------+--------------------+--------------------+--------------------+-----------------+-------------------+----------------+-----------------------+-------------------------------+---------------+---------------+----------------+----------------+-----------------------------+------------------+--------------------------------------+-------------------------+-----------+---------------------+--------------------------------------------------------+--------------------+-------------+-------------------------+--------------------------+----------------+--------------------+--------------------+----------------------------------------------------------------------------------+---------------+----------------------+--------------+--------------------+------------------------+-----------------------+-------------------+ 135 | ``` 136 | 137 | Or list the available backups of the database: 138 | 139 | ```bash 140 | $ docker exec -t a856acfc1635 mc ls backup/mysqlbackup 141 | [2020-11-20 21:50:24 UTC] 1.6MiB 
mysql_backup_1605909015.0471048.tgz 142 | [2020-11-20 21:50:34 UTC] 1.6MiB mysql_backup_1605909024.6657646.tgz 143 | [2020-11-21 03:51:21 UTC] 1.6MiB mysql_backup_1605930672.1543853.tgz 144 | [2020-11-21 09:52:18 UTC] 1.6MiB mysql_backup_1605952329.1124055.tgz 145 | [2020-11-22 12:46:39 UTC] 1.6MiB mysql_backup_1606049190.0292351.tgz 146 | [2020-11-22 18:50:19 UTC] 1.6MiB mysql_backup_1606071009.6974795.tgz 147 | ``` 148 | 149 | The DNS settings for the service discovery could also be tested: 150 | 151 | ```bash 152 | $ docker exec -t a856acfc1635 dig @127.0.0.1 -p 8600 _mysql._leader.service.consul SRV 153 | 154 | ; <<>> DiG 9.11.5-P4-5.1+deb10u2-Debian <<>> @127.0.0.1 -p 8600 _mysql._leader.service.consul SRV 155 | ; (1 server found) 156 | ;; global options: +cmd 157 | ;; Got answer: 158 | ;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 61130 159 | ;; flags: qr aa rd; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 3 160 | ;; WARNING: recursion requested but not available 161 | 162 | ;; OPT PSEUDOSECTION: 163 | ; EDNS: version: 0, flags:; udp: 4096 164 | ;; QUESTION SECTION: 165 | ;_mysql._leader.service.consul. IN SRV 166 | 167 | ;; ANSWER SECTION: 168 | _mysql._leader.service.consul. 0 IN SRV 1 1 3306 cd1e7b5ae9a4.node.dc1.consul. 169 | 170 | ;; ADDITIONAL SECTION: 171 | cd1e7b5ae9a4.node.dc1.consul. 0 IN A 10.0.3.41 172 | cd1e7b5ae9a4.node.dc1.consul. 
0 IN TXT "consul-network-segment=" 173 | 174 | ;; Query time: 1 msec 175 | ;; SERVER: 127.0.0.1#8600(127.0.0.1) 176 | ;; WHEN: Tue Nov 24 07:06:10 UTC 2020 177 | ;; MSG SIZE rcvd: 158 178 | 179 | 180 | 181 | $ docker exec -t a856acfc1635 dig @127.0.0.1 -p 8600 _mysql._follower.service.consul SRV 182 | 183 | ; <<>> DiG 9.11.5-P4-5.1+deb10u2-Debian <<>> @127.0.0.1 -p 8600 _mysql._follower.service.consul SRV 184 | ; (1 server found) 185 | ;; global options: +cmd 186 | ;; Got answer: 187 | ;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 46995 188 | ;; flags: qr aa rd; QUERY: 1, ANSWER: 2, AUTHORITY: 0, ADDITIONAL: 5 189 | ;; WARNING: recursion requested but not available 190 | 191 | ;; OPT PSEUDOSECTION: 192 | ; EDNS: version: 0, flags:; udp: 4096 193 | ;; QUESTION SECTION: 194 | ;_mysql._follower.service.consul. IN SRV 195 | 196 | ;; ANSWER SECTION: 197 | _mysql._follower.service.consul. 0 IN SRV 1 1 3306 f36ddfed8617.node.dc1.consul. 198 | _mysql._follower.service.consul. 0 IN SRV 1 1 3306 ddcadd280a98.node.dc1.consul. 199 | 200 | ;; ADDITIONAL SECTION: 201 | f36ddfed8617.node.dc1.consul. 0 IN A 10.0.3.40 202 | f36ddfed8617.node.dc1.consul. 0 IN TXT "consul-network-segment=" 203 | ddcadd280a98.node.dc1.consul. 0 IN A 10.0.3.42 204 | ddcadd280a98.node.dc1.consul. 0 IN TXT "consul-network-segment=" 205 | 206 | ;; Query time: 1 msec 207 | ;; SERVER: 127.0.0.1#8600(127.0.0.1) 208 | ;; WHEN: Tue Nov 24 07:06:20 UTC 2020 209 | ;; MSG SIZE rcvd: 260 210 | ``` 211 | 212 | ### Step 5 - Use the highly-available MySQL-Server 213 | 214 | On port `3306/tcp` (the default MySQL port) on all Docker nodes, you can now reach the highly-available MySQL-Server. As user use `MYSQL_APPLICATION_USER` and the `MYSQL_APPLICATION_PASSWORD` from the docker-swarm file. 215 | 216 | For example: 217 | 218 | ```bash 219 | mysql -u mysql_user -pmysql_secret -h debian10-vm1 220 | ``` 221 | 222 | While you work on the MySQL-Shell you can restart the Docker nodes. 
Docker Swarm will restart the missing sevices on other nodes and the MySQL orchestrator will reconfigure the replication setup in MySQL. The MySQL-Shell is usable all the time for read- and write requests. 223 | -------------------------------------------------------------------------------- /docs/images/architecture.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | Master slide 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | Node N 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | Node 2 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | MySQL 169 | 170 | 171 | 172 | Leader 173 | 174 | (Write) 175 | 176 | 177 | 178 | 179 | 180 | 181 | MySQL 182 | 183 | 184 | 185 | Follower 186 | 187 | (Read) 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | Consul 196 | 197 | 198 | 199 | (Service discovery and leader election) 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | ProxySQL 208 | 209 | 210 | 211 | (Dispatch MySQL connections) 212 | 213 | 214 | 215 | 216 | 217 | 218 | Docker Swarm 219 | 220 | 221 | 222 | or 223 | 224 | 225 | 226 | Kubernetes 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | Client 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | Node 1 247 | 248 | 249 | 250 | 251 | 
252 | 253 | MySQL 254 | 255 | 256 | 257 | Follower 258 | 259 | (Read) 260 | 261 | 262 | 263 | --------------------------------------------------------------------------------