├── mysql_cluster_manager
├── src
│ ├── mcm
│ │ ├── __init__.py
│ │ ├── utils.py
│ │ ├── minio.py
│ │ ├── proxysql.py
│ │ ├── actions.py
│ │ ├── consul.py
│ │ └── mysql.py
│ └── mysql_cluster_manager.py
├── requirements.txt
└── pylintrc
├── .gitignore
├── docs
├── images
│ ├── architecture.odg
│ ├── architecture.png
│ └── architecture.svg
├── deployment-kubernetes.md
└── deployment-docker-swarm.md
├── entry-point.sh
├── .github
└── workflows
│ ├── build.yml
│ └── codeql.yml
├── CONTRIBUTING.md
├── deployment
├── mysql-docker-swarm.yml
└── mysql-kubernetes-iscsi.yml
├── Dockerfile-mysql
├── CODE_OF_CONDUCT.md
├── README.md
└── LICENSE
/mysql_cluster_manager/src/mcm/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .venv
2 | .env
3 | settings.json
4 |
--------------------------------------------------------------------------------
/docs/images/architecture.odg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jnidzwetzki/mysql-ha-cloud/HEAD/docs/images/architecture.odg
--------------------------------------------------------------------------------
/docs/images/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/jnidzwetzki/mysql-ha-cloud/HEAD/docs/images/architecture.png
--------------------------------------------------------------------------------
/mysql_cluster_manager/requirements.txt:
--------------------------------------------------------------------------------
1 | mysql-connector-python==8.0.22
2 | netifaces==0.10.9
3 | pylint==2.5.3
4 | python-consul2==0.1.4
5 |
--------------------------------------------------------------------------------
/entry-point.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #
3 | # Start the MySQL cluster manager
4 | #
5 | ########################
6 |
7 | # Exit on error
8 | set -e
9 |
10 | ./mysql_cluster_manager.py join_or_bootstrap
11 |
--------------------------------------------------------------------------------
/.github/workflows/build.yml:
--------------------------------------------------------------------------------
1 | name: Build project
2 |
3 | on: [push, pull_request]
4 |
5 | jobs:
6 | compile:
7 | runs-on: ubuntu-22.04
8 |
9 | steps:
10 | - name: Checkout Source Code
11 | uses: actions/checkout@v2
12 | - name: Setup python 3.10
13 | uses: actions/setup-python@v2
14 | with:
15 | python-version: '3.10'
16 | - name: Install dependencies
17 | run: |
18 | python -m pip install -r mysql_cluster_manager/requirements.txt
19 | cd mysql_cluster_manager
20 | pylint src
21 | src/mysql_cluster_manager.py --help
22 |
23 | - name: Build docker image
24 | run: docker build -t jnidzwetzki/mysql-ha-cloud:latest -f Dockerfile-mysql .
25 |
26 | - name: Login at docker hub
27 | run: |
28 | docker login -u ${{secrets.DOCKER_USER}} -p ${{secrets.DOCKER_PASSWORD}}
29 |
30 | - name: Push image
31 | if: github.ref == 'refs/heads/main'
32 | run: |
33 | docker push jnidzwetzki/mysql-ha-cloud:latest
34 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # Contributing to this project
2 | First of all, thank you so much for being interested in contributing. At the moment, you can contribute to this project by:
3 |
4 | * Finding and reporting issues
5 | * Creating new examples
6 | * Requesting new features
7 | * Open pull requests
8 |
9 | ## New examples
10 | The provided examples for this project are very limited at the moment. Feel free to write new examples and open a pull request.
11 |
12 | ## Feature requests
13 | Feature requests are welcome. Please take a moment and describe the details of the new feature and why this is needed. Please provide as much detail and context as possible.
14 |
15 | ## Pull requests
16 | * Open a new issue
17 | * Fork the repository
18 | * Create a new feature branch named _contrib/issue-number_
19 | * Make your changes
20 | * Test the code (i.e. write some unit tests)
21 | * Create and submit a pull request
22 | * Watch the build state of the CI (GitHub Actions)
23 | * Wait for the merge
24 |
25 | ## License Agreement
26 | By contributing your code, you agree to license your contribution under the terms of the Apache 2.0 license.
27 |
--------------------------------------------------------------------------------
/mysql_cluster_manager/src/mcm/utils.py:
--------------------------------------------------------------------------------
1 | """This file contains the utils of the cluster manager"""
2 |
3 | import os
4 | import time
5 |
6 | from datetime import datetime
7 |
8 | import netifaces
9 |
10 | from mcm.minio import Minio
11 |
12 |
class Utils:
    """
    Utilities for the project
    """

    @staticmethod
    def get_local_ip_address():
        """
        Return the IPv4 address of the network interface named by the
        MCM_BIND_INTERFACE environment variable (default: "eth0").
        """

        interface = os.getenv('MCM_BIND_INTERFACE', "eth0")
        return netifaces.ifaddresses(interface)[netifaces.AF_INET][0]["addr"]

    @staticmethod
    def is_refresh_needed(last_execution, max_timedelta):
        """
        Return True when a new execution is needed: either no previous
        execution exists (last_execution is None) or more than
        max_timedelta has passed since last_execution.
        """
        if last_execution is None:
            return True

        return datetime.now() - last_execution > max_timedelta

    @staticmethod
    def wait_for_backup_exists(consul):
        """
        Wait for a backup to occur.

        Polls MinIO up to 100 times, sleeping five seconds between
        attempts while keeping the Consul sessions alive. Returns True
        as soon as a backup exists, False when all retries are exhausted.
        """

        Minio.setup_connection()

        retry_counter = 100

        for _ in range(retry_counter):
            backup_exists = Minio.does_backup_exists()

            if backup_exists:
                return True

            # Keep consul sessions alive while waiting
            consul.refresh_sessions()

            # Bug fix: time.sleep() takes seconds, not milliseconds.
            # The previous value of 5000 paused ~83 minutes per retry.
            time.sleep(5)

        return False
58 |
--------------------------------------------------------------------------------
/deployment/mysql-docker-swarm.yml:
--------------------------------------------------------------------------------
1 | version: "3.8"
2 |
3 | networks:
4 | backend:
5 |
6 | volumes:
7 | backup-volume:
8 |
9 | services:
10 | consul:
11 | image: consul:1.9
12 | networks:
13 | backend:
14 | aliases:
15 | - consul_cluster
16 | environment:
17 | - CONSUL_BIND_INTERFACE=eth0
18 | command: agent -ui -data-dir /consul/data -server -client 0.0.0.0 -retry-join consul_cluster -bootstrap-expect=5
19 | deploy:
20 | replicas: 5
21 | endpoint_mode: dnsrr
22 | placement:
23 | max_replicas_per_node: 1
24 | update_config:
25 | parallelism: 1
26 | delay: 60s
27 | restart_policy:
28 | condition: on-failure
29 | ports:
30 | - target: 8500
31 | published: 8500
32 | protocol: tcp
33 | mode: host
34 |
35 | mysql:
36 | image: jnidzwetzki/mysql-ha-cloud:latest
37 | networks:
38 | backend:
39 | environment:
40 | - CONSUL_BIND_INTERFACE=eth1
41 | - CONSUL_BOOTSTRAP_SERVER=consul_cluster
42 | - MINIO_ACCESS_KEY=minio
43 | - MINIO_SECRET_KEY=minio123
44 | - MINIO_URL=http://minio:9000
45 | - MCM_BIND_INTERFACE=eth1
46 | - MYSQL_ROOT_PASSWORD=verysecret123
47 | - MYSQL_BACKUP_USER=backup_user
48 | - MYSQL_BACKUP_PASSWORD=backup_secret
49 | - MYSQL_REPLICATION_USER=replication_user
50 | - MYSQL_REPLICATION_PASSWORD=replication_secret
51 | - MYSQL_APPLICATION_USER=mysql_user
52 | - MYSQL_APPLICATION_PASSWORD=mysql_secret
53 | deploy:
54 | replicas: 3
55 | placement:
56 | max_replicas_per_node: 1
57 | update_config:
58 | parallelism: 1
59 | delay: 60s
60 | restart_policy:
61 | condition: on-failure
62 | ports:
63 | - 6032:6032
64 | - 3306:6033
65 |
66 | minio:
67 | image: minio/minio:RELEASE.2020-10-18T21-54-12Z
68 | networks:
69 | backend:
70 | aliases:
71 | - minio_endpoint
72 | volumes:
73 | - backup-volume:/data
74 | ports:
75 | - 9000:9000
76 | environment:
77 | - MINIO_ACCESS_KEY=minio
78 | - MINIO_SECRET_KEY=minio123
79 | command: server /data
80 |
--------------------------------------------------------------------------------
/mysql_cluster_manager/src/mysql_cluster_manager.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python3

"""This file is part of the MySQL cluster manager.

Command line entry point: parses the requested operation, validates the
required environment variables and dispatches to the matching action.
"""

import os
import sys
import logging
import argparse

from mcm.actions import Actions
from mcm.consul import Consul
from mcm.mysql import Mysql
from mcm.proxysql import Proxysql

parser = argparse.ArgumentParser(
    description="MySQL cluster manager",
    epilog="For more info, please see: https://github.com/jnidzwetzki/mysql-ha-cloud")

# Keep this help text in sync with the dispatch table below.
# (Previously mysql_autobackup and proxysql_init were missing here.)
AVAILABLE_OPERATIONS = ("(join_or_bootstrap, mysql_backup, mysql_restore, mysql_start, "
                        "mysql_stop, mysql_autobackup, proxysql_init)")
parser.add_argument('operation', metavar='operation',
                    help=f'Operation to be executed {AVAILABLE_OPERATIONS}')

log_levels = ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL')
parser.add_argument('--log-level', default='INFO', choices=log_levels)

# Parse args
args = parser.parse_args()

# Configure logging
logging.basicConfig(level=args.log_level,
                    format='%(asctime)-15s %(levelname)s %(name)s %(message)s')

# Check for all needed env vars before doing any work
required_envvars = ['CONSUL_BIND_INTERFACE', 'CONSUL_BOOTSTRAP_SERVER',
                    'MINIO_ACCESS_KEY', 'MINIO_SECRET_KEY', 'MINIO_URL',
                    'MYSQL_ROOT_PASSWORD', 'MYSQL_BACKUP_USER', 'MYSQL_BACKUP_PASSWORD',
                    'MYSQL_REPLICATION_USER', 'MYSQL_REPLICATION_PASSWORD']

for required_var in required_envvars:
    if required_var not in os.environ:
        logging.error("Required environment %s not found, exiting", required_var)
        sys.exit(1)

# Perform operations
if args.operation == 'join_or_bootstrap':
    Actions.join_or_bootstrap()
elif args.operation == 'mysql_backup':
    Mysql.backup_data()
elif args.operation == 'mysql_restore':
    Mysql.restore_backup()
elif args.operation == 'mysql_start':
    Mysql.server_start()
elif args.operation == 'mysql_stop':
    Mysql.server_stop()
elif args.operation == 'mysql_autobackup':
    Mysql.create_backup_if_needed()
elif args.operation == 'proxysql_init':
    Proxysql.inital_setup()
    nodes = Consul.get_instance().get_all_registered_nodes()
    Proxysql.set_mysql_server(nodes)
else:
    # Bug fix: '{args.operation}' built a one-element set literal and
    # logged e.g. "{'foo'}"; pass the value itself to the lazy formatter.
    logging.error("Unknown operation: %s", args.operation)
    sys.exit(1)
--------------------------------------------------------------------------------
/Dockerfile-mysql:
--------------------------------------------------------------------------------
1 | #
2 | # Build with:
3 | #
4 | # docker build -t jnidzwetzki/mysql-ha-cloud:latest -f Dockerfile-mysql .
5 | #
6 | #################
7 |
8 | FROM mysql:8.0.21
9 |
10 | SHELL ["/bin/bash", "-c"]
11 | WORKDIR /cluster
12 |
13 | COPY ./mysql_cluster_manager/src .
14 | COPY ./mysql_cluster_manager/requirements.txt .
15 | COPY ./entry-point.sh .
16 |
17 | RUN \
18 | # \
19 | # Install GPG Key \
20 | # \
21 | apt-key adv --keyserver hkp://keyserver.ubuntu.com:80 --recv-keys 467B942D3A79BD29 && \
22 | # \
23 | # Pin MySQL to 8.0.21 due to: https://jira.percona.com/browse/PXB-2315 \
24 | # \
25 | apt-mark hold mysql-common mysql-community-client mysql-community-client-core mysql-community-server-core && \
26 | # \
27 | # Run System Upgrade \
28 | # \
29 | apt-get update && \
30 | apt-get upgrade -y && \
31 | # \
32 | # Install system basics \
33 | # \
34 | apt-get install -y unzip curl wget gnupg2 lsb-release procps && \
35 | # \
36 | # Install percona XtraBackup \
37 | # \
38 | apt-get install -y libdbd-mysql-perl libcurl4-openssl-dev rsync libev4 && \
39 | wget https://www.percona.com/downloads/Percona-XtraBackup-LATEST/Percona-XtraBackup-8.0.14/binary/debian/buster/x86_64/percona-xtrabackup-80_8.0.14-1.buster_amd64.deb -O /tmp/xtrabackup.deb && \
40 | dpkg -i /tmp/xtrabackup.deb && \
41 | rm /tmp/xtrabackup.deb && \
42 | # \
43 | # Install consul \
44 | # \
45 | wget https://releases.hashicorp.com/consul/1.8.4/consul_1.8.4_linux_amd64.zip -O /tmp/consul.zip && \
46 | echo "220b0af8e439d2fe3fc7e1ca07bdbda1f3ee5b2fa889983c04e7004d99ade5ece005b45e1288bfcbe2bf847f23d35684845bd6edbf59fe4220be8e9e83f05439 /tmp/consul.zip" | sha512sum -c && \
47 | unzip /tmp/consul.zip -d /usr/local/bin && \
48 | rm /tmp/consul.zip && \
49 | # \
50 | # Install minIO client \
51 | # \
52 | wget https://dl.min.io/client/mc/release/linux-amd64/mc -O /usr/local/bin/mc && \
53 | chmod +x /usr/local/bin/mc && \
54 | # \
55 | # Install mysql cluster manager \
56 | # \
57 | apt-get install -y python3.7 python3.7-dev python3-pip && \
58 | pip3 install -r requirements.txt && \
59 | # \
60 | # Install ProxySQL \
61 | # \
62 | wget https://github.com/sysown/proxysql/releases/download/v2.0.15/proxysql_2.0.15-debian10_amd64.deb && \
63 | dpkg -i proxysql_2.0.15-debian10_amd64.deb && \
64 | rm proxysql_2.0.15-debian10_amd64.deb
65 |
66 | CMD ["bash", "entry-point.sh"]
67 | EXPOSE 6032/tcp
68 |
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | #
7 | # ******** NOTE ********
8 | # We have attempted to detect the languages in your repository. Please check
9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 |
14 | on:
15 | push:
16 | branches: [ "main" ]
17 | pull_request:
18 | # The branches below must be a subset of the branches above
19 | branches: [ "main" ]
20 | schedule:
21 | - cron: '23 3 * * 5'
22 |
23 | jobs:
24 | analyze:
25 | name: Analyze
26 | runs-on: ubuntu-latest
27 | permissions:
28 | actions: read
29 | contents: read
30 | security-events: write
31 |
32 | strategy:
33 | fail-fast: false
34 | matrix:
35 | language: [ 'python' ]
36 | # CodeQL supports [ 'cpp', 'csharp', 'go', 'java', 'javascript', 'python', 'ruby' ]
37 | # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support
38 |
39 | steps:
40 | - name: Checkout repository
41 | uses: actions/checkout@v3
42 |
43 | # Initializes the CodeQL tools for scanning.
44 | - name: Initialize CodeQL
45 | uses: github/codeql-action/init@v2
46 | with:
47 | languages: ${{ matrix.language }}
48 | # If you wish to specify custom queries, you can do so here or in a config file.
49 | # By default, queries listed here will override any specified in a config file.
50 | # Prefix the list here with "+" to use these queries and those in the config file.
51 |
52 | # Details on CodeQL's query packs refer to : https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
53 | # queries: security-extended,security-and-quality
54 |
55 |
56 | # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
57 | # If this step fails, then you should remove it and run the build manually (see below)
58 | - name: Autobuild
59 | uses: github/codeql-action/autobuild@v2
60 |
61 | # ℹ️ Command-line programs to run using the OS shell.
62 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
63 |
64 | # If the Autobuild fails above, remove it and uncomment the following three lines.
65 | # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance.
66 |
67 | # - run: |
68 | # echo "Run, Build Application using script"
69 | # ./location_of_script_within_repo/buildscript.sh
70 |
71 | - name: Perform CodeQL Analysis
72 | uses: github/codeql-action/analyze@v2
73 |
--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Contributor Covenant Code of Conduct
2 |
3 | ## Our Pledge
4 |
5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation.
6 |
7 | ## Our Standards
8 |
9 | Examples of behavior that contributes to creating a positive environment include:
10 |
11 | * Using welcoming and inclusive language
12 | * Being respectful of differing viewpoints and experiences
13 | * Gracefully accepting constructive criticism
14 | * Focusing on what is best for the community
15 | * Showing empathy towards other community members
16 |
17 | Examples of unacceptable behavior by participants include:
18 |
19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances
20 | * Trolling, insulting/derogatory comments, and personal or political attacks
21 | * Public or private harassment
22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission
23 | * Other conduct which could reasonably be considered inappropriate in a professional setting
24 |
25 | ## Our Responsibilities
26 |
27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior.
28 |
29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful.
30 |
31 | ## Scope
32 |
33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers.
34 |
35 | ## Enforcement
36 |
37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at jnidzwetzki@gmx.de. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately.
38 |
39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership.
40 |
41 | ## Attribution
42 |
43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version]
44 |
45 | [homepage]: http://contributor-covenant.org
46 | [version]: http://contributor-covenant.org/version/1/4/
47 |
--------------------------------------------------------------------------------
/mysql_cluster_manager/src/mcm/minio.py:
--------------------------------------------------------------------------------
1 | """This file is part of the MySQL cluster manager"""
2 |
3 | import os
4 | import logging
5 | import datetime
6 | import subprocess
7 |
class Minio:
    """
    This class encapsulates all Minio related things.

    All operations shell out to the MinIO client binary ("mc"); the
    connection parameters are read from the MINIO_* environment variables.
    """

    # Path to the MinIO client executable inside the container
    minio_binary = "/usr/local/bin/mc"

    @staticmethod
    def setup_connection():
        """
        Setup the MinIO agent.

        Registers the MinIO server under the alias "backup", creates the
        backup bucket (if missing) and attaches an expiry policy to it.
        Raises subprocess.CalledProcessError when any "mc" call fails.
        """

        logging.info("Setup MinIO agent")

        minio_url = os.environ.get("MINIO_URL")
        minio_access_key = os.environ.get("MINIO_ACCESS_KEY")
        minio_secret_key = os.environ.get("MINIO_SECRET_KEY")

        bucket_name = "backup/mysqlbackup"

        # Register server
        mc_args = [Minio.minio_binary, "alias", "set", "backup",
                   minio_url, minio_access_key, minio_secret_key]
        subprocess.run(mc_args, check=True)

        # Create bucket ("-p" suppresses the error if it already exists)
        mc_create_bucket = [Minio.minio_binary, "mb", bucket_name, "-p"]
        subprocess.run(mc_create_bucket, check=True)

        # Set expire policy on bucket
        # NOTE(review): mc documents this flag as "--expiry-days"; the
        # single-dash spelling relies on the CLI parser's leniency -- verify.
        mc_set_policy_bucket = [Minio.minio_binary, "ilm", "edit", "--id=expire_rule",
                                "-expiry-days=7", bucket_name]
        subprocess.run(mc_set_policy_bucket, check=True)

    @staticmethod
    def get_backup_info():
        """
        Get the information about backups.

        Returns the raw stdout lines (bytes) of "mc find", one line per
        backup file in the format "<change date> # <file name>".
        """
        # Call Setup to ensure bucket and connection do exist
        Minio.setup_connection()

        logging.debug("Searching for latest MySQL Backup")
        mc_search = [Minio.minio_binary, "find", "backup/mysqlbackup/", "--name",
                     "mysql*.tgz", "-print", "{time} # {base}"]

        # Example output:
        # mc find backup/mysqlbackup/ --name "mysql*.tgz" -print '{time} # {base}'
        # 2020-11-08 08:42:12 UTC # mysql_backup_1604824911.437146.tgz
        # 2020-11-08 08:50:53 UTC # mysql_backup_1604825437.6691067.tgz
        # 2020-11-08 08:55:03 UTC # mysql_backup_1604825684.9835322.tgz

        process = subprocess.run(mc_search, check=True, capture_output=True)
        files = process.stdout.splitlines()

        return files

    @staticmethod
    def does_backup_exists():
        """
        Does at least one old backup exist in the bucket?
        """
        files = Minio.get_backup_info()

        if not files:
            logging.debug("S3 Bucket is empty")
            return False

        return True

    @staticmethod
    def get_latest_backup():
        """
        Get the latest backup filename from the bucket.

        Returns a (filename, change date) tuple; both values are None
        when the bucket contains no backups.
        """
        files = Minio.get_backup_info()

        newest_changedate = None
        newest_file = None

        # Take the newest file
        for element in files:
            # Each line is b"<date> UTC # <filename>" (see get_backup_info)
            element_changedate, element_filename = element.decode().split("#")

            # Remove empty chars after split
            element_changedate = element_changedate.strip()
            element_filename = element_filename.strip()

            element_change_date = datetime.datetime.strptime(element_changedate,
                                                             '%Y-%m-%d %H:%M:%S UTC')

            if (newest_changedate is None) or (element_change_date > newest_changedate):
                newest_changedate = element_change_date
                newest_file = element_filename

        logging.debug("Newest backup file '%s', date '%s'", newest_file, newest_changedate)

        return (newest_file, newest_changedate)
106 |
--------------------------------------------------------------------------------
/mysql_cluster_manager/src/mcm/proxysql.py:
--------------------------------------------------------------------------------
1 | """This file contains the ProxySQL related actions"""
2 |
3 | import os
4 | import logging
5 | import subprocess
6 |
7 | from mcm.mysql import Mysql
8 |
class Proxysql:
    """
    This class encapsulates all ProxySQL related things.

    Static methods talk to the local ProxySQL admin interface on
    port 6032 through Mysql.execute_statement_or_exit().
    """

    def __init__(self):
        """
        Init the instance
        """
        # MySQL backend hosts currently configured in ProxySQL; starts
        # as an empty tuple, so the first comparison against a (sorted)
        # server list always triggers a reconfiguration.
        self.configured_mysql_hosts = ()


    @staticmethod
    def inital_setup():
        """
        Initial setup of ProxySQL.

        Configures the monitoring user, the writer/reader hostgroups,
        the read/write split query rules and the application user, then
        persists and activates the configuration.
        (Method name keeps the historical "inital" spelling for callers.)
        """
        logging.info("Performing initial ProxySQL setup")

        # Setup Monitoring User
        replication_user = os.environ.get("MYSQL_REPLICATION_USER")
        replication_password = os.environ.get("MYSQL_REPLICATION_PASSWORD")

        # NOTE(review): values are interpolated into SQL via f-strings.
        # They come from operator-controlled environment variables, but
        # parameterized statements would be safer.
        Proxysql.perform_sql_query(f"UPDATE global_variables SET variable_value='{replication_user}' "
                                   "WHERE variable_name='mysql-monitor_username'")
        Proxysql.perform_sql_query(f"UPDATE global_variables SET variable_value='{replication_password}' "
                                   "WHERE variable_name='mysql-monitor_password'")

        # Configure read write hostgroup (writer = 1, reader = 2)
        Proxysql.perform_sql_query("DELETE FROM mysql_replication_hostgroups")
        Proxysql.perform_sql_query("INSERT INTO mysql_replication_hostgroups "
                                   "(writer_hostgroup, reader_hostgroup,comment) VALUES (1, 2, 'cluster1')")

        # Configure read write split: SELECTs go to the readers (apply=0
        # lets the next rule still match), SELECT ... FOR UPDATE must go
        # to the writer.
        Proxysql.perform_sql_query("INSERT INTO mysql_query_rules (active, match_digest, "
                                   "destination_hostgroup, apply) VALUES (1, '^SELECT.*', 2, 0)")
        Proxysql.perform_sql_query("INSERT INTO mysql_query_rules (active, match_digest, "
                                   "destination_hostgroup, apply) VALUES (1, '^SELECT.*FOR UPDATE', 1, 1)")

        # Configure Application User
        application_user = os.environ.get("MYSQL_APPLICATION_USER")
        application_password = os.environ.get("MYSQL_APPLICATION_PASSWORD")

        Proxysql.perform_sql_query("DELETE FROM mysql_users")
        Proxysql.perform_sql_query("INSERT INTO mysql_users(username, password, default_hostgroup) "
                                   f"VALUES ('{application_user}', '{application_password}', 1)")

        # Persist and activate config
        Proxysql.persist_and_activate_config()

    @staticmethod
    def persist_and_activate_config():
        """
        Persist and activate the ProxySQL configuration.

        LOAD ... TO RUNTIME activates the pending admin-table changes;
        SAVE ... TO DISK makes them survive a ProxySQL restart.
        """
        Proxysql.perform_sql_query("LOAD MYSQL VARIABLES TO RUNTIME")
        Proxysql.perform_sql_query("LOAD MYSQL SERVERS TO RUNTIME")
        Proxysql.perform_sql_query("LOAD MYSQL USERS TO RUNTIME")
        Proxysql.perform_sql_query("LOAD MYSQL QUERY RULES TO RUNTIME")

        Proxysql.perform_sql_query("SAVE MYSQL VARIABLES TO DISK")
        Proxysql.perform_sql_query("SAVE MYSQL SERVERS TO DISK")
        Proxysql.perform_sql_query("SAVE MYSQL USERS TO DISK")
        Proxysql.perform_sql_query("SAVE MYSQL QUERY RULES TO DISK")

    @staticmethod
    def set_mysql_server(mysql_servers):
        """
        Set the backend MySQL server.

        Replaces all backend servers in writer hostgroup 1 with the
        given host names, then activates and persists the server list.
        """
        logging.info("Removing all old backend MySQL Server")
        Proxysql.perform_sql_query("DELETE FROM mysql_servers")

        for mysql_server in mysql_servers:
            logging.info("Adding %s as backend MySQL Server", mysql_server)
            Proxysql.perform_sql_query("INSERT INTO mysql_servers(hostgroup_id, hostname, port) "
                                       f"VALUES (1, '{mysql_server}', 3306)")

        Proxysql.perform_sql_query("LOAD MYSQL SERVERS TO RUNTIME")
        Proxysql.perform_sql_query("SAVE MYSQL SERVERS TO DISK")

    def update_mysql_server_if_needed(self, current_mysql_servers):
        """
        Update the MySQL-Servers if needed (changed).

        Returns True when the backend list changed and ProxySQL was
        reconfigured, False otherwise.
        """
        # Sort in place so the comparison is order independent
        # (NOTE: this mutates the caller's list).
        current_mysql_servers.sort()

        if self.configured_mysql_hosts != current_mysql_servers:
            logging.info("MySQL backend has changed (old=%s, new=%s), reconfiguring",
                         self.configured_mysql_hosts, current_mysql_servers)
            Proxysql.set_mysql_server(current_mysql_servers)
            self.configured_mysql_hosts = current_mysql_servers
            return True

        return False

    @staticmethod
    def perform_sql_query(sql):
        """
        Perform a SQL query against the local ProxySQL admin interface.
        """
        # admin/admin on port 6032 are ProxySQL's default admin credentials
        Mysql.execute_statement_or_exit(sql=sql, username="admin", password="admin", database="", port=6032)

    @staticmethod
    def start_proxysql():
        """
        Start the ProxySQL.
        """

        # Init proxysql ("--initial" resets the config DB from /etc/proxysql.cnf)
        proxysql_init = ["/usr/bin/proxysql", "--idle-threads", "-c", "/etc/proxysql.cnf", "--initial"]
        subprocess.run(proxysql_init, check=True)

        # Start the proxysql
        # proxysql = ["/usr/bin/proxysql", "--idle-threads", "-c", "/etc/proxysql.cnf"]
        # subprocess.run(proxysql, check=True)

        return True
127 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MySQL-HA-Cloud - A Highly-Available Self-Hosted MySQL Cloud Container Orchestrator
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 | This project provides a container image for a highly-available MySQL installation that can be deployed to Kubernetes or Docker Swarm environments.
14 |
15 |
16 |
17 |
18 | **Project state:** Beta version available
19 |
20 | ## Architecture
21 |
22 |
23 | The `mysql-ha-cloud` container image contains a [MySQL 8.0 Server](https://dev.mysql.com/doc/relnotes/mysql/8.0/en/), [Consul](https://www.hashicorp.com/products/consul) for the service discovery, health checks of the nodes, and the MySQL replication leader election. [ProxySQL](https://proxysql.com/) provides the entry point for the client; the software forwards the connections of the client to the MySQL nodes. Write requests are sent to the replication leader, and read requests are sent to the replication follower. In addition, [MinIO](https://min.io/) is used as backup storage and to bootstrap the replication follower. Backups are created by using [XtraBackup](https://www.percona.com/software/mysql-database/percona-xtrabackup) without creating table locks.
24 |
25 | Container Orchestrators like [Kubernetes](https://kubernetes.io/) or [Docker Swarm](https://docs.docker.com/get-started/swarm-deploy/) can be used to deploy the provided [container image](https://hub.docker.com/repository/docker/jnidzwetzki/mysql-ha-cloud).
26 |
27 | The complete architecture is highly-available; failing and newly started containers are handled automatically. A new replication leader is automatically elected if the current leader fails. ProxySQL redirects database connections transparently to the nodes; the complete distribution and fail-over logic are hidden from the client applications. The solution is also horizontal scalable, new MySQL replication follower nodes can be added, and the query load is automatically distributed to these nodes.
28 |
29 | ## Features
30 |
31 | * ✅ Automatic (non locking) backups on S3 Buckets using Xtrabackup and MinIO
32 | * ✅ Automatic MySQL replication leader election
33 | * ✅ Automatic MySQL replication leader configuration and fail-over
34 | * ✅ Automatic MySQL replication follower configuration
35 | * ✅ Automatic MySQL provisioning
36 | * ✅ Transparent connection routing for read-/write-splits using ProxySQL
37 | * ✅ Horizontal scalable
38 | * ✅ Compatible with Kubernetes and Docker Swarm
39 |
40 | ## What is The Main Focus of This Project?
41 |
42 | This project provides a robust, tested, and easy to deploy container image for self-hosted MySQL cloud installations. The goal is that everybody can deploy highly-available and scalable MySQL installations and eliminate the DBMS as a single point of failure in their architecture.
43 |
44 | ## Why Do I Need MySQL-HA-Cloud?
45 |
46 | In today's software development, robust applications are often developed as stateless cloud-native containers. Such containers can be easily moved between hosts, automatically restarted on failures, and replicated to handle increasing workloads. On the other hand, data are stored in relational database systems (RDBMS), which are often running on bare-metal hardware. Relational databases are stateful applications that are hard to scale, and they are often a single point of failure; high availability (HA) is rarely implemented.
47 |
48 | ## Are NoSQL Databases a Solution?
49 |
50 | NoSQL databases are mostly cloud-native applications; however, they lack the support of a full-fledged relational database. Features such as transactions, complex data models, or consistency are omitted to make these systems horizontally scalable and fault-tolerant. However, simple tasks that can easily be implemented by using a relational database (e.g., an increasing counter, secondary indexes, isolation of uncommitted data, or joins) can be hard to implement. Therefore, relational databases are still used by modern applications.
51 |
52 | ## Deployment and Usage Examples
53 | * Deployment using [Docker Swarm](docs/deployment-docker-swarm.md)
54 | * Deployment using [Kubernetes](docs/deployment-kubernetes.md)
55 |
56 | ## Are There Other Solutions?
57 |
58 | Of course, there are other projects that also focus on highly available MySQL systems. For instance:
59 |
60 | * [MySQL replication](https://dev.mysql.com/doc/refman/8.0/en/replication.html)
61 | * [Galera cluster for MySQL](https://galeracluster.com/products/)
62 | * [MySQL InnoDB Cluster](https://dev.mysql.com/doc/refman/8.0/en/admin-api-userguide.html)
63 | * [Signal 18 replication manager](https://signal18.io/products/srm)
64 | * [Autopilot pattern for MySQL](https://github.com/autopilotpattern/mysql)
65 | * [Percona Kubernetes Operator for Percona XtraDB Cluster](https://www.percona.com/doc/kubernetes-operator-for-pxc/index.html)
66 |
67 | ## What's next?
68 | * If you like the project, please give it a star on GitHub!
69 | * For more information see [https://github.com/jnidzwetzki](https://github.com/jnidzwetzki).
70 |
--------------------------------------------------------------------------------
/deployment/mysql-kubernetes-iscsi.yml:
--------------------------------------------------------------------------------
1 | apiVersion: v1
2 | kind: Secret
3 | metadata:
4 | name: chap-secret
5 | type: "kubernetes.io/iscsi-chap"
6 | data:
7 | node.session.auth.username_in: bXl1c2VyLW91dGdvaW5n
8 | node.session.auth.password_in: bXlwYXNzMg==
9 | node.session.auth.username: bXl1c2VyLWluY29taW5n
10 | node.session.auth.password: bXlwYXNzMQ==
11 |
12 | ---
13 |
14 | apiVersion: apps/v1
15 | kind: Deployment
16 | metadata:
17 | name: minio
18 | spec:
19 | selector:
20 | matchLabels:
21 | app: minio # has to match .spec.template.metadata.labels
22 | strategy:
23 | type: Recreate
24 | template:
25 | metadata:
26 | labels:
27 | app: minio #This label is used as a selector in Service definition
28 | spec:
29 | containers:
30 | - name: minio
31 | image: minio/minio:RELEASE.2020-10-18T21-54-12Z
32 | args:
33 | - server
34 | - /data
35 | env:
36 | - name: MINIO_ACCESS_KEY
37 | value: "minio"
38 | - name: MINIO_SECRET_KEY
39 | value: "minio123"
40 | ports:
41 | - containerPort: 9000
42 | volumeMounts:
43 | - mountPath: "/data"
44 | name: iscsivol
45 | tolerations:
46 | - key: "node.kubernetes.io/unreachable"
47 | operator: "Exists"
48 | effect: "NoExecute"
49 | tolerationSeconds: 30
50 | - key: "node.kubernetes.io/not-ready"
51 | operator: "Exists"
52 | effect: "NoExecute"
53 | tolerationSeconds: 30
54 |
55 | volumes:
56 | - name: iscsivol
57 | iscsi:
58 | targetPortal: 192.168.178.199
59 | iqn: iqn.2020-12.block-storage:lun1
60 | lun: 1
61 | fsType: ext4
62 | readOnly: false
63 | chapAuthDiscovery: false
64 | chapAuthSession: true
65 | secretRef:
66 | name: chap-secret
67 | ---
68 |
69 | apiVersion: v1
70 | kind: Service
71 | metadata:
72 | name: minio
73 | spec:
74 | type: NodePort
75 | ports:
76 | - port: 9000
77 | targetPort: 9000
78 | nodePort: 30013
79 | protocol: TCP
80 | selector:
81 | # Looks for labels `app:minio` in the namespace and applies the spec
82 | app: minio
83 |
84 | ---
85 |
86 | apiVersion: v1
87 | kind: Service
88 | metadata:
89 | name: consul
90 | labels:
91 | app: consul
92 | spec:
93 | type: NodePort
94 | ports:
95 | - port: 8500
96 | targetPort: 8500
97 | nodePort: 30014
98 | protocol: TCP
99 | selector:
100 | # Looks for labels `app:consul` in the namespace and applies the spec
101 | app: consul
102 |
103 | ---
104 |
105 | apiVersion: apps/v1
106 | kind: StatefulSet
107 | metadata:
108 | name: consul
109 | spec:
110 | selector:
111 | matchLabels:
112 | app: consul # has to match .spec.template.metadata.labels
113 | serviceName: "consul"
114 | replicas: 3
115 | template:
116 | metadata:
117 | labels:
118 | app: consul # has to match .spec.selector.matchLabels
119 | spec:
120 | terminationGracePeriodSeconds: 10
121 | containers:
122 | - name: consul
123 | image: consul:1.9
124 | args:
125 | - agent
126 | - -ui
127 | - -data-dir
128 | - /consul/data
129 | - -server
130 | - -client
131 | - 0.0.0.0
132 | - -retry-join
133 | - consul-0.consul
134 | - -retry-join
135 | - consul-1.consul
136 | - -retry-join
137 | - consul-2.consul
138 | - -bootstrap-expect=3
139 | ports:
140 | - containerPort: 8500
141 | name: web
142 | ---
143 |
144 | apiVersion: apps/v1
145 | kind: StatefulSet
146 | metadata:
147 | name: mysql
148 | spec:
149 | selector:
150 | matchLabels:
151 | app: mysql # has to match .spec.template.metadata.labels
152 | serviceName: "mysql"
153 | replicas: 3
154 | template:
155 | metadata:
156 | labels:
157 | app: mysql # has to match .spec.selector.matchLabels
158 | spec:
159 | terminationGracePeriodSeconds: 10
160 | containers:
161 | - name: mysql
162 | image: jnidzwetzki/mysql-ha-cloud:latest
163 | ports:
164 | - containerPort: 3306
165 | name: mysql
166 | - containerPort: 6032
167 | name: sqlproxy
168 | env:
169 | - name: CONSUL_BIND_INTERFACE
170 | value: eth0
171 | - name: CONSUL_BOOTSTRAP_SERVER
172 | value: "consul-0.consul"
173 | - name: MINIO_ACCESS_KEY
174 | value: minio
175 | - name: MINIO_SECRET_KEY
176 | value: minio123
177 | - name: MINIO_URL
178 | value: http://minio:9000
179 | - name: MCM_BIND_INTERFACE
180 | value: eth0
181 | - name: MYSQL_ROOT_PASSWORD
182 | value: verysecret123
183 | - name: MYSQL_BACKUP_USER
184 | value: backup_user
185 | - name: MYSQL_BACKUP_PASSWORD
186 | value: backup_secret
187 | - name: MYSQL_REPLICATION_USER
188 | value: replication_user
189 | - name: MYSQL_REPLICATION_PASSWORD
190 | value: replication_secret
191 | - name: MYSQL_APPLICATION_USER
192 | value: mysql_user
193 | - name: MYSQL_APPLICATION_PASSWORD
194 | value: mysql_secret
195 |
196 | ---
197 |
198 | apiVersion: v1
199 | kind: Service
200 | metadata:
201 | name: mysql
202 | labels:
203 | app: mysql
204 | spec:
205 | type: NodePort
206 | ports:
207 | - port: 3306
208 | targetPort: 3306
209 | nodePort: 30015
210 | protocol: TCP
211 | name: mysql
212 | - port: 6032
213 | targetPort: 6032
214 | nodePort: 30016
215 | protocol: TCP
216 | name: mysqlproxy
217 | selector:
218 | # Looks for labels `app:mysql` in the namespace and applies the spec
219 | app: mysql
220 |
--------------------------------------------------------------------------------
/mysql_cluster_manager/src/mcm/actions.py:
--------------------------------------------------------------------------------
1 | """This file contains the actions of the cluster manager"""
2 |
3 | import sys
4 | import time
5 | import logging
6 |
7 | from datetime import timedelta, datetime
8 |
9 | from mcm.consul import Consul
10 | from mcm.minio import Minio
11 | from mcm.mysql import Mysql
12 | from mcm.proxysql import Proxysql
13 | from mcm.utils import Utils
14 |
class Actions:
    """The actions of the application (entry points of the cluster manager)."""

    @staticmethod
    def join_or_bootstrap():
        """
        Join the existing cluster or bootstrap a new cluster.

        High-level flow:
          1. Start the local Consul agent and connect to MinIO.
          2. Wait until the cluster is stable (a leader is known, or no
             other node is registered at all).
          3. Try to become the replication leader and provision the local
             MySQL data directory (fresh init or backup restore).
          4. Start ProxySQL and MySQL, register this node and its service
             in Consul, then hand over to the main event loop.

        Exits the process via sys.exit(1) when no usable backup is
        available or an inconsistent leader/backup state is detected.
        """

        # Start the local consul agent; the subprocess handle is polled
        # later by the main event loop
        consul_process = Consul.agent_start()

        # Check if we have an existing backup to restore
        # Use this backup if exists, or init a new MySQL database
        Minio.setup_connection()
        backup_exists = Minio.does_backup_exists()

        # Test for unstable environment (other nodes are present and no leader is present)
        # We don't want to become the new leader on the restored backup directly
        #
        # Needs to be checked before Consul.get_instance().register_node() is called
        #
        while Consul.get_instance().get_replication_leader_ip() is None:
            nodes = Consul.get_instance().get_all_registered_nodes()
            if len(nodes) == 0:
                # No other node registered: we are bootstrapping a new cluster
                break

            logging.warning("Other nodes (%s) detected but no leader, waiting", nodes)
            time.sleep(5)

        # Try to become session leader (needed to decide if we can create a database)
        replication_leader = Consul.get_instance().try_to_become_replication_leader()

        # Keep session alive until we start the main loop
        Consul.get_instance().start_session_auto_refresh_thread()

        logging.info("Init local node (leader=%s, backup=%s)",
                     replication_leader, backup_exists)

        # Provision the local MySQL data directory depending on the
        # (leader, backup) combination
        if replication_leader and not backup_exists:
            # Fresh cluster: we are the first node, create a new database
            Mysql.init_database_if_needed()
        elif replication_leader and backup_exists:
            # Leader after a restart / fail-over: continue from the backup
            Mysql.restore_backup_or_exit()
        elif not replication_leader and backup_exists:
            # Follower: seed the replica from the latest backup
            Mysql.restore_backup_or_exit()
        elif not replication_leader and not backup_exists:
            # Follower of a brand-new cluster: wait until the leader has
            # uploaded its first backup
            logging.info("We are not the replication leader, waiting for backups")
            backup_exists = Utils.wait_for_backup_exists(Consul.get_instance())

            if not backup_exists:
                logging.error("No backups to restore available, please check master logs, exiting")
                sys.exit(1)

            Mysql.restore_backup_or_exit()

        else:
            # Unreachable: the four cases above are exhaustive
            logging.error("This case should not happen (leader=%s, backup=%s)",
                          replication_leader, backup_exists)
            sys.exit(1)

        # Start ProxySQL
        Proxysql.start_proxysql()

        # Start MySQL
        mysql_process = Mysql.server_start()

        # Configure ProxySQL
        Proxysql.inital_setup()

        # Get data from MySQL (needed for the Consul node registration)
        mysql_version = Mysql.execute_query_as_root("SELECT version()")[0]['version()']
        server_id = Mysql.execute_query_as_root("SELECT @@GLOBAL.server_id")[0]['@@GLOBAL.server_id']

        Consul.get_instance().register_node(mysql_version=mysql_version,
                                            server_id=server_id)

        # Remove the old replication configuration (e.g., from backup)
        Mysql.delete_replication_config()

        # Register service as leader or follower
        Consul.get_instance().register_service(replication_leader)

        # Session keep alive will be handled by the main event loop
        Consul.get_instance().stop_session_auto_refresh_thread()

        # Run the main event loop (never returns)
        Actions.join_main_event_loop(consul_process, mysql_process)

    @staticmethod
    def join_main_event_loop(consul_process, mysql_process):
        """
        The main event loop for the join_or_bootstrap action.

        Runs forever and, on every iteration:
          * polls the child processes (non-blocking reap),
          * every 5 s: refreshes the ProxySQL backend list, tries to take
            over the replication-leader role after a leader failure, and
            re-points the local replica at the current leader,
          * every 5 s: refreshes the Consul sessions,
          * every 5 min: creates a backup if this node is responsible.

        Args:
            consul_process: subprocess handle of the local Consul agent.
            mysql_process: subprocess handle of the local MySQL server.
        """

        last_backup_check = None
        last_session_refresh = None
        last_replication_leader_check = None
        able_to_become_leader = False

        proxysql = Proxysql()

        # Main Loop, heavy operations needs to be dispatched
        # to an extra thread. The loop needs to refresh the
        # Consul sessions every few seconds.
        while True:
            # poll() is non-blocking; it reaps the children if they exited
            # NOTE(review): the return codes are ignored, so a dead consul or
            # mysqld child is not detected here — confirm this is intentional
            consul_process.poll()
            mysql_process.poll()

            # Try to replace a failed replication leader
            if Utils.is_refresh_needed(last_replication_leader_check, timedelta(seconds=5)):
                last_replication_leader_check = datetime.now()

                # Update ProxySQL nodes
                mysql_nodes = Consul.get_instance().get_all_registered_nodes()
                proxysql.update_mysql_server_if_needed(mysql_nodes)

                # Are the replication data completely processed
                # (i.e., the data from the leader is stored locally and we
                # can become the new leader?)
                if not able_to_become_leader:
                    # NOTE(review): "repliation" typo originates in mcm.mysql;
                    # rename there and here together
                    if Mysql.is_repliation_data_processed():
                        logging.info("All replication data are read, node can become replication leader")
                        able_to_become_leader = True

                replication_leader = Consul.get_instance().is_replication_leader()

                # Try to become new leader
                if not replication_leader and able_to_become_leader:
                    promotion = Consul.get_instance().try_to_become_replication_leader()

                    # Are we the new leader?
                    if promotion:
                        Mysql.delete_replication_config()
                        Consul.get_instance().register_service(True)
                        replication_leader = True

                # Check for correct replication leader
                if not replication_leader:
                    real_leader = Consul.get_instance().get_replication_leader_ip()
                    configured_leader = Mysql.get_replication_leader_ip()

                    if real_leader != configured_leader:
                        logging.info("Replication leader change (old=%s, new=%s)", configured_leader, real_leader)
                        Mysql.change_to_replication_client(real_leader)

            # Keep Consul sessions alive
            if Utils.is_refresh_needed(last_session_refresh, timedelta(seconds=5)):
                Consul.get_instance().refresh_sessions()
                last_session_refresh = datetime.now()

            # Create MySQL Backups (using extra thread for backup)
            # The backup can take long; a helper thread keeps the Consul
            # sessions alive while this loop is blocked in the backup call
            if Utils.is_refresh_needed(last_backup_check, timedelta(minutes=5)):
                Consul.get_instance().start_session_auto_refresh_thread()
                Mysql.create_backup_if_needed()
                last_backup_check = datetime.now()
                Consul.get_instance().stop_session_auto_refresh_thread()

            time.sleep(1)
173 |
--------------------------------------------------------------------------------
/docs/deployment-kubernetes.md:
--------------------------------------------------------------------------------
1 | # Example - Using Kubernetes
2 |
3 | To reproduce this example, you need a Kubernetes cluster with at least three worker nodes. The following services are deployed to the cluster:
4 |
5 | * Three Consul instances, they are used for the election of the primary MySQL server, for service discovery, and for providing additional information about the state of the cluster.
6 | * One MinIO object storage instance to store MySQL backups. These backups are used to bootstrap new MySQL replicas automatically. MinIO needs at least four nodes / volumes to be highly available. Therefore, a persistent iSCSI volume is used in this example. On this volume, you can also store a MySQL backup that is used to bootstrap the cluster. However, the persistent volume is not necessary. The solution also works without this volume. If the MinIO pod is started on another node, a new backup is created and uploaded automatically.
7 | * One primary MySQL server (read/write) and two read-only MySQL replicas.
8 | * An instance of [ProxySQL](https://github.com/sysown/proxysql) is available on every MySQL-Server. ProxySQL is used to access the MySQL installations. Write requests (e.g., `INSERT` or `UPDATE`) are automatically sent to the replication leader, and read requests (e.g., `SELECT`) are sent to the replication follower.
9 |
10 | __Note:__ If you don't have a local Kubernetes installation, you can use [kubeadm](https://kubernetes.io/docs/setup/production-environment/tools/kubeadm/create-cluster-kubeadm/) to setup such a cluster locally. In addition, you find a proper Ansible Playbook [here](https://github.com/jnidzwetzki/ansible-playbooks/tree/main/playbooks) to create such a cluster with `Docker` or `Container.io` as runtime.
11 |
12 | ### Step 1 - Check your Kubernetes installation
13 |
14 | Execute the command `kubectl get nodes` to check the state of your Kubernetes cluster.
15 |
16 | ```bash
17 | $ kubectl get nodes
18 | NAME STATUS ROLES AGE VERSION
19 | debian10-k8s-vm1 Ready master 3d3h v1.19.4
20 | debian10-k8s-vm2 Ready 3d3h v1.19.4
21 | debian10-k8s-vm3 Ready 3d2h v1.19.4
22 | debian10-k8s-vm4 Ready 24h v1.19.4
23 | ```
24 |
25 | In this example, the node `debian10-k8s-vm1` is the control node for the cluster. The nodes `debian10-k8s-vm2`, `debian10-k8s-vm3`, `debian10-k8s-vm4` are the worker nodes of the cluster.
26 |
27 | ### Step 2 - Deploy the Services
28 |
29 | Please download the [configuration](https://raw.githubusercontent.com/jnidzwetzki/mysql-ha-cloud/main/deployment/mysql-kubernetes-iscsi.yml) for Kubernetes and adjust the configuration according to your local settings. For example, when you use the persistent iSCSI volume, the iSCSI target settings need to be adjusted.
30 |
31 | ```bash
32 | $ curl https://raw.githubusercontent.com/jnidzwetzki/mysql-ha-cloud/main/deployment/mysql-kubernetes-iscsi.yml --output mysql-kubernetes-iscsi.yml
33 | $ kubectl create -f mysql-kubernetes-iscsi.yml
34 | secret/chap-secret created
35 | deployment.apps/minio created
36 | service/minio created
37 | service/consul created
38 | statefulset.apps/consul created
39 | statefulset.apps/mysql created
40 | service/mysql created
41 | ```
42 |
43 | After the deployment is done, the available pods should look as follows:
44 |
45 | ```bash
46 | $ kubectl get pods
47 | NAME READY STATUS RESTARTS AGE
48 | consul-0 1/1 Running 0 3h49m
49 | consul-1 1/1 Running 0 2m43s
50 | consul-2 1/1 Running 0 2m41s
51 | minio-567b86887c-wlpdn 1/1 Running 0 3h49m
52 | mysql-0 1/1 Running 0 3h49m
53 | mysql-1 1/1 Running 0 88s
54 | mysql-2 1/1 Running 0 13s
55 | ```
56 |
57 | In addition, the following services should be available:
58 |
59 | ```bash
60 | $ kubectl get services
61 | NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE
62 | consul NodePort 10.108.236.59 8500:30014/TCP 3h50m
63 | minio NodePort 10.100.165.38 9000:30013/TCP 3h50m
64 | mysql NodePort 10.103.124.5 3306:30015/TCP,6032:30016/TCP 3h50m
65 | ```
66 |
67 | Consul tries to bootstrap a new cluster in the background and the Consul agents on the MySQL pods also try to join this cluster. The status of the consul cluster could be checked with the following command:
68 |
69 | ```bash
70 | $ kubectl exec consul-0 -- consul members
71 | Node Address Status Type Build Protocol DC Segment
72 | consul-0 10.244.3.22:8301 alive server 1.9.0 2 dc1
73 | consul-1 10.244.1.28:8301 alive server 1.9.0 2 dc1
74 | consul-2 10.244.2.27:8301 alive server 1.9.0 2 dc1
75 | mysql-0 10.244.3.21:8301 alive client 1.8.4 2 dc1
76 | mysql-1 10.244.1.29:8301 alive client 1.8.4 2 dc1
77 | mysql-2 10.244.2.28:8301 alive client 1.8.4 2 dc1
78 | ```
79 |
80 | The output shows that the deployment of the three Consul servers was successful. Three Consul servers are deployed, and from the MySQL installations, three agents joined the cluster.
81 |
82 | ### Step 3 - Check Deployment
83 |
84 | After the deployment is done, you can check which MySQL nodes are available and which node is the replication leader:
85 |
86 | ```bash
87 | $ kubectl exec consul-0 -- consul kv get -recurse mcm/instances
88 | mcm/instances/10.244.1.29:{"ip_address": "10.244.1.29", "server_id": 2, "mysql_version": "8.0.21"}
89 | mcm/instances/10.244.2.28:{"ip_address": "10.244.2.28", "server_id": 3, "mysql_version": "8.0.21"}
90 | mcm/instances/10.244.3.21:{"ip_address": "10.244.3.21", "server_id": 1, "mysql_version": "8.0.21"}
91 |
92 | $ kubectl exec consul-0 -- consul kv get mcm/replication_leader
93 | {"ip_address": "10.244.3.21"}
94 | ```
95 |
96 | In the logfiles of the pod, you can see which pod is the MySQL replication leader and which pods are the replication follower. Besides, it can be seen which backend MySQL server are added to ProxySQL:
97 |
98 | ```bash
99 | $ kubectl logs mysql-0
100 | [...]
101 | 2020-12-07 19:01:27,482 INFO root Setting up replication (leader=10.244.3.21)
102 | [...]
103 | 2020-12-07 19:02:47,501 INFO root MySQL backend has changed (old=['10.244.1.29', '10.244.3.21'], new=['10.244.1.29', '10.244.2.28', '10.244.3.21']), reconfiguring
104 | 2020-12-07 19:02:47,501 INFO root Removing all old backend MySQL Server
105 | 2020-12-07 19:02:47,503 INFO root Adding 10.244.1.29 as backend MySQL Server
106 | 2020-12-07 19:02:47,505 INFO root Adding 10.244.2.28 as backend MySQL Server
107 | 2020-12-07 19:02:47,506 INFO root Adding 10.244.3.21 as backend MySQL Server
108 | ```
109 |
110 | In addition, you can list the available backups of the database:
111 |
112 | ```bash
113 | $ kubectl exec mysql-0 -- mc ls backup/mysqlbackup
114 | [2020-12-06 21:23:55 UTC] 1.6MiB mysql_backup_1607289823.6914027.tgz
115 | [2020-12-07 19:00:21 UTC] 1.6MiB mysql_backup_1607367611.8148804.tgz
116 | ```
117 |
118 | You can use also your browser to check the Consul installation and the MinIO setup:
119 |
120 | * At the URL [http://Kubernetes-Node:30013](http://Kubernetes-Node:30013) is the MinIO webinterface available. Please use the value of the variables `MINIO_ACCESS_KEY` and `MINIO_SECRET_KEY` from the deployment description for the login.
121 | * At the URL [http://Kubernetes-Node:30014](http://Kubernetes-Node:30014) is the Consul webinterface available.
122 |
123 | ### Step 4 - Use the highly-available MySQL-Server
124 |
125 | On port `30015/tcp` on all Kubernetes nodes, you can now reach the highly-available MySQL-Server. As user, use the `MYSQL_APPLICATION_USER` and the `MYSQL_APPLICATION_PASSWORD` values from the deployment description.
126 |
127 | For example:
128 |
129 | ```bash
130 | mysql -u mysql_user -pmysql_secret -h <kubernetes-node> -P30015
131 | ```
132 |
133 | While you work on the MySQL-Shell you can restart the Kubernetes worker nodes. Kubernetes will restart the missing pods on other nodes and the MySQL orchestrator will reconfigure the replication setup in MySQL. The MySQL-Shell is usable all the time for read- and write-requests.
134 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [2020] [Jan Nidzwetzki]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/mysql_cluster_manager/src/mcm/consul.py:
--------------------------------------------------------------------------------
1 | """This file is part of the MySQL cluster manager"""
2 |
3 | import os
4 | import time
5 | import json
6 | import logging
7 | import threading
8 | import subprocess
9 |
10 | import consul as pyconsul
11 |
12 | from mcm.utils import Utils
13 |
class Consul:

    """
    This class encapsulates all Consul related things
    """

    # The singleton instance
    __instance = None

    # Retry counter for KV operations
    retry_counter = 100

    # Prefix for all KV entries managed by the cluster manager
    kv_prefix = "mcm/"

    # Key storing the last assigned MySQL server id
    kv_server_id = kv_prefix + "server_id"

    # KV path below which every MySQL instance registers itself
    instances_path = kv_prefix + "instances/"

    # Name of the per-node health session
    instances_session_key = kv_prefix + "instances"

    # Key holding the current replication leader (locked via a session)
    replication_leader_path = kv_prefix + "replication_leader"

    def __init__(self):
        """
        Init the Consul client.

        Raises an Exception when the singleton instance already exists;
        use get_instance() instead of calling the constructor directly.
        """
        if Consul.__instance is not None:
            raise Exception("This class is a singleton!")

        Consul.__instance = self
        logging.info("Register Consul connection")
        self.client = pyconsul.Consul(host="localhost")
        self.active_sessions = []
        self.node_health_session = self.create_node_health_session()

        # The session auto refresh thread
        self.auto_refresh_thread = None
        self.run_auto_refresh_thread = False

    @staticmethod
    def get_instance():
        """ Static access method. """
        if Consul.__instance is None:
            Consul()
        return Consul.__instance

    def start_session_auto_refresh_thread(self):
        """
        Start the session auto refresh thread
        """
        logging.info("Starting the Consul session auto refresh thread")
        self.run_auto_refresh_thread = True
        self.auto_refresh_thread = threading.Thread(target=self.auto_refresh_sessions, args=())
        self.auto_refresh_thread.start()

    def auto_refresh_sessions(self):
        """
        Auto refresh the active sessions until the thread is stopped.
        """
        while self.run_auto_refresh_thread:
            logging.debug("Refreshing active consul sessions from auto refresh thread")
            try:
                self.refresh_sessions()
            except Exception:  # pylint: disable=broad-except
                # Keep the refresh thread alive on transient Consul errors;
                # a crashed thread would silently let all sessions (and the
                # leader lock) expire.
                logging.exception("Unable to refresh Consul sessions, retrying")
            time.sleep(2)

    def stop_session_auto_refresh_thread(self):
        """
        Stop the session auto refresh thread and wait for its termination.
        """
        logging.info("Stopping the Consul session auto refresh thread")
        self.run_auto_refresh_thread = False
        if self.auto_refresh_thread is not None:
            self.auto_refresh_thread.join()
            self.auto_refresh_thread = None
        logging.info("Consul session auto refresh thread is stopped")

    def create_node_health_session(self):
        """
        Create the node health session. All KV entries acquired with
        this session are automatically removed on session destroy
        (behavior='delete').
        """

        return self.create_session(
            name=Consul.instances_session_key,
            behavior='delete', ttl=15, lock_delay=0)

    def get_all_registered_nodes(self):
        """
        Get the IP addresses of all registered MySQL nodes.
        """
        mysql_nodes = []
        result = self.client.kv.get(Consul.instances_path, recurse=True)

        if result[1] is not None:
            for node in result[1]:
                node_value = node['Value']
                node_data = json.loads(node_value)

                # Skip malformed registrations instead of failing
                if "ip_address" not in node_data:
                    logging.error("ip_address missing in %s", node)
                    continue

                ip_address = node_data["ip_address"]
                mysql_nodes.append(ip_address)

        return mysql_nodes

    def get_mysql_server_id(self):
        """
        Get a new unique MySQL server id from consul.

        Try to get existing value and update to +1
        * If Update fails (CAS conflict), retry
        * If Key not exists, try to create

        Raises an Exception when no id could be determined after
        retry_counter attempts.
        """
        for _ in range(Consul.retry_counter):
            result = self.client.kv.get(Consul.kv_server_id)

            # Create new key
            if result[1] is None:
                logging.debug("Old serverkey %s not found, preparing new one",
                              Consul.kv_server_id)

                json_string = json.dumps({'last_used_id': 1})

                # cas=0 ensures the put only succeeds when the key is new
                put_result = self.client.kv.put(Consul.kv_server_id, json_string, cas=0)
                if put_result is True:
                    logging.debug("Created new key, started new server counter")
                    return 1

                logging.debug("New key could not be created, retrying")
                continue

            # Updating existing key
            logging.debug("Updating existing key %s", result)
            json_string = result[1]['Value']
            version = result[1]['ModifyIndex']
            server_data = json.loads(json_string)

            if "last_used_id" not in server_data:
                # Corrupt value; retry instead of crashing with a KeyError
                logging.error("Invalid JSON returned (missing last_used_id) %s",
                              json_string)
                continue

            server_data['last_used_id'] = server_data['last_used_id'] + 1
            json_string = json.dumps(server_data)

            # cas=version detects concurrent updates (lost update race)
            put_result = self.client.kv.put(Consul.kv_server_id, json_string, cas=version)

            if put_result is True:
                logging.debug("Successfully updated consul value %s, new server_id is %i",
                              put_result, server_data['last_used_id'])
                return server_data['last_used_id']

            logging.debug("Unable to update consul value, retrying %s", put_result)
            time.sleep(10)

        raise Exception("Unable to determine server id")

    def is_replication_leader(self):
        """
        Test if this node is the MySQL replication leader or not.
        """

        result = self.client.kv.get(Consul.replication_leader_path)

        if result[1] is None:
            logging.debug("No replication leader node available")
            return False

        # 'Session' is only present while the key is locked; treat an
        # unlocked key as "no leader" instead of raising a KeyError
        leader_session = result[1].get('Session')

        if leader_session is None:
            logging.debug("Replication leader key is not locked by a session")
            return False

        logging.debug("Replication leader is %s, we are %s",
                      leader_session, self.node_health_session)

        return leader_session == self.node_health_session

    def get_replication_leader_ip(self):
        """
        Get the IP of the current replication leader, or None when no
        leader is elected or the leader entry is malformed.
        """
        result = self.client.kv.get(Consul.replication_leader_path)

        if result[1] is None:
            return None

        json_string = result[1]['Value']
        server_data = json.loads(json_string)

        if "ip_address" not in server_data:
            # Return None instead of raising a KeyError below
            logging.error("Invalid JSON returned from replication leader (missing ip_address) %s",
                          json_string)
            return None

        return server_data['ip_address']

    def try_to_become_replication_leader(self):
        """
        Try to become the new replication leader by acquiring the
        leader key with our node health session.

        Returns True when this node acquired the leader lock,
        False otherwise.
        """

        result = self.client.kv.get(Consul.replication_leader_path)

        if result[1] is None:
            logging.debug("Register MySQL instance in Consul")
            ip_address = Utils.get_local_ip_address()

            json_string = json.dumps({
                'ip_address': ip_address
            })

            # acquire= turns the put into an atomic lock operation; only
            # one session can hold the key at a time
            put_result = self.client.kv.put(Consul.replication_leader_path,
                                            json_string,
                                            acquire=self.node_health_session)

            if put_result:
                logging.info("We are the new replication leader")
            else:
                logging.debug("Unable to become replication leader, retry")

            return put_result

        return False


    def register_service(self, leader=False, port=3306):
        """
        Register the MySQL service in the Consul service catalog.

        leader -- tag the service as replication "leader" or "follower"
        port -- the MySQL server port
        """
        ip_address = Utils.get_local_ip_address()

        tags = []
        service_id = f"mysql_{ip_address}"

        if leader:
            tags.append("leader")
        else:
            tags.append("follower")

        # Unregister old service
        all_services = self.client.agent.services()

        if service_id in all_services:
            logging.debug("Unregister old service %s (%s)", service_id, all_services)
            self.client.agent.service.deregister(service_id)

        # Register new service
        logging.info("Register new service_id=%s, tags=%s", service_id, tags)
        self.client.agent.service.register("mysql", service_id=service_id, port=port, tags=tags)

    def register_node(self, mysql_version=None, server_id=None):
        """
        Register the node in Consul. The KV entry is bound to the node
        health session and disappears when the session is invalidated.
        """
        logging.debug("Register MySQL instance in Consul")
        ip_address = Utils.get_local_ip_address()

        json_string = json.dumps({
            'ip_address': ip_address,
            'server_id': server_id,
            'mysql_version': mysql_version
        })

        path = f"{Consul.instances_path}{ip_address}"
        logging.debug("Consul: Path %s, value %s (session %s)",
                      path, json_string, self.node_health_session)

        put_result = self.client.kv.put(path, json_string, acquire=self.node_health_session)

        if not put_result:
            logging.error("Unable to create %s", path)
            return False

        return True

    def refresh_sessions(self):
        """
        Refresh the active sessions to prevent their expiration.
        """
        logging.debug("Keeping Consul sessions alive")

        for session in self.active_sessions:
            logging.debug("Refreshing session %s", session)
            self.client.session.renew(session)

    def create_session(self, name, behavior='release', ttl=None, lock_delay=15):
        """
        Create a new session and add it to the auto refresh list.

        Keep in mind that the real invalidation is around 2*ttl
        see https://github.com/hashicorp/consul/issues/1172
        """

        session_id = self.client.session.create(name=name,
                                                behavior=behavior,
                                                ttl=ttl,
                                                lock_delay=lock_delay)

        # Keep session for auto refresh
        self.active_sessions.append(session_id)

        logging.debug("Created new session %s named %s", session_id, name)

        return session_id


    def destroy_session(self, session_id):
        """
        Destroy a previously registered session.

        Returns True when the session was known and destroyed,
        False otherwise.
        """

        if session_id not in self.active_sessions:
            return False

        self.active_sessions.remove(session_id)
        self.client.session.destroy(session_id)

        return True

    @staticmethod
    def agent_start():
        """
        Start the local Consul agent as a background process and
        return the process handle.
        """

        logging.info("Starting Consul Agent")
        consul_args = ["consul"]
        consul_args.append("agent")
        consul_args.append("--data-dir")
        consul_args.append("/tmp/consul")

        consul_interface = os.environ.get("CONSUL_BIND_INTERFACE")

        if consul_interface is not None:
            consul_args.append("--bind")
            consul_args.append(f'{{{{ GetInterfaceIP "{consul_interface}" }}}}')

        consul_seed = os.environ.get("CONSUL_BOOTSTRAP_SERVER")

        if consul_seed is not None:
            consul_args.append("--join")
            consul_args.append(consul_seed)

        # Run process in background
        consul_process = subprocess.Popen(consul_args)

        return consul_process
364 |
--------------------------------------------------------------------------------
/mysql_cluster_manager/pylintrc:
--------------------------------------------------------------------------------
1 | [MASTER]
2 |
3 | # A comma-separated list of package or module names from where C extensions may
4 | # be loaded. Extensions are loading into the active Python interpreter and may
5 | # run arbitrary code.
6 | extension-pkg-whitelist=netifaces
7 |
8 | # Specify a score threshold to be exceeded before program exits with error.
9 | fail-under=10
10 |
11 | # Add files or directories to the blacklist. They should be base names, not
12 | # paths.
13 | ignore=CVS, compound
14 |
15 | # Add files or directories matching the regex patterns to the blacklist. The
16 | # regex matches against base names, not paths.
17 | ignore-patterns=
18 |
19 | # Python code to execute, usually for sys.path manipulation such as
20 | # pygtk.require().
21 | #init-hook=
22 |
23 | # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
24 | # number of processors available to use.
25 | jobs=0
26 |
27 | # Control the amount of potential inferred values when inferring a single
28 | # object. This can help the performance when dealing with large functions or
29 | # complex, nested conditions.
30 | limit-inference-results=100
31 |
32 | # List of plugins (as comma separated values of python module names) to load,
33 | # usually to register additional checkers.
34 | load-plugins=
35 |
36 | # Pickle collected data for later comparisons.
37 | persistent=yes
38 |
39 | # When enabled, pylint would attempt to guess common misconfiguration and emit
40 | # user-friendly hints instead of false-positive error messages.
41 | suggestion-mode=yes
42 |
43 | # Allow loading of arbitrary C extensions. Extensions are imported into the
44 | # active Python interpreter and may run arbitrary code.
45 | unsafe-load-any-extension=no
46 |
47 |
48 | [MESSAGES CONTROL]
49 |
50 | # Only show warnings with the listed confidence levels. Leave empty to show
51 | # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
52 | confidence=
53 |
54 | # Disable the message, report, category or checker with the given id(s). You
55 | # can either give multiple identifiers separated by comma (,) or put this
56 | # option multiple times (only on the command line, not in the configuration
57 | # file where it should appear only once). You can also use "--disable=all" to
58 | # disable everything first and then reenable specific checks. For example, if
59 | # you want to run only the similarities checker, you can use "--disable=all
60 | # --enable=similarities". If you want to run only the classes checker, but have
61 | # no Warning level messages displayed, use "--disable=all --enable=classes
62 | # --disable=W".
63 | disable=
64 | fixme,
65 | too-many-return-statements,
66 | too-many-instance-attributes,
67 | too-many-locals,
68 | too-many-arguments,
69 | too-many-public-methods,
70 | too-few-public-methods
71 |
72 | # Enable the message, report, category or checker with the given id(s). You can
73 | # either give multiple identifier separated by comma (,) or put this option
74 | # multiple time (only on the command line, not in the configuration file where
75 | # it should appear only once). See also the "--disable" option for examples.
76 | enable=
77 |
78 |
79 | [REPORTS]
80 |
81 | # Python expression which should return a score less than or equal to 10. You
82 | # have access to the variables 'error', 'warning', 'refactor', and 'convention'
83 | # which contain the number of messages in each category, as well as 'statement'
84 | # which is the total number of statements analyzed. This score is used by the
85 | # global evaluation report (RP0004).
86 | evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
87 |
88 | # Template used to display messages. This is a python new-style format string
89 | # used to format the message information. See doc for all details.
90 | #msg-template=
91 |
92 | # Set the output format. Available formats are text, parseable, colorized, json
93 | # and msvs (visual studio). You can also give a reporter class, e.g.
94 | # mypackage.mymodule.MyReporterClass.
95 | output-format=text
96 |
97 | # Tells whether to display a full report or only the messages.
98 | reports=no
99 |
100 | # Activate the evaluation score.
101 | score=yes
102 |
103 |
104 | [REFACTORING]
105 |
106 | # Maximum number of nested blocks for function / method body
107 | max-nested-blocks=5
108 |
109 | # Complete name of functions that never returns. When checking for
110 | # inconsistent-return-statements if a never returning function is called then
111 | # it will be considered as an explicit return statement and no message will be
112 | # printed.
113 | never-returning-functions=sys.exit
114 |
115 |
116 | [MISCELLANEOUS]
117 |
118 | # List of note tags to take in consideration, separated by a comma.
119 | notes=FIXME,
120 | XXX,
121 | TODO
122 |
123 | # Regular expression of note tags to take in consideration.
124 | #notes-rgx=
125 |
126 |
127 | [TYPECHECK]
128 |
129 | # List of decorators that produce context managers, such as
130 | # contextlib.contextmanager. Add to this list to register other decorators that
131 | # produce valid context managers.
132 | contextmanager-decorators=contextlib.contextmanager
133 |
134 | # List of members which are set dynamically and missed by pylint inference
135 | # system, and so shouldn't trigger E1101 when accessed. Python regular
136 | # expressions are accepted.
137 | generated-members=
138 |
139 | # Tells whether missing members accessed in mixin class should be ignored. A
140 | # mixin class is detected if its name ends with "mixin" (case insensitive).
141 | ignore-mixin-members=yes
142 |
143 | # Tells whether to warn about missing members when the owner of the attribute
144 | # is inferred to be None.
145 | ignore-none=yes
146 |
147 | # This flag controls whether pylint should warn about no-member and similar
148 | # checks whenever an opaque object is returned when inferring. The inference
149 | # can return multiple potential results while evaluating a Python object, but
150 | # some branches might not be evaluated, which results in partial inference. In
151 | # that case, it might be useful to still emit no-member and other checks for
152 | # the rest of the inferred objects.
153 | ignore-on-opaque-inference=yes
154 |
155 | # List of class names for which member attributes should not be checked (useful
156 | # for classes with dynamically set attributes). This supports the use of
157 | # qualified names.
158 | ignored-classes=optparse.Values,thread._local,_thread._local
159 |
160 | # List of module names for which member attributes should not be checked
161 | # (useful for modules/projects where namespaces are manipulated during runtime
162 | # and thus existing member attributes cannot be deduced by static analysis). It
163 | # supports qualified module names, as well as Unix pattern matching.
164 | ignored-modules=
165 |
166 | # Show a hint with possible names when a member name was not found. The aspect
167 | # of finding the hint is based on edit distance.
168 | missing-member-hint=yes
169 |
170 | # The minimum edit distance a name should have in order to be considered a
171 | # similar match for a missing member name.
172 | missing-member-hint-distance=1
173 |
174 | # The total number of similar names that should be taken in consideration when
175 | # showing a hint for a missing member.
176 | missing-member-max-choices=1
177 |
178 | # List of decorators that change the signature of a decorated function.
179 | signature-mutators=
180 |
181 |
182 | [FORMAT]
183 |
184 | # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
185 | expected-line-ending-format=
186 |
187 | # Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
189 |
190 | # Number of spaces of indent required inside a hanging or continued line.
191 | indent-after-paren=4
192 |
193 | # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
194 | # tab).
195 | indent-string=' '
196 |
197 | # Maximum number of characters on a single line.
198 | max-line-length=250
199 |
200 | # Maximum number of lines in a module.
201 | max-module-lines=1000
202 |
203 | # List of optional constructs for which whitespace checking is disabled. `dict-
204 | # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
205 | # `trailing-comma` allows a space between comma and closing bracket: (a, ).
206 | # `empty-line` allows space-only lines.
207 | no-space-check=trailing-comma,
208 | dict-separator
209 |
210 | # Allow the body of a class to be on the same line as the declaration if body
211 | # contains single statement.
212 | single-line-class-stmt=no
213 |
214 | # Allow the body of an if to be on the same line as the test if there is no
215 | # else.
216 | single-line-if-stmt=no
217 |
218 |
219 | [BASIC]
220 |
221 | # Naming style matching correct argument names.
222 | argument-naming-style=snake_case
223 |
224 | # Regular expression matching correct argument names. Overrides argument-
225 | # naming-style.
226 | #argument-rgx=
227 |
228 | # Naming style matching correct attribute names.
229 | attr-naming-style=snake_case
230 |
231 | # Regular expression matching correct attribute names. Overrides attr-naming-
232 | # style.
233 | #attr-rgx=
234 |
235 | # Bad variable names which should always be refused, separated by a comma.
236 | bad-names=foo,
237 | bar,
238 | baz,
239 | toto,
240 | tutu,
241 | tata
242 |
243 | # Bad variable names regexes, separated by a comma. If names match any regex,
244 | # they will always be refused
245 | bad-names-rgxs=
246 |
247 | # Naming style matching correct class attribute names.
248 | class-attribute-naming-style=any
249 |
250 | # Regular expression matching correct class attribute names. Overrides class-
251 | # attribute-naming-style.
252 | #class-attribute-rgx=
253 |
254 | # Naming style matching correct class names.
255 | class-naming-style=PascalCase
256 |
257 | # Regular expression matching correct class names. Overrides class-naming-
258 | # style.
259 | #class-rgx=
260 |
261 | # Naming style matching correct constant names.
262 | const-naming-style=UPPER_CASE
263 |
264 | # Regular expression matching correct constant names. Overrides const-naming-
265 | # style.
266 | #const-rgx=
267 |
268 | # Minimum line length for functions/classes that require docstrings, shorter
269 | # ones are exempt.
270 | docstring-min-length=-1
271 |
272 | # Naming style matching correct function names.
273 | function-naming-style=snake_case
274 |
275 | # Regular expression matching correct function names. Overrides function-
276 | # naming-style.
277 | #function-rgx=
278 |
279 | # Good variable names which should always be accepted, separated by a comma.
280 | good-names=i,
281 | j,
282 | k,
283 | ex,
284 | Run,
285 | _
286 |
287 | # Good variable names regexes, separated by a comma. If names match any regex,
288 | # they will always be accepted
289 | good-names-rgxs=
290 |
291 | # Include a hint for the correct naming format with invalid-name.
292 | include-naming-hint=no
293 |
294 | # Naming style matching correct inline iteration names.
295 | inlinevar-naming-style=any
296 |
297 | # Regular expression matching correct inline iteration names. Overrides
298 | # inlinevar-naming-style.
299 | #inlinevar-rgx=
300 |
301 | # Naming style matching correct method names.
302 | method-naming-style=snake_case
303 |
304 | # Regular expression matching correct method names. Overrides method-naming-
305 | # style.
306 | #method-rgx=
307 |
308 | # Naming style matching correct module names.
309 | module-naming-style=snake_case
310 |
311 | # Regular expression matching correct module names. Overrides module-naming-
312 | # style.
313 | #module-rgx=
314 |
315 | # Colon-delimited sets of names that determine each other's naming style when
316 | # the name regexes allow several styles.
317 | name-group=
318 |
319 | # Regular expression which should only match function or class names that do
320 | # not require a docstring.
321 | no-docstring-rgx=^_
322 |
323 | # List of decorators that produce properties, such as abc.abstractproperty. Add
324 | # to this list to register other decorators that produce valid properties.
325 | # These decorators are taken in consideration only for invalid-name.
326 | property-classes=abc.abstractproperty
327 |
328 | # Naming style matching correct variable names.
329 | variable-naming-style=snake_case
330 |
331 | # Regular expression matching correct variable names. Overrides variable-
332 | # naming-style.
333 | #variable-rgx=
334 |
335 |
336 | [SIMILARITIES]
337 |
338 | # Ignore comments when computing similarities.
339 | ignore-comments=yes
340 |
341 | # Ignore docstrings when computing similarities.
342 | ignore-docstrings=yes
343 |
344 | # Ignore imports when computing similarities.
345 | ignore-imports=no
346 |
347 | # Minimum lines number of a similarity.
348 | min-similarity-lines=4
349 |
350 |
351 | [STRING]
352 |
353 | # This flag controls whether inconsistent-quotes generates a warning when the
354 | # character used as a quote delimiter is used inconsistently within a module.
355 | check-quote-consistency=no
356 |
357 | # This flag controls whether the implicit-str-concat should generate a warning
358 | # on implicit string concatenation in sequences defined over several lines.
359 | check-str-concat-over-line-jumps=no
360 |
361 |
362 | [VARIABLES]
363 |
364 | # List of additional names supposed to be defined in builtins. Remember that
365 | # you should avoid defining new builtins when possible.
366 | additional-builtins=
367 |
368 | # Tells whether unused global variables should be treated as a violation.
369 | allow-global-unused-variables=yes
370 |
371 | # List of strings which can identify a callback function by name. A callback
372 | # name must start or end with one of those strings.
373 | callbacks=cb_,
374 | _cb
375 |
376 | # A regular expression matching the name of dummy variables (i.e. expected to
377 | # not be used).
378 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
379 |
380 | # Argument names that match this expression will be ignored. Default to name
381 | # with leading underscore.
382 | ignored-argument-names=_.*|^ignored_|^unused_
383 |
384 | # Tells whether we should check for unused import in __init__ files.
385 | init-import=no
386 |
387 | # List of qualified module names which can have objects that can redefine
388 | # builtins.
389 | redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
390 |
391 |
392 | [LOGGING]
393 |
394 | # The type of string formatting that logging methods do. `old` means using %
395 | # formatting, `new` is for `{}` formatting.
396 | logging-format-style=old
397 |
398 | # Logging modules to check that the string format arguments are in logging
399 | # function parameter format.
400 | logging-modules=logging
401 |
402 |
403 | [SPELLING]
404 |
405 | # Limits count of emitted suggestions for spelling mistakes.
406 | max-spelling-suggestions=4
407 |
408 | # Spelling dictionary name. Available dictionaries: none. To make it work,
409 | # install the python-enchant package.
410 | spelling-dict=
411 |
412 | # List of comma separated words that should not be checked.
413 | spelling-ignore-words=
414 |
415 | # A path to a file that contains the private dictionary; one word per line.
416 | spelling-private-dict-file=
417 |
418 | # Tells whether to store unknown words to the private dictionary (see the
419 | # --spelling-private-dict-file option) instead of raising a message.
420 | spelling-store-unknown-words=no
421 |
422 |
423 | [CLASSES]
424 |
425 | # List of method names used to declare (i.e. assign) instance attributes.
426 | defining-attr-methods=__init__,
427 | __new__,
428 | setUp,
429 | __post_init__
430 |
431 | # List of member names, which should be excluded from the protected access
432 | # warning.
433 | exclude-protected=_asdict,
434 | _fields,
435 | _replace,
436 | _source,
437 | _make
438 |
439 | # List of valid names for the first argument in a class method.
440 | valid-classmethod-first-arg=cls
441 |
442 | # List of valid names for the first argument in a metaclass class method.
443 | valid-metaclass-classmethod-first-arg=cls
444 |
445 |
446 | [IMPORTS]
447 |
448 | # List of modules that can be imported at any level, not just the top level
449 | # one.
450 | allow-any-import-level=
451 |
452 | # Allow wildcard imports from modules that define __all__.
453 | allow-wildcard-with-all=no
454 |
455 | # Analyse import fallback blocks. This can be used to support both Python 2 and
456 | # 3 compatible code, which means that the block might have code that exists
457 | # only in one or another interpreter, leading to false positives when analysed.
458 | analyse-fallback-blocks=no
459 |
460 | # Deprecated modules which should not be used, separated by a comma.
461 | deprecated-modules=optparse,tkinter.tix
462 |
463 | # Create a graph of external dependencies in the given file (report RP0402 must
464 | # not be disabled).
465 | ext-import-graph=
466 |
467 | # Create a graph of every (i.e. internal and external) dependencies in the
468 | # given file (report RP0402 must not be disabled).
469 | import-graph=
470 |
471 | # Create a graph of internal dependencies in the given file (report RP0402 must
472 | # not be disabled).
473 | int-import-graph=
474 |
475 | # Force import order to recognize a module as part of the standard
476 | # compatibility libraries.
477 | known-standard-library=
478 |
479 | # Force import order to recognize a module as part of a third party library.
480 | known-third-party=enchant
481 |
482 | # Couples of modules and preferred modules, separated by a comma.
483 | preferred-modules=
484 |
485 |
486 | [DESIGN]
487 |
488 | # Maximum number of arguments for function / method.
489 | max-args=5
490 |
491 | # Maximum number of attributes for a class (see R0902).
492 | max-attributes=7
493 |
494 | # Maximum number of boolean expressions in an if statement (see R0916).
495 | max-bool-expr=5
496 |
497 | # Maximum number of branch for function / method body.
498 | max-branches=20
499 |
500 | # Maximum number of locals for function / method body.
501 | max-locals=15
502 |
503 | # Maximum number of parents for a class (see R0901).
504 | max-parents=7
505 |
506 | # Maximum number of public methods for a class (see R0904).
507 | max-public-methods=20
508 |
509 | # Maximum number of return / yield for function / method body.
510 | max-returns=6
511 |
512 | # Maximum number of statements in function / method body.
513 | max-statements=60
514 |
515 | # Minimum number of public methods for a class (see R0903).
516 | min-public-methods=2
517 |
518 |
519 | [EXCEPTIONS]
520 |
521 | # Exceptions that will emit a warning when being caught. Defaults to
522 | # "BaseException, Exception".
523 | overgeneral-exceptions=BaseException,
524 | Exception
525 |
--------------------------------------------------------------------------------
/mysql_cluster_manager/src/mcm/mysql.py:
--------------------------------------------------------------------------------
1 | """This file is part of the MySQL cluster manager"""
2 |
3 | import os
4 | import sys
5 | import time
6 | import shutil
7 | import logging
8 | import threading
9 | import subprocess
10 |
11 | from shutil import rmtree
12 | from datetime import timedelta
13 |
14 | import mysql.connector
15 |
16 | from mcm.consul import Consul
17 | from mcm.minio import Minio
18 | from mcm.utils import Utils
19 |
class Mysql:

    """
    This class encapsulates all MySQL related things:
    data directory initialization, replication handling,
    backup/restore via xtrabackup and MinIO, and generic
    statement execution against the local server.
    """

    # Binaries and paths used to control the local MySQL installation.
    xtrabackup_binary = "/usr/bin/xtrabackup"
    mysql_server_binary = "/usr/bin/mysqld_safe"
    mysqld_binary = "/usr/sbin/mysqld"
    mysql_datadir = "/var/lib/mysql"

    @staticmethod
    def init_database_if_needed():
        """
        Init a MySQL and configure permissions.

        Returns True when a fresh initialization was performed,
        False when the data directory was already initialized.
        """

        logging.info("Init MySQL database directory")

        # The presence of the first InnoDB redo log file indicates an
        # already initialized data directory.
        if os.path.isfile(f"{Mysql.mysql_datadir}/ib_logfile0"):
            logging.info("MySQL is already initialized, skipping")
            return False

        mysql_init = [Mysql.mysqld_binary, "--initialize-insecure", "--user=mysql"]

        subprocess.run(mysql_init, check=True)

        # Start server the first time (no root password exists yet)
        mysql_process = Mysql.server_start(use_root_password=False)

        # Create application user
        logging.debug("Creating MySQL user for the application")
        application_user = os.environ.get("MYSQL_APPLICATION_USER")
        application_password = os.environ.get("MYSQL_APPLICATION_PASSWORD")

        # Password needs to be mysql_native_password for ProxySQL
        # See https://github.com/sysown/proxysql/issues/2580
        # NOTE: credentials are interpolated into the SQL statements; they
        # come from the container environment, not from untrusted input.
        Mysql.execute_statement_or_exit(f"CREATE USER '{application_user}'@'localhost' "
                                        f"IDENTIFIED WITH mysql_native_password BY '{application_password}'")
        Mysql.execute_statement_or_exit(f"GRANT ALL PRIVILEGES ON *.* TO '{application_user}'@'localhost'")
        Mysql.execute_statement_or_exit(f"CREATE USER '{application_user}'@'%' "
                                        f"IDENTIFIED WITH mysql_native_password BY '{application_password}'")
        Mysql.execute_statement_or_exit(f"GRANT ALL PRIVILEGES ON *.* TO '{application_user}'@'%'")

        # Create backup user (local access only, backup related privileges)
        logging.debug("Creating MySQL user for backups")
        backup_user = os.environ.get("MYSQL_BACKUP_USER")
        backup_password = os.environ.get("MYSQL_BACKUP_PASSWORD")
        Mysql.execute_statement_or_exit(f"CREATE USER '{backup_user}'@'localhost' "
                                        f"IDENTIFIED BY '{backup_password}'")
        Mysql.execute_statement_or_exit("GRANT BACKUP_ADMIN, PROCESS, RELOAD, LOCK TABLES, "
                                        f"REPLICATION CLIENT ON *.* TO '{backup_user}'@'localhost'")
        Mysql.execute_statement_or_exit("GRANT SELECT ON performance_schema.log_status TO "
                                        f"'{backup_user}'@'localhost'")

        # Create replication user (remote access from the replicas)
        logging.debug("Creating replication user")
        replication_user = os.environ.get("MYSQL_REPLICATION_USER")
        replication_password = os.environ.get("MYSQL_REPLICATION_PASSWORD")
        Mysql.execute_statement_or_exit(f"CREATE USER '{replication_user}'@'%' "
                                        f"IDENTIFIED BY '{replication_password}'")
        Mysql.execute_statement_or_exit("GRANT REPLICATION SLAVE ON *.* TO "
                                        f"'{replication_user}'@'%'")

        # Change permissions for the root user
        logging.debug("Set permissions for the root user")
        root_password = os.environ.get("MYSQL_ROOT_PASSWORD")
        Mysql.execute_statement_or_exit(f"CREATE USER 'root'@'%' IDENTIFIED BY '{root_password}'")
        Mysql.execute_statement_or_exit("GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' "
                                        "WITH GRANT OPTION")
        Mysql.execute_statement_or_exit("ALTER USER 'root'@'localhost' "
                                        f"IDENTIFIED BY '{root_password}'")

        # Shutdown MySQL server and wait for the process to terminate
        logging.debug("Inital MySQL setup done, shutdown server..")
        Mysql.execute_statement_or_exit(sql="SHUTDOWN", username="root", password=root_password)
        mysql_process.wait()

        return True

    @staticmethod
    def build_configuration():
        """
        Build the MySQL server configuration.

        Writes the cluster specific settings (server_id from Consul,
        GTID based replication) into a generated config file.
        """
        consul = Consul.get_instance()
        server_id = consul.get_mysql_server_id()

        # Use a context manager so the file is closed even on write errors
        with open("/etc/mysql/conf.d/zz_cluster.cnf", 'w') as outfile:
            outfile.write("# DO NOT EDIT - This file was generated automatically\n")
            outfile.write("[mysqld]\n")
            outfile.write(f"server_id={server_id}\n")
            outfile.write("gtid_mode=ON\n")
            outfile.write("enforce-gtid-consistency=ON\n")

    @staticmethod
    def change_to_replication_client(leader_ip):
        """
        Make the local MySQL installation a replication follower
        of the given leader and switch the server to read-only mode.
        """

        logging.info("Setting up replication (leader=%s)", leader_ip)

        replication_user = os.environ.get("MYSQL_REPLICATION_USER")
        replication_password = os.environ.get("MYSQL_REPLICATION_PASSWORD")

        Mysql.execute_query_as_root("STOP SLAVE", discard_result=True)

        # GTID auto positioning, so no binlog file/position is needed
        Mysql.execute_query_as_root(f"CHANGE MASTER TO MASTER_HOST = '{leader_ip}', "
                                    f"MASTER_PORT = 3306, MASTER_USER = '{replication_user}', "
                                    f"MASTER_PASSWORD = '{replication_password}', "
                                    "MASTER_AUTO_POSITION = 1, GET_MASTER_PUBLIC_KEY = 1"
                                    , discard_result=True)

        Mysql.execute_query_as_root("START SLAVE", discard_result=True)

        # Set replica to read only (super_read_only also covers SUPER users)
        logging.info("Set MySQL-Server mode to read-only")
        Mysql.execute_query_as_root("SET GLOBAL read_only = 1", discard_result=True)
        Mysql.execute_query_as_root("SET GLOBAL super_read_only = 1", discard_result=True)

    @staticmethod
    def delete_replication_config():
        """
        Stop the replication, remove the follower configuration
        and make the server writable again.
        """
        logging.debug("Removing old replication configuraion")
        Mysql.execute_query_as_root("STOP SLAVE", discard_result=True)
        Mysql.execute_query_as_root("RESET SLAVE ALL", discard_result=True)

        # Accept writes again (disable super_read_only first)
        logging.info("Set MySQL-Server mode to read-write")
        Mysql.execute_query_as_root("SET GLOBAL super_read_only = 0", discard_result=True)
        Mysql.execute_query_as_root("SET GLOBAL read_only = 0", discard_result=True)

    @staticmethod
    def get_replication_leader_ip():
        """
        Get the current replication leader ip, or None when this
        server is not configured as a replication follower.
        """
        slave_status = Mysql.execute_query_as_root("SHOW SLAVE STATUS")

        # No (or ambiguous) replication channel configured
        if len(slave_status) != 1:
            return None

        if 'Master_Host' not in slave_status[0]:
            logging.error("Invalid output, master_host not found %s", slave_status)
            return None

        return slave_status[0]['Master_Host']

    @staticmethod
    def is_repliation_data_processed():
        """
        Is the replication log from the master completely processed?

        Returns True only when the IO thread is idle waiting for new
        events AND the SQL thread has applied the complete relay log.
        """

        slave_status = Mysql.execute_query_as_root("SHOW SLAVE STATUS")

        if len(slave_status) != 1:
            return False

        if 'Slave_IO_State' not in slave_status[0]:
            logging.error("Invalid output, Slave_IO_State not found %s", slave_status)
            return False

        # Leader is still sending data
        io_state = slave_status[0]['Slave_IO_State']
        logging.debug("Follower IO state is '%s'", io_state)
        if io_state != "Waiting for master to send event":
            return False

        if 'Slave_SQL_Running_State' not in slave_status[0]:
            logging.error("Invalid output, Slave_SQL_Running_State not found %s", slave_status)
            return False

        # Data is not completely processed yet
        sql_state = slave_status[0]['Slave_SQL_Running_State']
        logging.debug("Follower SQL state is '%s'", sql_state)
        if sql_state != "Slave has read all relay log; waiting for more updates":
            return False

        return True

    @staticmethod
    def server_start(use_root_password=True):
        """
        Start the MySQL server and wait for ready to serve connections.

        Returns the Popen handle of the mysqld_safe process, so the
        caller can wait for its termination.
        """

        logging.info("Starting MySQL")

        Mysql.build_configuration()

        mysql_server = [Mysql.mysql_server_binary, "--user=mysql"]
        mysql_process = subprocess.Popen(mysql_server)

        # During the very first start no root password is set yet
        root_password = None
        if use_root_password:
            root_password = os.environ.get("MYSQL_ROOT_PASSWORD")

        Mysql.wait_for_connection(password=root_password)

        return mysql_process

    @staticmethod
    def server_stop():
        """
        Stop the MySQL server via a SQL SHUTDOWN statement.
        """
        logging.info("Stopping MySQL Server")

        # Try to shutdown the server without a password
        # (works during the initial, password-less setup phase)
        result = Mysql.execute_statement(sql="SHUTDOWN", log_error=False)

        # Try to shutdown the server using the root password
        if not result:
            root_password = os.environ.get("MYSQL_ROOT_PASSWORD")
            Mysql.execute_statement(sql="SHUTDOWN", password=root_password)

    @staticmethod
    def execute_query_as_root(sql, database='mysql', discard_result=False):
        """
        Execute the SQL query as root via the local unix socket and
        return the result rows as a list of dicts (or None when
        discard_result is set).
        """

        root_password = os.environ.get("MYSQL_ROOT_PASSWORD")

        cnx = None

        try:
            cnx = mysql.connector.connect(user='root', password=root_password,
                                          database=database,
                                          unix_socket='/var/run/mysqld/mysqld.sock')

            cur = cnx.cursor(dictionary=True, buffered=True)
            cur.execute(sql)

            if discard_result:
                return None

            return cur.fetchall()
        finally:
            # Always release the connection, even when execute() raises
            if cnx:
                cnx.close()

    @staticmethod
    def wait_for_connection(timeout=120, username='root',
                            password=None, database='mysql'):

        """
        Test connection via unix-socket. During first init
        MySQL starts without network access.

        Returns True when a connection could be established within
        the timeout (in seconds); otherwise the process exits.
        """
        elapsed_time = 0
        last_error = None

        while elapsed_time < timeout:
            try:
                cnx = mysql.connector.connect(user=username, password=password,
                                              database=database,
                                              unix_socket='/var/run/mysqld/mysqld.sock')
                cnx.close()
                logging.debug("MySQL connection successfully")
                return True
            except mysql.connector.Error as err:
                # Retry once per second until the timeout is reached
                time.sleep(1)
                elapsed_time = elapsed_time + 1
                last_error = err

        logging.error("Unable to connect to MySQL (timeout=%i). %s",
                      elapsed_time, last_error)
        sys.exit(1)

    @staticmethod
    def execute_statement_or_exit(sql=None, username='root',
                                  password=None, database='mysql',
                                  port=None):

        """
        Execute the given SQL statement; terminate the process when
        the execution fails.
        """
        result = Mysql.execute_statement(sql=sql, username=username, port=port,
                                         password=password, database=database)
        if not result:
            sys.exit(1)

    @staticmethod
    def execute_statement(sql=None, username='root',
                          password=None, database='mysql',
                          port=None, log_error=True):
        """
        Execute the given SQL statement.

        Connects via the local unix socket when no port is given,
        otherwise via TCP. Returns True on success, False on failure
        (logged unless log_error is False).
        """
        cnx = None
        try:
            if port is None:
                cnx = mysql.connector.connect(user=username, password=password,
                                              database=database,
                                              unix_socket='/var/run/mysqld/mysqld.sock')

            else:
                cnx = mysql.connector.connect(user=username, password=password,
                                              database=database, port=port)

            cursor = cnx.cursor()

            cursor.execute(sql)

            return True
        except mysql.connector.Error as err:
            if log_error:
                logging.error("Failed to execute SQL: %s", err)
            return False
        finally:
            # Close the connection even when cursor creation or
            # statement execution raised an error
            if cnx:
                cnx.close()

    @staticmethod
    def backup_data():
        """
        Backup the local MySQL Server and upload
        the backup into a S3 bucket.
        """

        # Call Setup to ensure bucket and policies do exist
        Minio.setup_connection()

        # Backup directory (timestamped to keep backups distinct)
        current_time = time.time()
        backup_dir = f"/tmp/mysql_backup_{current_time}"
        backup_folder_name = "mysql"
        backup_dest = f"{backup_dir}/{backup_folder_name}"

        logging.info("Backing up MySQL into dir %s", backup_dest)
        if os.path.exists(backup_dir):
            logging.error("Backup path %s already exists, skipping backup run", backup_dest)
            # Actually skip the run; continuing would fail on makedirs
            return

        # Create backup dir
        os.makedirs(backup_dir)

        # Create mysql backup with xtrabackup
        backup_user = os.environ.get("MYSQL_BACKUP_USER")
        backup_password = os.environ.get("MYSQL_BACKUP_PASSWORD")
        xtrabackup = [Mysql.xtrabackup_binary, f"--user={backup_user}",
                      f"--password={backup_password}", "--backup",
                      f"--target-dir={backup_dest}"]

        subprocess.run(xtrabackup, check=True)

        # Prepare backup (apply redo log, make it consistent)
        xtrabackup_prepare = [Mysql.xtrabackup_binary, "--prepare",
                              f"--target-dir={backup_dest}"]

        subprocess.run(xtrabackup_prepare, check=True)

        # Compress backup (structure in tar mysql/*)
        backup_file = f"/tmp/mysql_backup_{current_time}.tgz"
        tar = ["/bin/tar", "zcf", backup_file, "-C", backup_dir, backup_folder_name]
        subprocess.run(tar, check=True)

        # Upload Backup to S3 Bucket
        mc_args = [Minio.minio_binary, "cp", backup_file, "backup/mysqlbackup/"]
        subprocess.run(mc_args, check=True)

        # Remove local backup data after the upload
        rmtree(backup_dir)
        os.remove(backup_file)

        logging.info("Backup was successfully created")

    @staticmethod
    def create_backup_if_needed(maxage_seconds=60*60*6):
        """
        Create a new backup if needed. Default age is 6h.

        Only the replication leader performs backups. Returns True
        when a backup was started, False otherwise.
        """
        logging.debug("Checking for backups")

        consul_client = Consul.get_instance()
        if not consul_client.is_replication_leader():
            logging.debug("We are not the replication master, skipping backup check")
            return False

        backup_name, backup_date = Minio.get_latest_backup()

        if Utils.is_refresh_needed(backup_date, timedelta(seconds=maxage_seconds)):
            logging.info("Old backup is outdated (%s, %s), creating new one",
                         backup_name, backup_date)

            # Perform backup in extra thread to prevent Consul loop interruption
            backup_thread = threading.Thread(target=Mysql.backup_data)
            backup_thread.start()

            return True

        return False

    @staticmethod
    def restore_backup():
        """
        Restore the latest MySQL dump from the S3 Bucket.

        Returns True on success, False when no backup exists or the
        downloaded archive is not a valid MySQL backup.
        """
        logging.info("Restore MySQL Backup")
        current_time = time.time()

        if os.path.isfile(f"{Mysql.mysql_datadir}/ib_logfile0"):
            logging.info("MySQL is already initialized, cleaning up first")
            old_mysql_dir = f"{Mysql.mysql_datadir}_old_{current_time}"

            os.mkdir(old_mysql_dir, 0o700)

            # Renaming file per file, on some docker images
            # the complete directory can not be moved
            for entry in os.listdir(Mysql.mysql_datadir):
                source_name = f"{Mysql.mysql_datadir}/{entry}"
                dest_name = f"{old_mysql_dir}/{entry}"
                logging.debug("Moving %s to %s", source_name, dest_name)
                shutil.move(source_name, dest_name)

            logging.info("Old MySQL data moved to: %s", old_mysql_dir)

        backup_file, _ = Minio.get_latest_backup()

        if backup_file is None:
            logging.error("Unable to restore backup, no backup found in bucket")
            return False

        # Restore directory (timestamped, like the backup directory)
        restore_dir = f"/tmp/mysql_restore_{current_time}"

        # Create restore dir
        os.makedirs(restore_dir)

        # Download backup from the S3 bucket
        mc_download = [Minio.minio_binary, "cp", f"backup/mysqlbackup/{backup_file}",
                       restore_dir]
        subprocess.run(mc_download, check=True)

        # Unpack backup
        tar = ["/bin/tar", "zxf", f"{restore_dir}/{backup_file}", "-C", restore_dir]
        subprocess.run(tar, check=True)

        # Ensure that this is a MySQL Backup
        if not os.path.isfile(f"{restore_dir}/mysql/ib_logfile0"):
            logging.error("Unpacked backup is not a MySQL backup")
            rmtree(restore_dir)
            return False

        # Restore backup into the data directory
        xtrabackup = [Mysql.xtrabackup_binary, "--copy-back",
                      f"--target-dir={restore_dir}/mysql"]
        subprocess.run(xtrabackup, check=True)

        # Change permissions of the restored data
        chown = ['chown', 'mysql.mysql', '-R', '/var/lib/mysql/']
        subprocess.run(chown, check=True)

        # Remove downloaded and unpacked backup data
        rmtree(restore_dir)
        return True

    @staticmethod
    def restore_backup_or_exit():
        """
        Restore a backup or exit the process on failure.
        """

        result = Mysql.restore_backup()

        if not result:
            logging.error("Unable to restore MySQL backup")
            sys.exit(1)
--------------------------------------------------------------------------------
/docs/deployment-docker-swarm.md:
--------------------------------------------------------------------------------
1 | # Example - Using Docker Swarm
2 |
3 | In this example, a cluster consisting of five nodes running Debian 10 is used. The following services are deployed on the cluster:
4 |
5 | * Five Consul instances, they are used for election of the primary MySQL server, for service discovery, and for providing additional information about the state of the cluster.
  6 | * One MinIO object storage instance to store MySQL backups. These backups are used to bootstrap new MySQL replicas automatically. MinIO needs at least four nodes / volumes to be highly available. In addition, deploying such a setup without labeling the Docker nodes and creating stateful volumes is hard. The data in the S3 bucket are re-written periodically. Therefore, we don't deploy a highly available and replicated version of MinIO in this example.
7 | * One primary MySQL server (read/write) and two read-only MySQL replicas.
  8 | * An instance of [ProxySQL](https://github.com/sysown/proxysql) is available on every MySQL-Server. ProxySQL is used to access the MySQL installations. Write requests (e.g., `INSERT` or `UPDATE`) are automatically sent to the replication leader, and read requests (e.g., `SELECT`) are sent to the replication followers.
9 |
 10 | The five Docker nodes should be running in different availability zones. Therefore, one Docker node or availability zone can fail, and the MySQL service is still available.
11 |
12 | When one Docker node fails, the aborted Docker containers are re-started on the remaining nodes. If the primary MySQL fails, one of the replicas MySQL servers is promoted to the new primary MySQL server, and a new replica Server is started. If one of the replicas MySQL servers fails, a new replica MySQL server is started, provisioned, and configured.
13 |
14 | ### Step 1 - Setup Docker
15 |
16 | Setup your [Docker Swarm](https://docs.docker.com/engine/swarm/). The following commands have to be executed on all nodes of the cluster. As an alternative, you can use the following [Ansible Playbook](https://github.com/jnidzwetzki/ansible-playbooks/tree/main/docker) to install Docker on the cluster.
17 |
18 | ```bash
19 | apt-get update
20 | apt-get install -y apt-transport-https ca-certificates curl gnupg2 software-properties-common sudo
21 | curl -fsSL https://download.docker.com/linux/debian/gpg | sudo apt-key add -
22 | add-apt-repository "deb [arch=amd64] https://download.docker.com/linux/debian $(lsb_release -cs) stable"
23 | apt-get update
24 | apt-get install -y docker-ce docker-ce-cli containerd.io
25 | ```
26 |
27 | ### Step 2 - Init the Docker Swarm
28 |
29 | On one of the nodes, execute the following commands to bootstrap the Docker Swarm:
30 |
31 | ```bash
 32 | docker swarm init --advertise-addr <manager-ip>
33 | ```
34 |
 35 | The command above will show how you can add further _worker nodes_ to the cluster. Worker nodes only execute Docker containers and are __not__ part of the cluster management. The node that has initialized the cluster will be the only _manager node_ in the cluster. If this node becomes unavailable, the cluster runs into an unhealthy state. Therefore, you should have at least three _manager nodes_ in your cluster.
36 |
37 | To join a new node as _manager node_, execute the following command on a master node and execute the provided command on the new node:
38 |
39 | ```bash
40 | docker swarm join-token manager
41 | ```
42 | The output of the command above should be executed on the worker nodes to join the cluster as managers.
43 |
44 | ```bash
 45 | docker swarm join --token <token> <manager-ip>:2377
46 | ```
47 |
48 | After executing these commands, the status of the cluster should look as follows:
49 |
50 | ```bash
51 | $ docker node ls
52 | ID HOSTNAME STATUS AVAILABILITY MANAGER STATUS ENGINE VERSION
53 | cqshak7jcuh97oqtznbcorkjp * debian10-vm1 Ready Active Leader 19.03.13
54 | deihndvm1vwbym9q9x3fyksev debian10-vm2 Ready Active Reachable 19.03.13
55 | 3rqp1te4d66tm56b7a1zzlpr2 debian10-vm3 Ready Active Reachable 19.03.13
56 | 7l21f6mdy0dytmiy4oh70ttjo debian10-vm4 Ready Active Reachable 19.03.13
57 | uttuejl2q48hwizz3bya5engw debian10-vm5 Ready Active Reachable 19.03.13
58 | ```
59 |
 60 | __Note__: Per default, manager nodes also execute Docker containers. This can lead to the situation that a manager node becomes unreliable if a heavy workload is processed; the node is detected as dead, and the workload becomes re-scheduled even if all nodes of the cluster are available. To avoid such situations, in a real-world setup, manager nodes should only act as manager nodes and not execute any workload. This can be done by executing `docker node update --availability drain <node-name>` for the manager nodes.
61 |
62 | ### Step 3 - Deploy the Services
63 |
 64 | The deployment of the services to Docker Swarm is done with a [Compose file](https://github.com/jnidzwetzki/mysql-ha-cloud/tree/main/deployment). This file describes the services of the Docker Swarm cluster. The file can be downloaded and deployed as follows:
65 |
66 | ```bash
67 | wget https://raw.githubusercontent.com/jnidzwetzki/mysql-ha-cloud/main/deployment/mysql-docker-swarm.yml
68 | docker stack deploy --compose-file mysql-docker-swarm.yml mysql
69 | ```
70 |
71 | After the deployment is done, the stack should look as follows:
72 |
73 | ```
74 | $ docker stack ps mysql
75 | ID NAME IMAGE NODE DESIRED STATE CURRENT STATE ERROR PORTS
76 | zywtlmvswfz1 mysql_minio.1 minio/minio:RELEASE.2020-10-18T21-54-12Z debian10-vm4 Running Running 53 seconds ago
77 | v8hks8xa6vub mysql_mysql.1 jnidzwetzki/mysql-ha-cloud:latest debian10-vm2 Running Preparing about a minute ago
78 | bhsvp0muev51 mysql_consul.1 consul:1.8 debian10-vm1 Running Running about a minute ago *:8500->8500/tcp
79 | 4no74auuqpv0 mysql_mysql.2 jnidzwetzki/mysql-ha-cloud:latest debian10-vm3 Running Preparing about a minute ago
80 | t1dan93zja0e mysql_consul.2 consul:1.8 debian10-vm2 Running Running about a minute ago *:8500->8500/tcp
81 | 0b3pyj32v5db mysql_mysql.3 jnidzwetzki/mysql-ha-cloud:latest debian10-vm1 Running Preparing about a minute ago
82 | gptp9fpmkw4r mysql_consul.3 consul:1.8 debian10-vm4 Running Running about a minute ago *:8500->8500/tcp
83 | i2egrq1cbieu mysql_consul.4 consul:1.8 debian10-vm5 Running Running 32 seconds ago *:8500->8500/tcp
84 | vvsf1wwb1zr2 mysql_consul.5 consul:1.8 debian10-vm3 Running Running about a minute ago *:8500->8500/tcp
85 |
86 | $ docker stack services mysql
87 | ID NAME MODE REPLICAS IMAGE PORTS
88 | 0v8qhwaaawx5 mysql_minio replicated 1/1 minio/minio:RELEASE.2020-10-18T21-54-12Z *:9000->9000/tcp
89 | pro64635i2j4 mysql_mysql replicated 3/3 (max 1 per node) jnidzwetzki/mysql-ha-cloud:latest
90 | ya9luugwcri4 mysql_consul replicated 5/5 (max 1 per node) consul:1.8
91 | ```
92 |
 93 | After the service is deployed, the state of the Docker installation can be checked. On the Docker node, the following command can be executed in one of the Consul containers `a856acfc1635`:
94 |
95 |
96 | ```bash
97 | $ docker exec -t a856acfc1635 consul members
98 | Node Address Status Type Build Protocol DC Segment
99 | 234d94d9063f 10.0.3.3:8301 alive server 1.8.5 2 dc1
100 | 753784b1624a 10.0.3.5:8301 alive server 1.8.5 2 dc1
101 | cba13bbba731 10.0.3.2:8301 alive server 1.8.5 2 dc1
102 | f00780b002e8 10.0.3.6:8301 alive server 1.8.5 2 dc1
103 | f418f8ae1023 10.0.3.4:8301 alive server 1.8.5 2 dc1
104 | 0d744a098502 10.0.3.40:8301 alive client 1.8.4 2 dc1
105 | 72e398e0f1bc 10.0.3.41:8301 alive client 1.8.4 2 dc1
106 | 9e96a9596e76 10.0.3.42:8301 alive client 1.8.4 2 dc1
107 | ```
108 |
109 | In the output above can be seen that the deployment of the Consul servers was successful. Three servers are deployed, and from the MySQL installations, three agents are started.
110 |
111 | ### Step 4 - Check Deployment
112 |
113 | After the deployment is done, you can check which MySQL nodes are available and which node is the replication leader:
114 |
115 | ```bash
116 | $ docker exec -t a856acfc1635 consul kv get -recurse mcm/instances
117 | mcm/instances/10.0.3.40:{"ip_address": "10.0.3.40", "server_id": 44, "mysql_version": "8.0.21"}
118 | mcm/instances/10.0.3.41:{"ip_address": "10.0.3.41", "server_id": 45, "mysql_version": "8.0.21"}
119 | mcm/instances/10.0.3.42:{"ip_address": "10.0.3.42", "server_id": 46, "mysql_version": "8.0.21"}
120 |
121 | $ docker exec -t a856acfc1635 consul kv get mcm/replication_leader
122 | {"ip_address": "10.0.3.41"}
123 | ```
124 |
125 | In addition, you can have a look at the MySQL replication configuration
126 |
127 | ```bash
128 | $ docker exec -t a856acfc1635 /bin/bash -c 'mysql -u root -p`echo $MYSQL_ROOT_PASSWORD` -e "SHOW SLAVE STATUS"'
129 | mysql: [Warning] Using a password on the command line interface can be insecure.
130 | +----------------------------------+-------------+------------------+-------------+---------------+-----------------+---------------------+-------------------------------+---------------+-----------------------+------------------+-------------------+-----------------+---------------------+--------------------+------------------------+-------------------------+-----------------------------+------------+------------+--------------+---------------------+-----------------+-----------------+----------------+---------------+--------------------+--------------------+--------------------+-----------------+-------------------+----------------+-----------------------+-------------------------------+---------------+---------------+----------------+----------------+-----------------------------+------------------+--------------------------------------+-------------------------+-----------+---------------------+--------------------------------------------------------+--------------------+-------------+-------------------------+--------------------------+----------------+--------------------+--------------------+----------------------------------------------------------------------------------+---------------+----------------------+--------------+--------------------+------------------------+-----------------------+-------------------+
131 | | Slave_IO_State | Master_Host | Master_User | Master_Port | Connect_Retry | Master_Log_File | Read_Master_Log_Pos | Relay_Log_File | Relay_Log_Pos | Relay_Master_Log_File | Slave_IO_Running | Slave_SQL_Running | Replicate_Do_DB | Replicate_Ignore_DB | Replicate_Do_Table | Replicate_Ignore_Table | Replicate_Wild_Do_Table | Replicate_Wild_Ignore_Table | Last_Errno | Last_Error | Skip_Counter | Exec_Master_Log_Pos | Relay_Log_Space | Until_Condition | Until_Log_File | Until_Log_Pos | Master_SSL_Allowed | Master_SSL_CA_File | Master_SSL_CA_Path | Master_SSL_Cert | Master_SSL_Cipher | Master_SSL_Key | Seconds_Behind_Master | Master_SSL_Verify_Server_Cert | Last_IO_Errno | Last_IO_Error | Last_SQL_Errno | Last_SQL_Error | Replicate_Ignore_Server_Ids | Master_Server_Id | Master_UUID | Master_Info_File | SQL_Delay | SQL_Remaining_Delay | Slave_SQL_Running_State | Master_Retry_Count | Master_Bind | Last_IO_Error_Timestamp | Last_SQL_Error_Timestamp | Master_SSL_Crl | Master_SSL_Crlpath | Retrieved_Gtid_Set | Executed_Gtid_Set | Auto_Position | Replicate_Rewrite_DB | Channel_Name | Master_TLS_Version | Master_public_key_path | Get_master_public_key | Network_Namespace |
132 | +----------------------------------+-------------+------------------+-------------+---------------+-----------------+---------------------+-------------------------------+---------------+-----------------------+------------------+-------------------+-----------------+---------------------+--------------------+------------------------+-------------------------+-----------------------------+------------+------------+--------------+---------------------+-----------------+-----------------+----------------+---------------+--------------------+--------------------+--------------------+-----------------+-------------------+----------------+-----------------------+-------------------------------+---------------+---------------+----------------+----------------+-----------------------------+------------------+--------------------------------------+-------------------------+-----------+---------------------+--------------------------------------------------------+--------------------+-------------+-------------------------+--------------------------+----------------+--------------------+--------------------+----------------------------------------------------------------------------------+---------------+----------------------+--------------+--------------------+------------------------+-----------------------+-------------------+
133 | | Waiting for master to send event | 10.0.3.41 | replication_user | 3306 | 60 | binlog.000024 | 196 | 82df8cfe97e2-relay-bin.000002 | 365 | binlog.000024 | Yes | Yes | | | | | | | 0 | | 0 | 196 | 581 | None | | 0 | No | | | | | | 0 | No | 0 | | 0 | | | 45 | f2260821-2ced-11eb-89ef-02420a000329 | mysql.slave_master_info | 0 | NULL | Slave has read all relay log; waiting for more updates | 86400 | | | | | | | 1256e020-2cfe-11eb-a273-02420a00032a:1, 4aa0562f-28ac-11eb-93fa-02420a000305:1-8 | 1 | | | | | 1 | |
134 | +----------------------------------+-------------+------------------+-------------+---------------+-----------------+---------------------+-------------------------------+---------------+-----------------------+------------------+-------------------+-----------------+---------------------+--------------------+------------------------+-------------------------+-----------------------------+------------+------------+--------------+---------------------+-----------------+-----------------+----------------+---------------+--------------------+--------------------+--------------------+-----------------+-------------------+----------------+-----------------------+-------------------------------+---------------+---------------+----------------+----------------+-----------------------------+------------------+--------------------------------------+-------------------------+-----------+---------------------+--------------------------------------------------------+--------------------+-------------+-------------------------+--------------------------+----------------+--------------------+--------------------+----------------------------------------------------------------------------------+---------------+----------------------+--------------+--------------------+------------------------+-----------------------+-------------------+
135 | ```
136 |
137 | Or list the available backups of the database:
138 |
139 | ```bash
140 | $ docker exec -t a856acfc1635 mc ls backup/mysqlbackup
141 | [2020-11-20 21:50:24 UTC] 1.6MiB mysql_backup_1605909015.0471048.tgz
142 | [2020-11-20 21:50:34 UTC] 1.6MiB mysql_backup_1605909024.6657646.tgz
143 | [2020-11-21 03:51:21 UTC] 1.6MiB mysql_backup_1605930672.1543853.tgz
144 | [2020-11-21 09:52:18 UTC] 1.6MiB mysql_backup_1605952329.1124055.tgz
145 | [2020-11-22 12:46:39 UTC] 1.6MiB mysql_backup_1606049190.0292351.tgz
146 | [2020-11-22 18:50:19 UTC] 1.6MiB mysql_backup_1606071009.6974795.tgz
147 | ```
148 |
149 | The DNS settings for the service discovery could also be tested:
150 |
151 | ```bash
152 | $ docker exec -t a856acfc1635 dig @127.0.0.1 -p 8600 _mysql._leader.service.consul SRV
153 |
154 | ; <<>> DiG 9.11.5-P4-5.1+deb10u2-Debian <<>> @127.0.0.1 -p 8600 _mysql._leader.service.consul SRV
155 | ; (1 server found)
156 | ;; global options: +cmd
157 | ;; Got answer:
158 | ;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 61130
159 | ;; flags: qr aa rd; QUERY: 1, ANSWER: 1, AUTHORITY: 0, ADDITIONAL: 3
160 | ;; WARNING: recursion requested but not available
161 |
162 | ;; OPT PSEUDOSECTION:
163 | ; EDNS: version: 0, flags:; udp: 4096
164 | ;; QUESTION SECTION:
165 | ;_mysql._leader.service.consul. IN SRV
166 |
167 | ;; ANSWER SECTION:
168 | _mysql._leader.service.consul. 0 IN SRV 1 1 3306 cd1e7b5ae9a4.node.dc1.consul.
169 |
170 | ;; ADDITIONAL SECTION:
171 | cd1e7b5ae9a4.node.dc1.consul. 0 IN A 10.0.3.41
172 | cd1e7b5ae9a4.node.dc1.consul. 0 IN TXT "consul-network-segment="
173 |
174 | ;; Query time: 1 msec
175 | ;; SERVER: 127.0.0.1#8600(127.0.0.1)
176 | ;; WHEN: Tue Nov 24 07:06:10 UTC 2020
177 | ;; MSG SIZE rcvd: 158
178 |
179 |
180 |
181 | $ docker exec -t a856acfc1635 dig @127.0.0.1 -p 8600 _mysql._follower.service.consul SRV
182 |
183 | ; <<>> DiG 9.11.5-P4-5.1+deb10u2-Debian <<>> @127.0.0.1 -p 8600 _mysql._follower.service.consul SRV
184 | ; (1 server found)
185 | ;; global options: +cmd
186 | ;; Got answer:
187 | ;; ->>HEADER<<- opcode: QUERY, status: NOERROR, id: 46995
188 | ;; flags: qr aa rd; QUERY: 1, ANSWER: 2, AUTHORITY: 0, ADDITIONAL: 5
189 | ;; WARNING: recursion requested but not available
190 |
191 | ;; OPT PSEUDOSECTION:
192 | ; EDNS: version: 0, flags:; udp: 4096
193 | ;; QUESTION SECTION:
194 | ;_mysql._follower.service.consul. IN SRV
195 |
196 | ;; ANSWER SECTION:
197 | _mysql._follower.service.consul. 0 IN SRV 1 1 3306 f36ddfed8617.node.dc1.consul.
198 | _mysql._follower.service.consul. 0 IN SRV 1 1 3306 ddcadd280a98.node.dc1.consul.
199 |
200 | ;; ADDITIONAL SECTION:
201 | f36ddfed8617.node.dc1.consul. 0 IN A 10.0.3.40
202 | f36ddfed8617.node.dc1.consul. 0 IN TXT "consul-network-segment="
203 | ddcadd280a98.node.dc1.consul. 0 IN A 10.0.3.42
204 | ddcadd280a98.node.dc1.consul. 0 IN TXT "consul-network-segment="
205 |
206 | ;; Query time: 1 msec
207 | ;; SERVER: 127.0.0.1#8600(127.0.0.1)
208 | ;; WHEN: Tue Nov 24 07:06:20 UTC 2020
209 | ;; MSG SIZE rcvd: 260
210 | ```
211 |
212 | ### Step 5 - Use the highly-available MySQL-Server
213 |
214 | On port `3306/tcp` (the default MySQL port) on all Docker nodes, you can now reach the highly-available MySQL-Server. As user use `MYSQL_APPLICATION_USER` and the `MYSQL_APPLICATION_PASSWORD` from the docker-swarm file.
215 |
216 | For example:
217 |
218 | ```bash
219 | mysql -u mysql_user -pmysql_secret -h debian10-vm1
220 | ```
221 |
222 | While you work on the MySQL-Shell you can restart the Docker nodes. Docker Swarm will restart the missing services on other nodes and the MySQL orchestrator will reconfigure the replication setup in MySQL. The MySQL-Shell is usable all the time for read- and write requests.
223 |
--------------------------------------------------------------------------------
/docs/images/architecture.svg:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------