├── Gemfile ├── lib ├── ar_mysql_flexmaster.rb └── active_record │ └── connection_adapters │ └── mysql_flexmaster_adapter.rb ├── gemfiles ├── rails3.2.gemfile ├── rails4.2.gemfile ├── rails5.0.gemfile ├── rails3.2.gemfile.lock ├── rails4.2.gemfile.lock └── rails5.0.gemfile.lock ├── test ├── integration │ ├── run_integration_tests │ ├── no_traffic_test.rb │ ├── with_queries_to_be_killed_test.rb │ ├── wrong_setup_test.rb │ └── there_and_back_again_test.rb ├── integration_helper.rb ├── boot_slave ├── boot_mysql_env.rb └── ar_flexmaster_test.rb ├── .gitignore ├── Rakefile ├── .travis.yml ├── LICENSE ├── ar_mysql_flexmaster.gemspec ├── unplanned_failovers.md ├── README.md └── bin └── master_cut /Gemfile: -------------------------------------------------------------------------------- 1 | source 'https://rubygems.org' 2 | 3 | gemspec 4 | -------------------------------------------------------------------------------- /lib/ar_mysql_flexmaster.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | module ArMysqlFlexmaster 3 | end 4 | -------------------------------------------------------------------------------- /gemfiles/rails3.2.gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec :path => "../" 4 | 5 | gem "rails", "~> 3.2.0" 6 | gem "mysql2", "~> 0.3.0" 7 | -------------------------------------------------------------------------------- /test/integration/run_integration_tests: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | retval = true 4 | Dir.glob(File.dirname(__FILE__) + '/*_test.rb').each do |f| 5 | retval &= system("ruby #{f}") 6 | end 7 | exit retval 8 | -------------------------------------------------------------------------------- /gemfiles/rails4.2.gemfile: -------------------------------------------------------------------------------- 1 | 
source "https://rubygems.org" 2 | 3 | gemspec :path => "../" 4 | 5 | # Before https://github.com/rails/rails/commit/c6bf2b89471f14075d1de8f42b4c0eac9585ca98 6 | gem "rails", "~> 4.2.0", "< 4.2.8" 7 | -------------------------------------------------------------------------------- /gemfiles/rails5.0.gemfile: -------------------------------------------------------------------------------- 1 | source "https://rubygems.org" 2 | 3 | gemspec :path => "../" 4 | 5 | # Before https://github.com/rails/rails/commit/99c0cb58d189503da0f532c3e34bc0262cdff46c 6 | gem "rails", "~> 5.0.0", "< 5.0.1" 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | .bundle 4 | .config 5 | .yardoc 6 | Gemfile.lock 7 | InstalledFiles 8 | _yardoc 9 | coverage 10 | doc/ 11 | lib/bundler/man 12 | pkg 13 | rdoc 14 | spec/reports 15 | test/tmp 16 | test/version_tmp 17 | tmp 18 | .*.sw* 19 | test/database.yml 20 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | require 'bundler/gem_tasks' 2 | require 'rake/testtask' 3 | 4 | require 'bump/tasks' 5 | require 'wwtd/tasks' 6 | 7 | Rake::TestTask.new(:test_units) do |test| 8 | test.libs << 'lib' << 'test' 9 | test.pattern = 'test/*_test.rb' 10 | test.verbose = true 11 | end 12 | 13 | task :test do 14 | retval = true 15 | retval &= Rake::Task[:test_units].invoke 16 | retval &= system(File.dirname(__FILE__) + "/test/integration/run_integration_tests") 17 | exit retval 18 | end 19 | 20 | task :default => 'wwtd:local' 21 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: ruby 2 | cache: bundler 3 | branches: 4 | only: master 5 | 6 | rvm: 7 | - 2.2 8 
| - 2.3 9 | - 2.4 10 | 11 | gemfile: 12 | - gemfiles/rails3.2.gemfile 13 | - gemfiles/rails4.2.gemfile 14 | - gemfiles/rails5.0.gemfile 15 | 16 | sudo: required 17 | 18 | bundler_args: --no-deployment 19 | 20 | before_script: 21 | - sudo cp /usr/share/doc/mysql-server-5.6/examples/my-default.cnf /usr/share/mysql/my-default.cnf 22 | 23 | script: bundle exec rake test 24 | 25 | matrix: 26 | exclude: 27 | - rvm: 2.4 28 | gemfile: gemfiles/rails3.2.gemfile 29 | -------------------------------------------------------------------------------- /test/integration/no_traffic_test.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | require_relative "../integration_helper" 3 | 4 | class NoTrafficTest < Minitest::Test 5 | def test_basic_cutover 6 | $mysql_master.connection.query("set GLOBAL READ_ONLY=0") 7 | $mysql_slave.connection.query("set GLOBAL READ_ONLY=1") 8 | 9 | puts "testing basic cutover..." 10 | 11 | system "#{master_cut_script} 127.0.0.1:#{$mysql_master.port} 127.0.0.1:#{$mysql_slave.port} root -p ''" 12 | assert_ro($mysql_master.connection, 'master', true) 13 | assert_ro($mysql_slave.connection, 'master', false) 14 | end 15 | end 16 | -------------------------------------------------------------------------------- /test/integration_helper.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | require 'bundler/setup' 3 | require 'mysql2' 4 | require 'minitest/autorun' 5 | 6 | if !defined?(Minitest::Test) 7 | Minitest::Test = MiniTest::Unit::TestCase 8 | end 9 | 10 | require_relative 'boot_mysql_env' 11 | 12 | def assert_ro(cx, str, bool) 13 | expected = bool ? 1 : 0 14 | assert_equal expected, cx.query("select @@read_only as ro").first['ro'], "#{str} is #{bool ? 'read-write' : 'read-only'} but I expected otherwise!" 
15 | end 16 | 17 | def master_cut_script 18 | File.expand_path(File.dirname(__FILE__)) + "/../bin/master_cut" 19 | end 20 | -------------------------------------------------------------------------------- /test/boot_slave: -------------------------------------------------------------------------------- 1 | require_relative 'isolated_server' 2 | 3 | # yeah, not technically isolated 4 | master = IsolatedServer::Mysql.new(port: 3306) 5 | 6 | slave = IsolatedServer::Mysql.new(data_path: "/Users/ben/.zendesk/var/mysql", allow_output: true, params: "--relay-log=footwa --skip-slave-start", port: 41756) 7 | slave.boot! 8 | puts "mysql slave booted on port #{slave.port} -- access with mysql -uroot -h127.0.0.1 --port=#{slave.port} mysql" 9 | slave.connection.query("set global server_id=123") 10 | slave.make_slave_of(master) 11 | slave.set_rw(false) 12 | 13 | uid_server = IsolatedServer::Mysql.new(data_path: "/Users/ben/.zendesk/var/mysql", allow_output: true, params: "--skip-slave-start", port: 41757) 14 | uid_server.boot! 15 | puts "mysql uid server booted on port #{uid_server.port} -- access with mysql -uroot -h127.0.0.1 --port=#{uid_server.port} mysql" 16 | sleep 17 | -------------------------------------------------------------------------------- /test/integration/with_queries_to_be_killed_test.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | require_relative "../integration_helper" 3 | class WithKillableQueries < Minitest::Test 4 | def test_with_queries_to_be_killed 5 | puts "testing with long running queries..." 6 | 7 | $mysql_master.connection.query("set GLOBAL READ_ONLY=0") 8 | $mysql_slave.connection.query("set GLOBAL READ_ONLY=1") 9 | 10 | thread = Thread.new { 11 | begin 12 | $mysql_master.connection.query("update flexmaster_test.users set name=sleep(600)") 13 | assert false, "Query did not get killed! Bad." 
14 | exit 1 15 | rescue StandardError => e 16 | puts e 17 | end 18 | } 19 | 20 | system "#{master_cut_script} 127.0.0.1:#{$mysql_master.port} 127.0.0.1:#{$mysql_slave.port} root -p ''" 21 | 22 | thread.join 23 | 24 | $mysql_master.reconnect! 25 | assert_ro($mysql_master.connection, 'master', true) 26 | assert_ro($mysql_slave.connection, 'slave', false) 27 | end 28 | end 29 | -------------------------------------------------------------------------------- /test/integration/wrong_setup_test.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | require_relative "../integration_helper" 3 | class WrongSetupTest < Minitest::Test 4 | def assert_script_failed 5 | assert(!system("#{master_cut_script} 127.0.0.1:#{$mysql_master.port} 127.0.0.1:#{$mysql_slave.port} root -p ''")) 6 | end 7 | 8 | def test_wrong 9 | puts "testing cutover with incorrect master config..." 10 | $mysql_master.connection.query("set GLOBAL READ_ONLY=0") 11 | $mysql_slave.connection.query("set GLOBAL READ_ONLY=0") 12 | assert_script_failed 13 | 14 | puts "testing cutover with incorrect slave config..." 
15 | $mysql_master.connection.query("set GLOBAL READ_ONLY=0") 16 | $mysql_slave.connection.query("set GLOBAL READ_ONLY=0") 17 | assert_script_failed 18 | 19 | puts "testing cutover with stopped slave" 20 | $mysql_master.connection.query("set GLOBAL READ_ONLY=0") 21 | $mysql_slave.connection.query("set GLOBAL READ_ONLY=1") 22 | $mysql_slave.connection.query("STOP SLAVE") 23 | assert_script_failed 24 | end 25 | end 26 | -------------------------------------------------------------------------------- /test/integration/there_and_back_again_test.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | require_relative "../integration_helper" 3 | 4 | class ThereAndBackAgain < Minitest::Test 5 | def test_there_and_back 6 | $mysql_master.connection.query("set GLOBAL READ_ONLY=0") 7 | $mysql_slave.connection.query("set GLOBAL READ_ONLY=1") 8 | 9 | puts "testing first cutover..." 10 | 11 | system "#{master_cut_script} 127.0.0.1:#{$mysql_master.port} 127.0.0.1:#{$mysql_slave.port} root -p '' -r -s" 12 | assert_ro($mysql_master.connection, 'original master', true) 13 | assert_ro($mysql_slave.connection, 'original slave', false) 14 | 15 | assert "Yes" == $mysql_master.connection.query("show slave status").first['Slave_IO_Running'] 16 | 17 | system "#{master_cut_script} 127.0.0.1:#{$mysql_slave.port} 127.0.0.1:#{$mysql_master.port} root -p '' -r" 18 | assert_ro($mysql_master.connection, 'original master', false) 19 | assert_ro($mysql_slave.connection, 'original slave', true) 20 | 21 | assert "No" == $mysql_slave.connection.query("show slave status").first['Slave_IO_Running'] 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 Ben Osheroff 2 | 3 | MIT License 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of 
this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 19 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 20 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 22 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /ar_mysql_flexmaster.gemspec: -------------------------------------------------------------------------------- 1 | # -*- encoding: utf-8 -*- 2 | 3 | Gem::Specification.new do |gem| 4 | gem.authors = ["Ben Osheroff", "Benjamin Quorning", "Gabe Martin-Dempesy", "Michael Grosser", "Pierre Schambacher"] 5 | gem.email = ["bquorning@zendesk.com", "gabe@zendesk.com", "mgrosser@zendesk.com", "pschambacher@zendesk.com"] 6 | gem.description = "ar_mysql_flexmaster allows configuring N mysql servers in database.yml and auto-selects which is a master at runtime" 7 | gem.summary = "select a master at runtime from a list" 8 | gem.homepage = "http://github.com/zendesk/ar_mysql_flexmaster" 9 | 10 | gem.files = `git ls-files`.split($\) 11 | gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) } 12 | gem.test_files = gem.files.grep(%r{^(test|spec|features)/}) 13 | gem.name = "ar_mysql_flexmaster" 14 | gem.require_paths = ["lib"] 15 | gem.version = "1.0.3" 16 | 17 | gem.required_ruby_version = ">= 2.2" 18 | 19 | gem.add_runtime_dependency("mysql2") 20 | gem.add_runtime_dependency("activerecord") 21 | gem.add_runtime_dependency("activesupport") 22 | gem.add_development_dependency("bundler") 23 | gem.add_development_dependency("rake") 24 | gem.add_development_dependency("wwtd") 25 | gem.add_development_dependency("minitest") 26 | gem.add_development_dependency("minitest-reporters") 27 | gem.add_development_dependency("mocha", "~> 1.1.0") 28 | gem.add_development_dependency("bump") 29 | gem.add_development_dependency("pry") 30 | gem.add_development_dependency("isolated_server") 31 | end 32 | -------------------------------------------------------------------------------- /unplanned_failovers.md: -------------------------------------------------------------------------------- 1 | Here's a logic flow I've been thinking about for dealing with master outages. 
In plain english, I'm generally 2 | thinking: 3 | 4 | - inside a transaction, we will never attempt to recover from a master failure or attempt to ensure proper connections 5 | - outside a transaction, we'll try to be liberal about recovering from bad states and limp along in a read-only mode 6 | as best we can. 7 | 8 | guard: 9 | begin 10 | yield 11 | rescue 'server gone away', "can't connect to server" 12 | retry-once 13 | end 14 | 15 | hard_verify(INSERTs) == verify correct connection, try for 5 seconds, crash and unset connection if you can't 16 | 17 | soft_verify(SELECTs) == every N requests, try to verify that your connection is the right one. 18 | 19 | If you're running a SELECT targeted at the master, and no master is online, it's acceptable to use either 20 | the old master (your existing connection) or a slave connection for the purposes of reads (until the next 21 | verify) 22 | 23 | This will allow us to limp along in read-only mode for a short time until a new master can be promoted. 24 | The downside of this approach is that in a pathological case we could be making decisions based on stale 25 | or incorrect data. I feel that the odds of this are long and are probably made up for by having a 26 | halfway decent read-only mode. 27 | 28 | ``` 29 | switch incoming_sql: 30 | BEGIN: 31 | - in transaction? 32 | -> do not verify, do not guard. 33 | 34 | - guard { hard-verify } 35 | - execute BEGIN statement (without guard). hard to reconnect here because of side effects 36 | 37 | INSERT/UPDATE/DELETE: 38 | - in transaction? 39 | -> do not verify, do not guard 40 | -> guard { hard-verify, execute } 41 | 42 | SELECT: 43 | - in transaction? 
44 | -> no verify, no guard 45 | -> guard { soft-verify / execute } 46 | ``` 47 | 48 | -------------------------------------------------------------------------------- /test/boot_mysql_env.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # frozen_string_literal: true 3 | 4 | require "isolated_server" 5 | 6 | threads = [] 7 | threads << Thread.new do 8 | $mysql_master = IsolatedServer::Mysql.new(allow_output: false) 9 | $mysql_master.boot! 10 | 11 | puts "mysql master booted on port #{$mysql_master.port} -- access with mysql -uroot -h127.0.0.1 --port=#{$mysql_master.port} mysql" 12 | end 13 | 14 | threads << Thread.new do 15 | $mysql_slave = IsolatedServer::Mysql.new 16 | $mysql_slave.boot! 17 | 18 | puts "mysql slave booted on port #{$mysql_slave.port} -- access with mysql -uroot -h127.0.0.1 --port=#{$mysql_slave.port} mysql" 19 | end 20 | 21 | threads << Thread.new do 22 | $mysql_slave_2 = IsolatedServer::Mysql.new 23 | $mysql_slave_2.boot! 24 | 25 | puts "mysql chained slave booted on port #{$mysql_slave_2.port} -- access with mysql -uroot -h127.0.0.1 --port=#{$mysql_slave_2.port} mysql" 26 | end 27 | 28 | threads.each(&:join) 29 | 30 | $mysql_master.connection.query("CHANGE MASTER TO master_host='127.0.0.1', master_user='root', master_password=''") 31 | $mysql_slave.make_slave_of($mysql_master) 32 | $mysql_slave_2.make_slave_of($mysql_slave) 33 | 34 | $mysql_master.connection.query("GRANT ALL ON flexmaster_test.* to flex@localhost") 35 | $mysql_master.connection.query("CREATE DATABASE flexmaster_test") 36 | $mysql_master.connection.query("CREATE TABLE flexmaster_test.users (id INT(10) NOT NULL AUTO_INCREMENT PRIMARY KEY, name varchar(20))") 37 | $mysql_master.connection.query("INSERT INTO flexmaster_test.users set name='foo'") 38 | $mysql_slave.set_rw(false) 39 | $mysql_slave_2.set_rw(false) 40 | 41 | # let replication for the grants and such flow down. bleh. 
42 | repl_sync = false 43 | while !repl_sync 44 | repl_sync = [[$mysql_master, $mysql_slave], [$mysql_slave, $mysql_slave_2]].all? do |master, slave| 45 | master_pos = master.connection.query("show master status").to_a.first["Position"] 46 | slave.connection.query("show slave status").to_a.first["Exec_Master_Log_Pos"] == master_pos 47 | end 48 | sleep 1 49 | end 50 | 51 | sleep if __FILE__ == $0 52 | -------------------------------------------------------------------------------- /gemfiles/rails3.2.gemfile.lock: -------------------------------------------------------------------------------- 1 | PATH 2 | remote: .. 3 | specs: 4 | ar_mysql_flexmaster (1.0.3) 5 | activerecord 6 | activesupport 7 | mysql2 8 | 9 | GEM 10 | remote: https://rubygems.org/ 11 | specs: 12 | actionmailer (3.2.22.5) 13 | actionpack (= 3.2.22.5) 14 | mail (~> 2.5.4) 15 | actionpack (3.2.22.5) 16 | activemodel (= 3.2.22.5) 17 | activesupport (= 3.2.22.5) 18 | builder (~> 3.0.0) 19 | erubis (~> 2.7.0) 20 | journey (~> 1.0.4) 21 | rack (~> 1.4.5) 22 | rack-cache (~> 1.2) 23 | rack-test (~> 0.6.1) 24 | sprockets (~> 2.2.1) 25 | activemodel (3.2.22.5) 26 | activesupport (= 3.2.22.5) 27 | builder (~> 3.0.0) 28 | activerecord (3.2.22.5) 29 | activemodel (= 3.2.22.5) 30 | activesupport (= 3.2.22.5) 31 | arel (~> 3.0.2) 32 | tzinfo (~> 0.3.29) 33 | activeresource (3.2.22.5) 34 | activemodel (= 3.2.22.5) 35 | activesupport (= 3.2.22.5) 36 | activesupport (3.2.22.5) 37 | i18n (~> 0.6, >= 0.6.4) 38 | multi_json (~> 1.0) 39 | ansi (1.5.0) 40 | arel (3.0.3) 41 | builder (3.0.4) 42 | bump (0.5.4) 43 | coderay (1.1.1) 44 | erubis (2.7.0) 45 | hike (1.2.3) 46 | i18n (0.8.6) 47 | isolated_server (0.4.12) 48 | journey (1.0.4) 49 | json (1.8.6) 50 | mail (2.5.5) 51 | mime-types (~> 1.16) 52 | treetop (~> 1.4.8) 53 | metaclass (0.0.4) 54 | method_source (0.8.2) 55 | mime-types (1.25.1) 56 | minitest (5.10.3) 57 | minitest-reporters (1.1.18) 58 | ansi 59 | builder 60 | minitest (>= 5.0) 61 | ruby-progressbar 62 | 
mocha (1.1.0) 63 | metaclass (~> 0.0.1) 64 | multi_json (1.12.1) 65 | mysql2 (0.3.21) 66 | polyglot (0.3.5) 67 | pry (0.10.4) 68 | coderay (~> 1.1.0) 69 | method_source (~> 0.8.1) 70 | slop (~> 3.4) 71 | rack (1.4.7) 72 | rack-cache (1.7.0) 73 | rack (>= 0.4) 74 | rack-ssl (1.3.4) 75 | rack 76 | rack-test (0.6.3) 77 | rack (>= 1.0) 78 | rails (3.2.22.5) 79 | actionmailer (= 3.2.22.5) 80 | actionpack (= 3.2.22.5) 81 | activerecord (= 3.2.22.5) 82 | activeresource (= 3.2.22.5) 83 | activesupport (= 3.2.22.5) 84 | bundler (~> 1.0) 85 | railties (= 3.2.22.5) 86 | railties (3.2.22.5) 87 | actionpack (= 3.2.22.5) 88 | activesupport (= 3.2.22.5) 89 | rack-ssl (~> 1.3.2) 90 | rake (>= 0.8.7) 91 | rdoc (~> 3.4) 92 | thor (>= 0.14.6, < 2.0) 93 | rake (12.0.0) 94 | rdoc (3.12.2) 95 | json (~> 1.4) 96 | ruby-progressbar (1.9.0) 97 | slop (3.6.0) 98 | sprockets (2.2.3) 99 | hike (~> 1.2) 100 | multi_json (~> 1.0) 101 | rack (~> 1.0) 102 | tilt (~> 1.1, != 1.3.0) 103 | thor (0.20.0) 104 | tilt (1.4.1) 105 | treetop (1.4.15) 106 | polyglot 107 | polyglot (>= 0.3.1) 108 | tzinfo (0.3.53) 109 | wwtd (1.3.0) 110 | 111 | PLATFORMS 112 | ruby 113 | 114 | DEPENDENCIES 115 | ar_mysql_flexmaster! 116 | bump 117 | bundler 118 | isolated_server 119 | minitest 120 | minitest-reporters 121 | mocha (~> 1.1.0) 122 | mysql2 (~> 0.3.0) 123 | pry 124 | rails (~> 3.2.0) 125 | rake 126 | wwtd 127 | 128 | BUNDLED WITH 129 | 1.16.0 130 | -------------------------------------------------------------------------------- /gemfiles/rails4.2.gemfile.lock: -------------------------------------------------------------------------------- 1 | PATH 2 | remote: .. 
3 | specs: 4 | ar_mysql_flexmaster (1.0.3) 5 | activerecord 6 | activesupport 7 | mysql2 8 | 9 | GEM 10 | remote: https://rubygems.org/ 11 | specs: 12 | actionmailer (4.2.7.1) 13 | actionpack (= 4.2.7.1) 14 | actionview (= 4.2.7.1) 15 | activejob (= 4.2.7.1) 16 | mail (~> 2.5, >= 2.5.4) 17 | rails-dom-testing (~> 1.0, >= 1.0.5) 18 | actionpack (4.2.7.1) 19 | actionview (= 4.2.7.1) 20 | activesupport (= 4.2.7.1) 21 | rack (~> 1.6) 22 | rack-test (~> 0.6.2) 23 | rails-dom-testing (~> 1.0, >= 1.0.5) 24 | rails-html-sanitizer (~> 1.0, >= 1.0.2) 25 | actionview (4.2.7.1) 26 | activesupport (= 4.2.7.1) 27 | builder (~> 3.1) 28 | erubis (~> 2.7.0) 29 | rails-dom-testing (~> 1.0, >= 1.0.5) 30 | rails-html-sanitizer (~> 1.0, >= 1.0.2) 31 | activejob (4.2.7.1) 32 | activesupport (= 4.2.7.1) 33 | globalid (>= 0.3.0) 34 | activemodel (4.2.7.1) 35 | activesupport (= 4.2.7.1) 36 | builder (~> 3.1) 37 | activerecord (4.2.7.1) 38 | activemodel (= 4.2.7.1) 39 | activesupport (= 4.2.7.1) 40 | arel (~> 6.0) 41 | activesupport (4.2.7.1) 42 | i18n (~> 0.7) 43 | json (~> 1.7, >= 1.7.7) 44 | minitest (~> 5.1) 45 | thread_safe (~> 0.3, >= 0.3.4) 46 | tzinfo (~> 1.1) 47 | ansi (1.5.0) 48 | arel (6.0.4) 49 | builder (3.2.3) 50 | bump (0.5.4) 51 | coderay (1.1.1) 52 | concurrent-ruby (1.0.5) 53 | erubis (2.7.0) 54 | globalid (0.4.0) 55 | activesupport (>= 4.2.0) 56 | i18n (0.8.6) 57 | isolated_server (0.4.12) 58 | json (1.8.6) 59 | loofah (2.0.3) 60 | nokogiri (>= 1.5.9) 61 | mail (2.6.6) 62 | mime-types (>= 1.16, < 4) 63 | metaclass (0.0.4) 64 | method_source (0.8.2) 65 | mime-types (3.1) 66 | mime-types-data (~> 3.2015) 67 | mime-types-data (3.2016.0521) 68 | mini_portile2 (2.2.0) 69 | minitest (5.10.3) 70 | minitest-reporters (1.1.18) 71 | ansi 72 | builder 73 | minitest (>= 5.0) 74 | ruby-progressbar 75 | mocha (1.1.0) 76 | metaclass (~> 0.0.1) 77 | mysql2 (0.4.9) 78 | nokogiri (1.8.0) 79 | mini_portile2 (~> 2.2.0) 80 | pry (0.10.4) 81 | coderay (~> 1.1.0) 82 | method_source (~> 0.8.1) 
83 | slop (~> 3.4) 84 | rack (1.6.8) 85 | rack-test (0.6.3) 86 | rack (>= 1.0) 87 | rails (4.2.7.1) 88 | actionmailer (= 4.2.7.1) 89 | actionpack (= 4.2.7.1) 90 | actionview (= 4.2.7.1) 91 | activejob (= 4.2.7.1) 92 | activemodel (= 4.2.7.1) 93 | activerecord (= 4.2.7.1) 94 | activesupport (= 4.2.7.1) 95 | bundler (>= 1.3.0, < 2.0) 96 | railties (= 4.2.7.1) 97 | sprockets-rails 98 | rails-deprecated_sanitizer (1.0.3) 99 | activesupport (>= 4.2.0.alpha) 100 | rails-dom-testing (1.0.8) 101 | activesupport (>= 4.2.0.beta, < 5.0) 102 | nokogiri (~> 1.6) 103 | rails-deprecated_sanitizer (>= 1.0.1) 104 | rails-html-sanitizer (1.0.3) 105 | loofah (~> 2.0) 106 | railties (4.2.7.1) 107 | actionpack (= 4.2.7.1) 108 | activesupport (= 4.2.7.1) 109 | rake (>= 0.8.7) 110 | thor (>= 0.18.1, < 2.0) 111 | rake (12.0.0) 112 | ruby-progressbar (1.9.0) 113 | slop (3.6.0) 114 | sprockets (3.7.1) 115 | concurrent-ruby (~> 1.0) 116 | rack (> 1, < 3) 117 | sprockets-rails (3.2.0) 118 | actionpack (>= 4.0) 119 | activesupport (>= 4.0) 120 | sprockets (>= 3.0.0) 121 | thor (0.20.0) 122 | thread_safe (0.3.6) 123 | tzinfo (1.2.3) 124 | thread_safe (~> 0.1) 125 | wwtd (1.3.0) 126 | 127 | PLATFORMS 128 | ruby 129 | 130 | DEPENDENCIES 131 | ar_mysql_flexmaster! 132 | bump 133 | bundler 134 | isolated_server 135 | minitest 136 | minitest-reporters 137 | mocha (~> 1.1.0) 138 | pry 139 | rails (~> 4.2.0, < 4.2.8) 140 | rake 141 | wwtd 142 | 143 | BUNDLED WITH 144 | 1.16.0 145 | -------------------------------------------------------------------------------- /gemfiles/rails5.0.gemfile.lock: -------------------------------------------------------------------------------- 1 | PATH 2 | remote: .. 
3 | specs: 4 | ar_mysql_flexmaster (1.0.3) 5 | activerecord 6 | activesupport 7 | mysql2 8 | 9 | GEM 10 | remote: https://rubygems.org/ 11 | specs: 12 | actioncable (5.0.0.1) 13 | actionpack (= 5.0.0.1) 14 | nio4r (~> 1.2) 15 | websocket-driver (~> 0.6.1) 16 | actionmailer (5.0.0.1) 17 | actionpack (= 5.0.0.1) 18 | actionview (= 5.0.0.1) 19 | activejob (= 5.0.0.1) 20 | mail (~> 2.5, >= 2.5.4) 21 | rails-dom-testing (~> 2.0) 22 | actionpack (5.0.0.1) 23 | actionview (= 5.0.0.1) 24 | activesupport (= 5.0.0.1) 25 | rack (~> 2.0) 26 | rack-test (~> 0.6.3) 27 | rails-dom-testing (~> 2.0) 28 | rails-html-sanitizer (~> 1.0, >= 1.0.2) 29 | actionview (5.0.0.1) 30 | activesupport (= 5.0.0.1) 31 | builder (~> 3.1) 32 | erubis (~> 2.7.0) 33 | rails-dom-testing (~> 2.0) 34 | rails-html-sanitizer (~> 1.0, >= 1.0.2) 35 | activejob (5.0.0.1) 36 | activesupport (= 5.0.0.1) 37 | globalid (>= 0.3.6) 38 | activemodel (5.0.0.1) 39 | activesupport (= 5.0.0.1) 40 | activerecord (5.0.0.1) 41 | activemodel (= 5.0.0.1) 42 | activesupport (= 5.0.0.1) 43 | arel (~> 7.0) 44 | activesupport (5.0.0.1) 45 | concurrent-ruby (~> 1.0, >= 1.0.2) 46 | i18n (~> 0.7) 47 | minitest (~> 5.1) 48 | tzinfo (~> 1.1) 49 | ansi (1.5.0) 50 | arel (7.1.4) 51 | builder (3.2.3) 52 | bump (0.5.4) 53 | coderay (1.1.1) 54 | concurrent-ruby (1.0.5) 55 | erubis (2.7.0) 56 | globalid (0.4.0) 57 | activesupport (>= 4.2.0) 58 | i18n (0.8.6) 59 | isolated_server (0.4.12) 60 | loofah (2.0.3) 61 | nokogiri (>= 1.5.9) 62 | mail (2.6.6) 63 | mime-types (>= 1.16, < 4) 64 | metaclass (0.0.4) 65 | method_source (0.8.2) 66 | mime-types (3.1) 67 | mime-types-data (~> 3.2015) 68 | mime-types-data (3.2016.0521) 69 | mini_portile2 (2.2.0) 70 | minitest (5.10.3) 71 | minitest-reporters (1.1.18) 72 | ansi 73 | builder 74 | minitest (>= 5.0) 75 | ruby-progressbar 76 | mocha (1.1.0) 77 | metaclass (~> 0.0.1) 78 | mysql2 (0.4.9) 79 | nio4r (1.2.1) 80 | nokogiri (1.8.0) 81 | mini_portile2 (~> 2.2.0) 82 | pry (0.10.4) 83 | coderay (~> 1.1.0) 
84 | method_source (~> 0.8.1) 85 | slop (~> 3.4) 86 | rack (2.0.3) 87 | rack-test (0.6.3) 88 | rack (>= 1.0) 89 | rails (5.0.0.1) 90 | actioncable (= 5.0.0.1) 91 | actionmailer (= 5.0.0.1) 92 | actionpack (= 5.0.0.1) 93 | actionview (= 5.0.0.1) 94 | activejob (= 5.0.0.1) 95 | activemodel (= 5.0.0.1) 96 | activerecord (= 5.0.0.1) 97 | activesupport (= 5.0.0.1) 98 | bundler (>= 1.3.0, < 2.0) 99 | railties (= 5.0.0.1) 100 | sprockets-rails (>= 2.0.0) 101 | rails-dom-testing (2.0.3) 102 | activesupport (>= 4.2.0) 103 | nokogiri (>= 1.6) 104 | rails-html-sanitizer (1.0.3) 105 | loofah (~> 2.0) 106 | railties (5.0.0.1) 107 | actionpack (= 5.0.0.1) 108 | activesupport (= 5.0.0.1) 109 | method_source 110 | rake (>= 0.8.7) 111 | thor (>= 0.18.1, < 2.0) 112 | rake (12.0.0) 113 | ruby-progressbar (1.9.0) 114 | slop (3.6.0) 115 | sprockets (3.7.1) 116 | concurrent-ruby (~> 1.0) 117 | rack (> 1, < 3) 118 | sprockets-rails (3.2.0) 119 | actionpack (>= 4.0) 120 | activesupport (>= 4.0) 121 | sprockets (>= 3.0.0) 122 | thor (0.20.0) 123 | thread_safe (0.3.6) 124 | tzinfo (1.2.3) 125 | thread_safe (~> 0.1) 126 | websocket-driver (0.6.5) 127 | websocket-extensions (>= 0.1.0) 128 | websocket-extensions (0.1.2) 129 | wwtd (1.3.0) 130 | 131 | PLATFORMS 132 | ruby 133 | 134 | DEPENDENCIES 135 | ar_mysql_flexmaster! 136 | bump 137 | bundler 138 | isolated_server 139 | minitest 140 | minitest-reporters 141 | mocha (~> 1.1.0) 142 | pry 143 | rails (~> 5.0.0, < 5.0.1) 144 | rake 145 | wwtd 146 | 147 | BUNDLED WITH 148 | 1.16.0 149 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **ArMysqlFlexmaster is deprecated and will no longer be maintained.** 2 | 3 | Zendesk no longer needs to use this library, and it will no longer be maintained. 
4 | 5 | ---- 6 | 7 | [![Build Status](https://travis-ci.org/zendesk/ar_mysql_flexmaster.svg?branch=master)](https://travis-ci.org/zendesk/ar_mysql_flexmaster) 8 | 9 | # Flexmaster 10 | 11 | Flexmaster is an adapter for ActiveRecord and MySQL that allows an application 12 | node to find a master among a list of potential masters at runtime. It trades 13 | some properties of a more traditional HA solution (load balancing, middleware) 14 | for simplicity of operation. 15 | 16 | ## Configuration: 17 | 18 | Your environment should be configured with 1 active master and N replicas. Each 19 | replica should have MySQL’s global `READ_ONLY` flag set to true (this is really 20 | a best practice for your replicas anyway, but Flexmaster depends on it). 21 | 22 | database.yml should contain a list of hosts – all of them potential masters, all 23 | of them potential replicas. It should look like this: 24 | 25 | ``` 26 | production: 27 | adapter: mysql_flexmaster 28 | username: flex 29 | hosts: ["db01:3306", "db02:3306"] 30 | 31 | production_slave: 32 | adapter: mysql_flexmaster 33 | username: flex 34 | slave: true 35 | hosts: ["db01:3306", "db02:3306"] 36 | ``` 37 | 38 | In this example, we’ve configured two different connections for Rails to use. 39 | Note that they’re identical except for the `slave: true` key in the 40 | `production_slave` YAML block. Adding `slave: true` indicates to Flexmaster that 41 | this connection should prefer a read-only slave wherever possible. 42 | 43 | ## How it works 44 | 45 | ### Overview 46 | 47 | The MySQL `READ_ONLY` flag is used to indicate a current master amongst the 48 | cluster. Only one member of the replication chain may be read-write at any given 49 | time. The application picks at run time, based on the `READ_ONLY` flag, which 50 | host is correct. 51 | 52 | ### Boot time 53 | 54 | Your ActiveRecord application will pick a correct MySQL host for the given 55 | configuration by probing hosts until it finds the correct host. 
56 | 57 | For master configurations (`slave: true` is not specified): 58 | 59 | The application will probe each host in turn, and find the MySQL candidate among 60 | these nodes that is read-write (`SET GLOBAL READ_ONLY=0`). 61 | 62 | If it finds more than one node where `READ_ONLY == 0`, it will abort. 63 | 64 | For slave configurations (`slave: true` specified): 65 | 66 | The application will choose a replica at random from amongst those where 67 | `READ_ONLY == 1`. If no active replicas are found, it will fall back to the 68 | master. 69 | 70 | ### Run time 71 | 72 | Before each transaction is opened on the master, the application checks the 73 | status of the `READ_ONLY` variable. If `READ_ONLY == 0` (our active connection 74 | is still to the current master), it will proceed with the transaction as normal. 75 | If `READ_ONLY == 1` (the master has been demoted), it will drop the current 76 | connection and re-poll the cluster, sleeping for up to a default of 5 seconds 77 | for a new master to be promoted. When it finds the new master, it will continue 78 | playing the transaction on it. 79 | 80 | ### Promoting a new master 81 | 82 | *The `bin/master_cut` script in this project will perform steps 3–5 for you.* 83 | 84 | The process of promoting a new master to head the cluster should be as follows: 85 | 86 | 1. Identify a new candidate master. 87 | 1. Ensure that all other replicas in the cluster are chained off the candidate 88 | master; you want the chain to look like this: 89 | 90 | ``` 91 | -> -> 92 | -> 93 | 94 | ``` 95 | 1. Set the old master to `READ_ONLY = 1`. 96 | 1. Record the master-bin-log position of the candidate master (if you want to 97 | re-use the old master as a replica). 98 | 1. Set the new master to `READ_ONLY = 0`. 99 | 100 | The application nodes will, in time, find that the old master is inactive and 101 | will move their connections to the new master. 
102 | 103 | The application will also eventually shift slave traffic to another node in the 104 | cluster. 105 | 106 | ### Caveats and gotchas 107 | 108 | - Any explicit (`BEGIN` … `END`) transactions that are in-flight when the old 109 | master goes `READ_ONLY` will crash. In theory there’s a workaround for this 110 | problem; in practice it’s rather unwieldy due to a lack of shared global 111 | variables in MySQL. 112 | - Connection variables are unsupported, due to the connection being able to go 113 | away at any time. 114 | 115 | ## Installation 116 | 117 | Add this line to your application’s Gemfile: 118 | 119 | gem 'ar_mysql_flexmaster' 120 | 121 | And then execute: 122 | 123 | $ bundle 124 | 125 | Or install it yourself as: 126 | 127 | $ gem install ar_mysql_flexmaster 128 | 129 | ## Contributing 130 | 131 | 1. Fork it 132 | 2. Create your feature branch (`git checkout -b my-new-feature`) 133 | 3. Commit your changes (`git commit -am 'Added some feature'`) 134 | 4. Push to the branch (`git push origin my-new-feature`) 135 | 5. 
Create new Pull Request 136 | -------------------------------------------------------------------------------- /bin/master_cut: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | require 'bundler/setup' 4 | require 'mysql2' 5 | require 'socket' 6 | require 'pp' 7 | require "getoptlong" 8 | 9 | Thread.abort_on_exception = false # an exception in a worker thread does not by itself abort the whole process 10 | 11 | opts = GetoptLong.new( 12 | ["--password", "-p", GetoptLong::REQUIRED_ARGUMENT], 13 | ["--rehome-master", "-r", GetoptLong::NO_ARGUMENT], 14 | ["--start-slave", "-s", GetoptLong::NO_ARGUMENT] 15 | ) 16 | 17 | opts.each do |opt, arg| 18 | case opt 19 | when '--password' 20 | $password = arg 21 | when '--rehome-master' 22 | $rehome_master = true 23 | when '--start-slave' 24 | $start_slave = true 25 | $rehome_master = true # --start-slave implies --rehome-master 26 | end 27 | end 28 | 29 | def usage # prints the command-line synopsis and exits with a failure status 30 | puts "Usage: master_cut OLD_MASTER NEW_MASTER ADMIN_USERNAME" 31 | puts " [-p,--password PASSWORD]" 32 | puts " [-r,--rehome-master]" 33 | puts " [-s,--start-slave]" 34 | exit false 35 | end 36 | 37 | $old_master, $new_master, $username = *ARGV 38 | unless $old_master && $new_master && $username 39 | usage 40 | end 41 | 42 | def open_cx(host) # opens a Mysql2 client to "host" or "host:port" using the global $username/$password 43 | host, port = host.split(":") 44 | port = port.to_i if port 45 | Mysql2::Client.new(:host => host, :username => $username, :password => $password, :port => port) 46 | end 47 | 48 | def set_rw(cx) # clears the server's global READ_ONLY flag (makes it writable) 49 | cx.query("SET GLOBAL READ_ONLY=0") 50 | end 51 | 52 | def set_ro(cx) # sets the server's global READ_ONLY flag 53 | cx.query("SET GLOBAL READ_ONLY=1") 54 | end 55 | 56 | $swapped_ok = false # flipped to true by swap_thread once the new master is read-write; stops the kill loop 57 | 58 | def fail(reason) # reports a failed preflight check and exits with a failure status 59 | puts "Failed preflight check: #{reason}" 60 | exit false 61 | end 62 | 63 | def ask_for_password # prompts for the admin password on stdin unless --password already supplied one 64 | return unless $password.nil? 
65 | 66 | $stdout.write("Password for #{$username}: ") 67 | begin 68 | system "stty -echo" 69 | $password = $stdin.gets.chomp 70 | ensure 71 | system "stty echo" 72 | end 73 | end 74 | 75 | def preflight_check 76 | cx = open_cx($old_master) 77 | rw = cx.query("select @@read_only as read_only").first['read_only'] 78 | fail("old-master #{$old_master} is read-only!") if rw != 0 79 | 80 | slave_cx = open_cx($new_master) 81 | rw = slave_cx.query("select @@read_only as read_only").first['read_only'] 82 | fail("new-master #{$old_master} is read-write!") if rw != 1 83 | 84 | slave_info = slave_cx.query("show slave status").first 85 | fail("no slave configured!") if slave_info.nil? 86 | fail("slave is stopped!") unless slave_info['Slave_IO_Running'] == 'Yes' && slave_info['Slave_SQL_Running'] == 'Yes' 87 | fail("slave is delayed") if slave_info['Seconds_Behind_Master'].nil? || slave_info['Seconds_Behind_Master'] > 0 88 | 89 | masters_slave_info = cx.query("show slave status").first 90 | if $rehome_master && (masters_slave_info.nil? || masters_slave_info['Master_User'] == 'test') 91 | fail("I can't rehome the original master -- it has no slave user or password.") 92 | end 93 | 94 | master_ip, slave_master_ip = [$old_master, slave_info['Master_Host']].map do |h| 95 | h = h.split(':').first 96 | Socket.gethostbyname(h)[3].unpack("CCCC") 97 | end 98 | 99 | if master_ip != slave_master_ip 100 | fail("slave does not appear to be replicating off master! 
(master: #{master_ip.join('.')}, slave's master: #{slave_master_ip.join('.')})") 101 | end 102 | end 103 | 104 | def process_kill_thread 105 | Thread.new do 106 | cx = open_cx($old_master) 107 | sleep 5 108 | while !$swapped_ok 109 | my_id = cx.query("SELECT CONNECTION_ID() as id").first['id'] 110 | processlist = cx.query("show processlist") 111 | processlist.each do |process| 112 | next if process['Info'] =~ /SET GLOBAL READ_ONLY/ 113 | next if process['Id'].to_i == my_id.to_i 114 | puts "killing #{process}" 115 | kill_query!(cx, process['Id']) 116 | end 117 | sleep 0.1 118 | end 119 | end 120 | end 121 | 122 | def wait_for_slave_catchup(master, slave) 123 | loop do 124 | master_info = master.query("show master status").first 125 | slave_info = slave.query("show slave status").first 126 | break if master_info['Position'] <= slave_info['Exec_Master_Log_Pos'] 127 | end 128 | end 129 | 130 | def kill_query!(cx, id) 131 | cx.query("kill #{id}") 132 | rescue Mysql2::Error => e 133 | raise e unless e.errno == 1094 # unknown thread id error 134 | end 135 | 136 | def swap_thread 137 | Thread.new do 138 | master = open_cx($old_master) 139 | slave = open_cx($new_master) 140 | set_ro(master) 141 | # wait for catchup 142 | 143 | wait_for_slave_catchup(master, slave) 144 | 145 | slave.query("STOP SLAVE") 146 | new_master_info = slave.query("show master status").first 147 | set_rw(slave) 148 | $swapped_ok = true 149 | puts "Swapped #{$old_master} and #{$new_master}" 150 | puts "New master information at time of swap: " 151 | pp new_master_info 152 | if $rehome_master 153 | rehome_master(new_master_info, $start_slave) 154 | end 155 | exit 156 | end 157 | end 158 | 159 | def rehome_master(info, start_slave) 160 | puts "Reconfiguring #{$old_master} to be a slave of #{$new_master}..." 161 | host, port = $new_master.split(":") 162 | port_clause = port ? 
"master_port = #{port}," : "" 163 | cx = open_cx($old_master) 164 | cx.query("change master to master_host='#{host}', #{port_clause} master_log_file = '#{info['File']}', master_log_pos=#{info['Position']}") 165 | cx.query("START SLAVE") if start_slave 166 | end 167 | 168 | ask_for_password 169 | preflight_check 170 | 171 | threads = [] 172 | threads << swap_thread 173 | threads << process_kill_thread 174 | threads.each(&:join) 175 | 176 | rehome_master 177 | -------------------------------------------------------------------------------- /lib/active_record/connection_adapters/mysql_flexmaster_adapter.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | require 'active_record' 3 | require 'active_record/connection_adapters/mysql2_adapter' 4 | require 'timeout' 5 | 6 | module ActiveRecord 7 | class Base 8 | def self.mysql_flexmaster_connection(config) 9 | config = config.symbolize_keys 10 | 11 | # fallback to :host or :localhost 12 | config[:hosts] ||= config.key?(:host) ? [config[:host]] : ['localhost'] 13 | config[:username] = 'root' if config[:username].nil? 14 | 15 | if Mysql2::Client.const_defined? :FOUND_ROWS 16 | config[:flags] = Mysql2::Client::FOUND_ROWS 17 | end 18 | 19 | ConnectionAdapters::MysqlFlexmasterAdapter.new(logger, config) 20 | end 21 | end 22 | 23 | module ConnectionAdapters 24 | class MysqlFlexmasterAdapter < Mysql2Adapter 25 | class NoActiveMasterException < StandardError; end 26 | class TooManyMastersException < StandardError; end 27 | class NoServerAvailableException < StandardError; end 28 | 29 | CHECK_EVERY_N_SELECTS = 10 30 | DEFAULT_CONNECT_TIMEOUT = 1 31 | DEFAULT_CONNECT_ATTEMPTS = 3 32 | DEFAULT_TX_HOLD_TIMEOUT = 5 33 | 34 | def initialize(logger, config) 35 | @select_counter = 0 36 | @config = config 37 | @rw = config[:slave] ? 
:read : :write 38 | @tx_hold_timeout = @config[:tx_hold_timeout] || DEFAULT_TX_HOLD_TIMEOUT 39 | @connection_timeout = @config[:connection_timeout] || DEFAULT_CONNECT_TIMEOUT 40 | @connection_attempts = @config[:connection_attempts] || DEFAULT_CONNECT_ATTEMPTS 41 | 42 | connection = find_correct_host(@rw) 43 | 44 | raise_no_server_available! unless connection 45 | super(connection, logger, [], config) 46 | end 47 | 48 | def begin_db_transaction # before opening an outermost transaction, make sure we are connected to a correct host 49 | if !in_transaction? 50 | with_lost_cx_guard { hard_verify } 51 | end 52 | super 53 | end 54 | 55 | def execute(sql, name = nil) # verifies the host before running sql, unless a transaction is already open 56 | if in_transaction? 57 | super # no way to rescue any lost cx or wrong-host errors at this point. 58 | else 59 | with_lost_cx_guard do 60 | if has_side_effects?(sql) 61 | hard_verify 62 | else 63 | soft_verify 64 | end 65 | 66 | super 67 | end 68 | end 69 | end 70 | 71 | # after a cluster recovers from a bad state, an insert or SELECT will bring us back 72 | # into sanity, but sometimes we would never get there and would get stuck crashing in this function instead. 73 | def quote(*args) 74 | if !@connection 75 | soft_verify 76 | end 77 | super 78 | end 79 | 80 | def quote_string(*args) # same recovery as quote: re-establish a connection first if we have none 81 | if !@connection 82 | soft_verify 83 | end 84 | super 85 | end 86 | 87 | def current_host # host of the Mysql2 client currently in use 88 | @connection.query_options[:host] 89 | end 90 | 91 | def current_port # port of the Mysql2 client currently in use 92 | @connection.query_options[:port] 93 | end 94 | 95 | private 96 | 97 | def in_transaction? # true while at least one transaction is open on this adapter 98 | open_transactions > 0 99 | end 100 | 101 | # never try to carry on if inside a transaction 102 | # otherwise try to detect when the master/slave has crashed and retry stuff. 
103 | def with_lost_cx_guard # yields; on a retryable connection error drops @connection and re-runs the block once 104 | retried = false 105 | 106 | begin 107 | yield 108 | rescue Mysql2::Error, ActiveRecord::StatementInvalid => e 109 | if retryable_error?(e) && !retried 110 | retried = true 111 | @connection = nil # forces hard_verify/soft_verify to look for a host again on the retry 112 | retry 113 | else 114 | raise e 115 | end 116 | end 117 | end 118 | 119 | AR_MESSAGES = [/^Mysql2::Error: MySQL server has gone away/, 120 | /^Mysql2::Error: Lost connection to MySQL server during query/, 121 | /^Mysql2::Error: Can't connect to MySQL server/] 122 | def retryable_error?(e) # truthy for connection-level failures, matched by errno or by the wrapped message text 123 | case e 124 | when Mysql2::Error 125 | # 2006 is gone-away 126 | # 2013 is lost connection during query 127 | # 2003 is can't-connect (applicable when reconnect is true) 128 | [2006, 2013, 2003].include?(e.errno) 129 | when ActiveRecord::StatementInvalid 130 | AR_MESSAGES.any? { |m| e.message.match(m) } 131 | end 132 | end 133 | 134 | # when either doing BEGIN or INSERT/UPDATE/DELETE etc, ensure a correct connection 135 | # and crash if wrong 136 | def hard_verify 137 | if !@connection || !cx_correct? 138 | refind_correct_host! 139 | end 140 | end 141 | 142 | # on select statements, check every 10 statements to see if we need to switch hosts, 143 | # but don't crash if the cx is wrong, and don't sleep trying to find a correct one. 144 | def soft_verify 145 | if !@connection 146 | @connection = find_correct_host(@rw) 147 | else 148 | @select_counter += 1 149 | return unless @select_counter % CHECK_EVERY_N_SELECTS == 0 150 | 151 | if !cx_correct? 152 | cx = find_correct_host(@rw) 153 | @connection = cx if cx # keep the current connection when no better host was found 154 | end 155 | end 156 | 157 | if @rw == :write && !@connection 158 | # desperation mode: we've been asked for the master, but it's just not available. 159 | # we'll go ahead and return a connection to the slave, understanding that it'll never work 160 | # for writes. 
(we'll call hard_verify and crash) 161 | @connection = find_correct_host(:read) 162 | end 163 | end 164 | 165 | def has_side_effects?(sql) # truthy when the statement starts with a writing or DDL keyword 166 | sql =~ /^\s*(INSERT|UPDATE|DELETE|ALTER|CHANGE|REPLACE)/i 167 | end 168 | 169 | def connect # (re)establishes @connection for the configured role or raises NoServerAvailableException 170 | @connection = find_correct_host(@rw) 171 | raise_no_server_available! unless @connection 172 | end 173 | 174 | def raise_no_server_available! # raises NoServerAvailableException carrying the errors collected while probing 175 | raise NoServerAvailableException.new(errors_to_message) 176 | end 177 | 178 | def collected_errors # errors accumulated by find_correct_host / initialize_connection 179 | @collected_errors ||= [] 180 | end 181 | 182 | def clear_collected_errors! 183 | @collected_errors = [] 184 | end 185 | 186 | def errors_to_message # one-line summary of the de-duplicated collected errors 187 | "Errors encountered while trying #{@config[:hosts].inspect}: " + 188 | collected_errors.map { |e| "#{e.class.name}: #{e.message}" }.uniq.join(",") 189 | end 190 | 191 | def refind_correct_host! # polls for a correct host every 0.1s for up to @tx_hold_timeout seconds, then raises 192 | clear_collected_errors! 193 | 194 | sleep_interval = 0.1 195 | timeout_at = Time.now.to_f + @tx_hold_timeout 196 | 197 | loop do 198 | @connection = find_correct_host(@rw) 199 | return if @connection 200 | 201 | sleep(sleep_interval) 202 | 203 | break unless Time.now.to_f < timeout_at 204 | end 205 | 206 | raise_no_server_available! 207 | end 208 | 209 | def hosts_and_ports # memoized [host, port] pairs parsed from the "host:port" strings in config[:hosts] 210 | @hosts_and_ports ||= @config[:hosts].map do |hoststr| 211 | host, port = hoststr.split(':') 212 | port = port.to_i unless port.nil? 
213 | [host, port] 214 | end 215 | end 216 | 217 | def find_correct_host(rw) # connects to every configured host and returns the one connection to keep (or nil); all others are closed 218 | cxs = hosts_and_ports.map do |host, port| 219 | initialize_connection(host, port) 220 | end.compact 221 | 222 | correct_cxs = cxs.select { |cx| cx_correct?(cx) } # NOTE(review): cx_correct? compares against @rw, not the rw argument 223 | 224 | chosen_cx = nil 225 | case rw 226 | when :write 227 | # for master connections, we make damn sure that we have just one master 228 | if correct_cxs.size == 1 229 | chosen_cx = correct_cxs.first 230 | else 231 | # nothing read-write, or too many read-write 232 | if correct_cxs.size > 1 233 | collected_errors << TooManyMastersException.new("found #{correct_cxs.size} read-write servers") 234 | else 235 | collected_errors << NoActiveMasterException.new("no read-write servers found") 236 | end 237 | 238 | chosen_cx = nil 239 | end 240 | when :read 241 | # for slave connections (or master-gone-away scenarios), we just return a random RO candidate or the master if none are available 242 | if correct_cxs.empty? 243 | chosen_cx = cxs.first 244 | else 245 | chosen_cx = correct_cxs.shuffle.first 246 | end 247 | end 248 | cxs.each { |cx| cx.close unless chosen_cx == cx } # close every probe connection we are not returning 249 | chosen_cx 250 | end 251 | 252 | def initialize_connection(host, port) # connects to one host with a per-attempt timeout; records the error and returns nil after @connection_attempts failures 253 | attempts = 1 254 | begin 255 | Timeout.timeout(@connection_timeout) do 256 | cfg = @config.merge(:host => host, :port => port) 257 | Mysql2::Client.new(cfg).tap do |cx| 258 | cx.query_options.merge!(:as => :array) # rows come back as arrays; cx_correct? reads the value with res.first 259 | end 260 | end 261 | rescue Mysql2::Error, Timeout::Error => e 262 | if attempts < @connection_attempts 263 | attempts += 1 264 | retry 265 | else 266 | collected_errors << e 267 | nil 268 | end 269 | end 270 | end 271 | 272 | def cx_correct?(cx = @connection) # true when the server's @@read_only matches our role: 0 for :write, 1 otherwise 273 | res = cx.query("SELECT @@read_only as ro").first 274 | 275 | if @rw == :write 276 | res.first == 0 277 | else 278 | res.first == 1 279 | end 280 | end 281 | end 282 | end 283 | end 284 | -------------------------------------------------------------------------------- /test/ar_flexmaster_test.rb: 
-------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | require 'bundler/setup' 3 | require 'ar_mysql_flexmaster' 4 | require 'active_support' 5 | require 'active_record' 6 | require 'minitest/autorun' 7 | require "minitest/reporters" 8 | require 'mocha/mini_test' 9 | require 'logger' 10 | 11 | if !defined?(Minitest::Test) # alias the legacy test-case class when Minitest::Test is not available 12 | Minitest::Test = MiniTest::Unit::TestCase 13 | end 14 | 15 | Minitest::Reporters.use!(Minitest::Reporters::SpecReporter.new(color: true)) 16 | 17 | require_relative 'boot_mysql_env' 18 | 19 | File.open(File.dirname(File.expand_path(__FILE__)) + "/database.yml", "w+") do |f| # regenerate database.yml with the ports of the running test servers 20 | f.write <<-EOL 21 | common: &common 22 | adapter: mysql_flexmaster 23 | username: flex 24 | hosts: ["127.0.0.1:#{$mysql_master.port}", "127.0.0.1:#{$mysql_slave.port}", "127.0.0.1:#{$mysql_slave_2.port}"] 25 | database: flexmaster_test 26 | 27 | test: 28 | <<: *common 29 | 30 | test_slave: 31 | <<: *common 32 | slave: true 33 | 34 | reconnect: 35 | <<: *common 36 | reconnect: true 37 | 38 | reconnect_slave: 39 | <<: *common 40 | reconnect: true 41 | slave: true 42 | EOL 43 | end 44 | 45 | ActiveRecord::Base.configurations = YAML.load(IO.read(File.dirname(__FILE__) + '/database.yml')) 46 | ActiveRecord::Base.establish_connection(:test) 47 | 48 | class User < ActiveRecord::Base # model on the default (master-role) connection 49 | end 50 | 51 | class UserSlave < ActiveRecord::Base # same users table through the slave-role connection 52 | establish_connection(:test_slave) 53 | self.table_name = "users" 54 | end 55 | 56 | class Reconnect < ActiveRecord::Base # master-role connection with reconnect: true 57 | establish_connection(:reconnect) 58 | self.table_name = "users" 59 | end 60 | 61 | class ReconnectSlave < ActiveRecord::Base # slave-role connection with reconnect: true 62 | establish_connection(:reconnect_slave) 63 | self.table_name = "users" 64 | end 65 | 66 | # $mysql_master and $mysql_slave are separate references to the master and slave that we 67 | # use to send control-channel commands on 68 | 69 | $original_master_port = $mysql_master.port 70 | 71 | class TestArFlexmaster < 
Minitest::Test 72 | def setup # every test starts with the master read-write and both slaves read-only 73 | ActiveRecord::Base.establish_connection(:test) 74 | 75 | $mysql_master.set_rw(true) if $mysql_master 76 | $mysql_slave.set_rw(false) if $mysql_slave 77 | $mysql_slave_2.set_rw(false) if $mysql_slave_2 78 | end 79 | 80 | def test_should_raise_without_a_rw_master 81 | [$mysql_master, $mysql_slave].each do |m| # slave_2 is already read-only from setup 82 | m.set_rw(false) 83 | end 84 | 85 | e = assert_raises(ActiveRecord::ConnectionAdapters::MysqlFlexmasterAdapter::NoServerAvailableException) do 86 | ActiveRecord::Base.connection 87 | end 88 | 89 | assert e.message =~ /NoActiveMasterException/ 90 | end 91 | 92 | def test_should_select_the_master_on_boot 93 | assert_equal $mysql_master, master_connection 94 | end 95 | 96 | def test_should_hold_txs_until_timeout_then_abort 97 | ActiveRecord::Base.connection 98 | 99 | $mysql_master.set_rw(false) 100 | start_time = Time.now.to_i 101 | assert_raises(ActiveRecord::ConnectionAdapters::MysqlFlexmasterAdapter::NoServerAvailableException) do 102 | User.create(:name => "foo") 103 | end 104 | end_time = Time.now.to_i 105 | assert end_time - start_time >= 5 # 5 matches the adapter's DEFAULT_TX_HOLD_TIMEOUT 106 | end 107 | 108 | def test_should_hold_txs_and_then_continue 109 | ActiveRecord::Base.connection 110 | $mysql_master.set_rw(false) 111 | Thread.new do # promote the slave one second later, while the create below is being held 112 | sleep 1 113 | $mysql_slave.set_rw(true) 114 | end 115 | User.create(:name => "foo") 116 | assert_equal $mysql_slave, master_connection 117 | if ActiveRecord::VERSION::MAJOR >= 4 118 | assert User.where(:name => "foo").exists? 
119 | else 120 | assert User.first(:conditions => { :name => "foo" }) 121 | end 122 | end 123 | 124 | def test_should_hold_implicit_txs_and_then_continue 125 | User.create!(:name => "foo") 126 | $mysql_master.set_rw(false) 127 | Thread.new do # promote the slave one second later, while update_all below is being held 128 | sleep 1 129 | $mysql_slave.set_rw(true) 130 | end 131 | User.update_all(:name => "bar") 132 | 133 | assert_equal $mysql_slave, master_connection 134 | 135 | assert_equal "bar", User.first.name 136 | end 137 | 138 | def test_should_let_in_flight_txs_crash 139 | User.transaction do 140 | $mysql_master.set_rw(false) # demote the master while the transaction is already open 141 | assert_raises(ActiveRecord::StatementInvalid) do 142 | User.update_all(:name => "bar") 143 | end 144 | end 145 | end 146 | 147 | def test_should_eventually_pick_up_new_master_on_selects 148 | ActiveRecord::Base.connection 149 | $mysql_master.set_rw(false) 150 | $mysql_slave.set_rw(true) 151 | assert_equal $mysql_master, master_connection 152 | 100.times do # enough selects to trigger the adapter's periodic host re-check 153 | User.first 154 | end 155 | assert_equal $mysql_slave, master_connection 156 | end 157 | 158 | # there's a small window in which the old master is read-only but the new master hasn't been made read-write yet. 159 | # Allow side-effect free statements to continue. 160 | def test_should_not_crash_selects_in_the_double_read_only_window 161 | ActiveRecord::Base.connection 162 | $mysql_master.set_rw(false) 163 | $mysql_slave.set_rw(false) 164 | assert_equal $mysql_master, master_connection 165 | 100.times do 166 | User.first 167 | end 168 | end 169 | 170 | def test_should_expose_the_current_master_and_port 171 | cx = ActiveRecord::Base.connection 172 | assert_equal "127.0.0.1", cx.current_host 173 | assert_equal $mysql_master.port, cx.current_port 174 | end 175 | 176 | def test_should_move_off_the_slave_after_it_becomes_master 177 | UserSlave.first 178 | User.create! 
179 | $mysql_master.set_rw(false) 180 | $mysql_slave.set_rw(true) 181 | 182 | 20.times do 183 | UserSlave.connection.execute("select 1") 184 | end 185 | 186 | assert [$mysql_master, $mysql_slave_2].include?(slave_connection) 187 | end 188 | 189 | def test_xxx_non_responsive_master 190 | return if ENV['TRAVIS'] # something different about 127.0.0.2 in travis, I guess. 191 | ActiveRecord::Base.configurations["test"]["hosts"] << "127.0.0.2:1235" 192 | start_time = Time.now.to_i 193 | User.connection.reconnect! 194 | assert Time.now.to_i - start_time >= 5, "only took #{Time.now.to_i - start_time} to timeout" 195 | ensure 196 | ActiveRecord::Base.configurations["test"]["hosts"].pop 197 | end 198 | 199 | def test_limping_along_with_a_slave_acting_as_a_master 200 | User.create! 201 | $mysql_master.down! 202 | 203 | # the test here is that even though we've asserted we want the master, 204 | # since we're doing a SELECT we'll stay limping along by running the SELECT on a slave instead. 205 | User.first 206 | 207 | assert [$mysql_slave, $mysql_slave_2].include?(master_connection) 208 | ensure 209 | $mysql_master.up! 210 | end 211 | 212 | def test_recovering_after_losing_connection_to_the_master 213 | User.create! 214 | assert User.connection.instance_variable_get("@connection") 215 | 216 | $mysql_master.down! 217 | # trying to do an INSERT with the master down puts is into a precious state -- 218 | # we've got a nil @connection object. There's two possible solutions here; 219 | # 220 | # 1 - substitute a slave connection in for the master object but raise an exception anyway 221 | # 2 - deal with a nil connection object later 222 | # 223 | # opting for (2) now 224 | # 225 | assert_raises(ActiveRecord::ConnectionAdapters::MysqlFlexmasterAdapter::NoServerAvailableException) do 226 | User.create! 
227 | end 228 | 229 | assert_nil User.connection.instance_variable_get("@connection") 230 | 231 | # this proxies to @connection and has been the cause of some crashes 232 | assert User.connection.quote("foo") 233 | ensure 234 | $mysql_master.up! 235 | end 236 | 237 | def test_quote_string_should_recover_connection 238 | User.create! 239 | assert User.connection.instance_variable_get("@connection") 240 | User.connection.instance_variable_set("@connection", nil) # simulate the lost-connection state directly 241 | 242 | assert User.connection.quote_string("foo") 243 | end 244 | 245 | def test_recovering_after_the_master_is_back_up # the recovery exercised here is a slave being promoted while the old master is down 246 | User.create! 247 | $mysql_master.down! 248 | 249 | assert_raises(ActiveRecord::ConnectionAdapters::MysqlFlexmasterAdapter::NoServerAvailableException) do 250 | User.create! 251 | end 252 | # bad state again. 253 | 254 | # now a dba or someone comes along and flips the read-only bit on the slave 255 | $mysql_slave.set_rw(true) 256 | User.create! 257 | UserSlave.first 258 | 259 | assert_equal $mysql_slave, master_connection 260 | ensure 261 | $mysql_master.up! 262 | end 263 | 264 | def test_losing_the_server_with_reconnect_on 265 | Reconnect.create! 266 | ReconnectSlave.first 267 | 268 | $mysql_master.down! 269 | 270 | assert Reconnect.first # reads keep working with the master down 271 | assert ReconnectSlave.first 272 | 273 | assert_raises(ActiveRecord::ConnectionAdapters::MysqlFlexmasterAdapter::NoServerAvailableException) do 274 | Reconnect.create! 275 | end 276 | 277 | $mysql_slave.set_rw(true) 278 | Reconnect.create! 279 | ReconnectSlave.first 280 | ensure 281 | $mysql_master.up! 282 | end 283 | 284 | # test that when nothing else is available we can fall back to the master in a slave role 285 | def test_master_can_act_as_slave 286 | $mysql_slave.down! 287 | $mysql_slave_2.down! 288 | 289 | UserSlave.first 290 | assert_equal $mysql_master, slave_connection 291 | ensure 292 | $mysql_slave.up! 293 | $mysql_slave_2.up! 
294 | end 295 | 296 | def test_connection_multiple_attempts 297 | # We're simulating connection timeout, so mocha's Expectation#times doesn't register the calls 298 | attempts = 0 299 | null_logger = Logger.new('/dev/null') 300 | config = { hosts: ['localhost'], connection_timeout: 0.01, connection_attempts: 5 } 301 | 302 | Mysql2::Client.stubs(:new).with do # the matcher block counts each attempt, then sleeps past the 0.01s connection timeout 303 | attempts += 1 304 | sleep 1 305 | end 306 | assert_raises(ActiveRecord::ConnectionAdapters::MysqlFlexmasterAdapter::NoServerAvailableException) do 307 | ActiveRecord::ConnectionAdapters::MysqlFlexmasterAdapter.new(null_logger, config) 308 | end 309 | assert_equal 5, attempts 310 | end 311 | 312 | private 313 | 314 | def port_for_class(klass) # asks the server behind klass's connection which port it listens on 315 | klass.connection.execute("show global variables like 'port'").first.last.to_i 316 | end 317 | 318 | def main_connection_is_original_master? 319 | port = port_for_class(ActiveRecord::Base) 320 | port == $original_master_port 321 | end 322 | 323 | def connection_for_class(klass) # maps klass's live connection back to the control handle with the same port 324 | port = port_for_class(klass) 325 | [$mysql_master, $mysql_slave, $mysql_slave_2].find { |cx| cx.port == port } 326 | end 327 | 328 | def master_connection # control handle of the server the master-role model is connected to 329 | connection_for_class(User) 330 | end 331 | 332 | def slave_connection # control handle of the server the slave-role model is connected to 333 | connection_for_class(UserSlave) 334 | end 335 | end 336 | -------------------------------------------------------------------------------- 