├── LICENSE ├── NullEscape.c ├── README.md ├── archive_mysql_binlogs.py ├── backup_tester.py ├── binlog_rotator.py ├── check_mysql_replication.py ├── check_shard_mappings.py ├── clean_up_unfinished_migration.py ├── fence_server.py ├── fence_shutdown_mysql.py ├── find_gtid_for_timestamp.py ├── find_shard_mismatches.py ├── find_unused_db_servers.py ├── finish_shard_migration.py ├── fix_orphaned_shards.py ├── get_recent_checksums.py ├── kill_backups.py ├── launch_amazon_mysql_server.py ├── launch_replacement_db_host.py ├── lib ├── __init__.py ├── backup.py ├── host_utils.py ├── mysql_connect.py ├── mysql_lib.py └── timeout.py ├── maxwell-3306.conf ├── modify_mysql_zk.py ├── mysql_backup.py ├── mysql_backup_csv.py ├── mysql_backup_logical.py ├── mysql_backup_status.py ├── mysql_backup_xtrabackup.py ├── mysql_checksum.py ├── mysql_cli.py ├── mysql_cnf_builder.py ├── mysql_cnf_config ├── 5.5 ├── 5.6 ├── 5.7 ├── c3.8xlarge ├── default_my.cnf ├── i2.2xlarge ├── i2.4xlarge ├── maxwell.template ├── modsharddb ├── myzenfollower16db ├── phabricator ├── pinlatertestdb ├── pt_heartbeat.template ├── pt_kill.template ├── r3.2xlarge ├── r3.xlarge └── sharddb ├── mysql_failover.py ├── mysql_grants.py ├── mysql_init_server.py ├── mysql_record_table_size.py ├── mysql_replica_mappings.py ├── mysql_restore.py ├── mysql_shard_config.py ├── mysql_shard_mappings.py ├── mysql_shard_status.py ├── mysqld_multi ├── mysqlops.sql ├── other_slave_running_etl.py ├── pt-heartbeat.conf ├── pt-kill.conf ├── restart_daemons.py ├── retirement_queue.py ├── safe_uploader.py ├── safe_uploader_repeater.py ├── schema_verifier.py ├── start_shard_migration.py ├── tcollector └── mysql.py └── zdict_gen ├── README.md ├── test_zdict_freqs.py ├── test_zdict_gen.py ├── zdict_freqs.py └── zdict_gen.py /NullEscape.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | /* 5 | This program has been created to handle an incompatibility between how mysqldump escapes 6 | some characters and how hive interprets those escaped chars. It does the following: 7 | 8 | If you see an 0x5c30 in the input sequence 9 | 10 | a. and there is no or even number of 0x5c before 0x5c30, translate this 0x5c30 to 0x00 11 | 12 | b. if there is odd number of 0x5c before 0x5c30, don't do anything. 
13 | 14 | Some sample transforms: 15 | 16 | 0x5c30 => 0x00 17 | 18 | 0x5c5c30 => 0x5c5c30 19 | 20 | 0x5c5c5c30 => 0x5c5c00 21 | 22 | 0x5c5c5c5c30 => 0x5c5c5c5c30 23 | 24 | 0x5c5c5c3030 => 0x5c5c0030 25 | 26 | 0x5c5c5c5c3030 => 0x5c5c5c5c3030 27 | 28 | 0x5c5c5c40 => 0x5c5c5c40 29 | 30 | 0x5c5c5c5c40 => 0x5c5c5c5c40 31 | 32 | Here is another way to test: 33 | 34 | - Create table with blob content: create table MyTest (id integer, value1 varchar(20), content blob, value2 double, primary key(id)); 35 | 36 | - Insert into blob content: insert into MyTest (id, value1, content, value2) values (1, "data1", 0x3020090d0a2227005c30, 2.2); 37 | 38 | - checking content: select hex(content) from MyTest; 39 | 40 | - chmod a+rw /tmp/dump 41 | 42 | - mysqldump -u root --tab=/tmp/dump --single-transaction -- create-options test 43 | 44 | - see content: hexdump /tmp/dump/MyTest.txt 45 | 46 | hexdump of original dump file: 47 | 48 | 0000000 31 09 64 61 74 61 31 09 30 20 5c 09 0d 5c 0a 22 49 | 50 | 0000010 27 5c 30 5c 5c 30 09 32 2e 32 0a 51 | 52 | 000001b 53 | 54 | 55 | hexdump after passing through this program: 56 | 57 | 0000000 31 09 64 61 74 61 31 09 30 20 5c 09 0d 5c 0a 22 58 | 59 | 0000010 27 00 5c 5c 30 09 32 2e 32 0a 60 | 61 | 000001a 62 | 63 | Author : vamsi Nov 2015 64 | 65 | */ 66 | 67 | #define FALSE 0 68 | #define TRUE 1 69 | 70 | #define RBUFFERLEN 65536 71 | 72 | char bufferR[RBUFFERLEN]; 73 | int ibufferR = 0; /* index in bufferR */ 74 | int nbufferR = 0; /* number of valid chars in bufferR */ 75 | 76 | 77 | #define WBUFFERLEN 65536 78 | 79 | char bufferW[WBUFFERLEN]; 80 | int ibufferW = 0; /* index in bufferW upto which we wrote */ 81 | 82 | /* wrapper for efficieny. Returns if eof reached. If not, puts next char in *addrc */ 83 | int getcharWrapper(char *addrc) { 84 | int error = 0; 85 | 86 | if ((nbufferR <= ibufferR) && !feof(stdin)) { 87 | /* we used up what we read earlier */ 88 | 89 | nbufferR = fread(bufferR, 1, RBUFFERLEN, stdin); 90 | ibufferR = 0; 91 | 92 | if ((nbufferR != RBUFFERLEN) && !feof(stdin)) { 93 | error = ferror(stdin); 94 | fprintf(stderr, "Read failed with error %d\n", error); 95 | exit(1); 96 | } 97 | } 98 | 99 | if (nbufferR <= ibufferR) { 100 | return TRUE; 101 | } 102 | 103 | *addrc = bufferR[ibufferR]; 104 | ibufferR++; 105 | return FALSE; 106 | } 107 | 108 | void flush() { 109 | int error = 0; 110 | int written = 0; 111 | written = fwrite(bufferW, 1, ibufferW, stdout); 112 | 113 | if (written != ibufferW) { 114 | error = ferror(stdout); 115 | fprintf(stderr, "Write failed with error %d\n", error); 116 | exit(2); 117 | } 118 | 119 | ibufferW = 0; 120 | } 121 | 122 | /* wrapper to buffer */ 123 | void putcharWrapper(char c) { 124 | int error = 0; 125 | int written = 0; 126 | 127 | if (ibufferW >= WBUFFERLEN) { 128 | /* buffer full */ 129 | flush(); 130 | } 131 | 132 | if (ibufferW >= WBUFFERLEN) { 133 | fprintf(stderr, "Buffer full even after flush %d\n", error); 134 | exit(3); 135 | } 136 | 137 | bufferW[ibufferW] = c; 138 | ibufferW++; 139 | } 140 | 141 | int main(int argc, char** argv) { 142 | char c; 143 | int eof = FALSE; 144 | while (!(eof = getcharWrapper(&c))) { 145 | if (c != 0x5c) { 146 | putcharWrapper(c); 147 | continue; 148 | } 149 | 150 | /* 151 | * if we reach here, we have one outstanding 0x5c that we have not yet put in output. 152 | * let us count consecutive 0x5c that we see excluding the outstanding one. 153 | */ 154 | long count = 0; 155 | 156 | /* we found 0x5c. 
Keep reading until we get EOF or something other than 0x5c */ 157 | while (!(eof = getcharWrapper(&c))) { 158 | if (c == 0x30) { 159 | if (count % 2 == 0) { 160 | /* we saw 0 or even number of 0x5c before 0x5c30 */ 161 | putcharWrapper(0x00); 162 | break; 163 | } else { 164 | /* 165 | * we saw odd number of 0x5c before 0x5c30. put the outstanding 0c5c in the output, 166 | * and then 0x30 167 | */ 168 | putcharWrapper(0x5c); 169 | putcharWrapper(0x30); 170 | break; 171 | } 172 | } else if (c == 0x5c) { 173 | putcharWrapper(0x5c); 174 | count++; 175 | } else { 176 | /* put the outstanding 0x5c and the char we just read in output */ 177 | putcharWrapper(0x5c); 178 | putcharWrapper(c); 179 | break; 180 | } 181 | } 182 | 183 | if (eof) { 184 | /* put the outstanding 0x5c */ 185 | putcharWrapper(0x5c); 186 | flush(); 187 | fflush(stdout); 188 | return 0; 189 | } 190 | 191 | /* if we reach here we should not have any outstanding 0x5c. just continbue reading. */ 192 | } 193 | 194 | flush(); 195 | fflush(stdout); 196 | } 197 | -------------------------------------------------------------------------------- /archive_mysql_binlogs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import datetime 4 | import os 5 | import boto 6 | import boto.s3.key 7 | import logging 8 | import subprocess 9 | import time 10 | import traceback 11 | 12 | import binlog_rotator 13 | import safe_uploader 14 | from lib import host_utils 15 | from lib import mysql_lib 16 | from lib import environment_specific 17 | 18 | BINLOG_ARCHIVING_TABLE = """CREATE TABLE IF NOT EXISTS {db}.{tbl} ( 19 | `hostname` varchar(90) NOT NULL, 20 | `port` int(11) NOT NULL, 21 | `binlog` varchar(90) NOT NULL, 22 | `binlog_creation` datetime NULL, 23 | `uploaded` datetime NOT NULL, 24 | PRIMARY KEY (`binlog`), 25 | INDEX `instance` (`hostname`, `port`), 26 | INDEX `uploaded` (`uploaded`), 27 | INDEX `binlog_creation` (`binlog_creation`) 28 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1""" 29 | BINLOG_S3_BASE_DIR = 'binlogs' 30 | STANDARD_RETENTION_BINLOG_S3_DIR = 'standard_retention' 31 | BINLOG_LOCK_SOCKET = 'archivebinlogs' 32 | BINLOG_INFINITE_REPEATER_TERM_FILE = '/tmp/archive_mysql_binlogs_infinite.die' 33 | MAX_ERRORS = 5 34 | TMP_DIR = '/tmp/' 35 | 36 | log = logging.getLogger(__name__) 37 | 38 | 39 | def main(): 40 | parser = argparse.ArgumentParser(description='Upload binlogs to s3') 41 | parser.add_argument('-p', 42 | '--port', 43 | help='Port of instance to backup. 
Default is 3306', 44 | default=3306) 45 | parser.add_argument('--dry_run', 46 | help='Do not upload binlogs, just display output', 47 | default=False, 48 | action='store_true') 49 | args = parser.parse_args() 50 | archive_mysql_binlogs(args.port, args.dry_run) 51 | 52 | 53 | def archive_mysql_binlogs(port, dry_run): 54 | """ Flush logs and upload all binary logs that don't exist to s3 55 | 56 | Arguments: 57 | port - Port of the MySQL instance on which to act 58 | dry_run - Display output but do not uplad 59 | """ 60 | binlog_rotator.rotate_binlogs_if_needed(port, dry_run) 61 | zk = host_utils.MysqlZookeeper() 62 | instance = host_utils.HostAddr(':'.join((host_utils.HOSTNAME, 63 | str(port)))) 64 | 65 | if zk.get_replica_set_from_instance(instance) is None: 66 | log.info('Instance is not in production, exiting') 67 | return 68 | 69 | ensure_binlog_archiving_table_sanity(instance) 70 | log.info('Taking binlog archiver lock') 71 | lock_handle = host_utils.bind_lock_socket(BINLOG_LOCK_SOCKET) 72 | log_bin_dir = host_utils.get_cnf_setting('log_bin', port) 73 | bin_logs = mysql_lib.get_master_logs(instance) 74 | logged_uploads = get_logged_binlog_uploads(instance) 75 | for binlog in bin_logs[:-1]: 76 | err_count = 0 77 | local_file = os.path.join(os.path.dirname(log_bin_dir), 78 | binlog['Log_name']) 79 | if already_uploaded(instance, local_file, logged_uploads): 80 | continue 81 | success = False 82 | while not success: 83 | try: 84 | upload_binlog(instance, local_file, dry_run) 85 | success = True 86 | except: 87 | if err_count > MAX_ERRORS: 88 | log.error('Error count in thread > MAX_THREAD_ERROR. ' 89 | 'Aborting :(') 90 | raise 91 | 92 | log.error('error: {e}'.format(e=traceback.format_exc())) 93 | err_count = err_count + 1 94 | time.sleep(err_count * 2) 95 | 96 | host_utils.release_lock_socket(lock_handle) 97 | log.info('Archiving complete') 98 | 99 | 100 | def already_uploaded(instance, binlog, logged_uploads): 101 | """ Check to see if a binlog has already been uploaded 102 | 103 | Args: 104 | instance - a hostAddr object 105 | binlog - the full path to the binlog file 106 | logged_uploads - a set of all uploaded binlogs for this instance 107 | 108 | Returns True if already uploaded, False otherwise. 
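Example (hypothetical path and names, sketching the expected call):
    logged = get_logged_binlog_uploads(instance)
    already_uploaded(instance, '/raid0/mysql/binlogs/mysql-bin.000042', logged)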
109 | """ 110 | if os.path.basename(binlog) in logged_uploads: 111 | log.debug('Binlog already logged as uploaded') 112 | return True 113 | 114 | # we should hit this code rarely, only when uploads have not been logged 115 | boto_conn = boto.connect_s3() 116 | bucket = boto_conn.get_bucket(environment_specific.BACKUP_BUCKET_UPLOAD_MAP[host_utils.get_iam_role()], 117 | validate=False) 118 | if bucket.get_key(s3_binlog_path(instance, os.path.basename((binlog)))): 119 | log.debug("Binlog already uploaded but not logged {b}".format(b=binlog)) 120 | log_binlog_upload(instance, binlog) 121 | return True 122 | 123 | return False 124 | 125 | 126 | def upload_binlog(instance, binlog, dry_run): 127 | """ Upload a binlog file to s3 128 | 129 | Args: 130 | instance - a hostAddr object 131 | binlog - the full path to the binlog file 132 | dry_run - if set, do not actually upload a binlog 133 | """ 134 | s3_upload_path = s3_binlog_path(instance, binlog) 135 | bucket = environment_specific.BACKUP_BUCKET_UPLOAD_MAP[host_utils.get_iam_role()] 136 | 137 | if dry_run: 138 | log.info('In dry_run mode, skipping compression and upload') 139 | return 140 | 141 | procs = dict() 142 | procs['lzop'] = subprocess.Popen(['lzop', binlog, '--to-stdout'], 143 | stdout=subprocess.PIPE) 144 | safe_uploader.safe_upload(precursor_procs=procs, 145 | stdin=procs['lzop'].stdout, 146 | bucket=bucket, 147 | key=s3_upload_path, 148 | verbose=True) 149 | log_binlog_upload(instance, binlog) 150 | 151 | 152 | def log_binlog_upload(instance, binlog): 153 | """ Log to the master that a binlog has been uploaded 154 | 155 | Args: 156 | instance - a hostAddr object 157 | binlog - the full path to the binlog file 158 | """ 159 | zk = host_utils.MysqlZookeeper() 160 | binlog_creation = datetime.datetime.fromtimestamp(os.stat(binlog).st_atime) 161 | replica_set = zk.get_replica_set_from_instance(instance) 162 | master = zk.get_mysql_instance_from_replica_set(replica_set) 163 | conn = mysql_lib.connect_mysql(master, 'dbascript') 164 | cursor = conn.cursor() 165 | sql = ("REPLACE INTO {metadata_db}.{tbl} " 166 | "SET hostname = %(hostname)s, " 167 | " port = %(port)s, " 168 | " binlog = %(binlog)s, " 169 | " binlog_creation = %(binlog_creation)s, " 170 | " uploaded = NOW() ").format(metadata_db=mysql_lib.METADATA_DB, 171 | tbl=environment_specific.BINLOG_ARCHIVING_TABLE_NAME) 172 | metadata = {'hostname': instance.hostname, 173 | 'port': str(instance.port), 174 | 'binlog': os.path.basename(binlog), 175 | 'binlog_creation': binlog_creation} 176 | cursor.execute(sql, metadata) 177 | conn.commit() 178 | 179 | 180 | def get_logged_binlog_uploads(instance): 181 | """ Get all binlogs that have been logged as uploaded 182 | 183 | Args: 184 | instance - a hostAddr object to run against and check 185 | 186 | Returns: 187 | A set of binlog file names 188 | """ 189 | conn = mysql_lib.connect_mysql(instance, 'dbascript') 190 | cursor = conn.cursor() 191 | sql = ("SELECT binlog " 192 | "FROM {metadata_db}.{tbl} " 193 | "WHERE hostname = %(hostname)s AND " 194 | " port = %(port)s " 195 | "".format(metadata_db=mysql_lib.METADATA_DB, 196 | tbl=environment_specific.BINLOG_ARCHIVING_TABLE_NAME)) 197 | cursor.execute(sql, {'hostname': instance.hostname, 198 | 'port': str(instance.port)}) 199 | ret = set() 200 | for binlog in cursor.fetchall(): 201 | ret.add(binlog['binlog']) 202 | 203 | return ret 204 | 205 | 206 | def ensure_binlog_archiving_table_sanity(instance): 207 | """ Create binlog archiving log table if missing, purge old data 208 | 209 | Args: 210 | 
instance - A hostAddr object. Note: this function will find the master of 211 | the instance if the instance is not a master 212 | """ 213 | zk = host_utils.MysqlZookeeper() 214 | replica_set = zk.get_replica_set_from_instance(instance) 215 | master = zk.get_mysql_instance_from_replica_set(replica_set) 216 | conn = mysql_lib.connect_mysql(master, 'dbascript') 217 | cursor = conn.cursor() 218 | if not mysql_lib.does_table_exist(master, mysql_lib.METADATA_DB, 219 | environment_specific.BINLOG_ARCHIVING_TABLE_NAME): 220 | log.debug('Creating missing metadata table') 221 | cursor.execute(BINLOG_ARCHIVING_TABLE.format( 222 | db=mysql_lib.METADATA_DB, 223 | tbl=environment_specific.BINLOG_ARCHIVING_TABLE_NAME)) 224 | sql = ("DELETE FROM {metadata_db}.{tbl} " 225 | "WHERE binlog_creation < now() - INTERVAL {d} DAY" 226 | "").format(metadata_db=mysql_lib.METADATA_DB, 227 | tbl=environment_specific.BINLOG_ARCHIVING_TABLE_NAME, 228 | d=(environment_specific.S3_BINLOG_RETENTION+1)) 229 | log.info(sql) 230 | cursor.execute(sql) 231 | conn.commit() 232 | 233 | 234 | def s3_binlog_path(instance, binlog): 235 | """ Determine the path in s3 for a binlog 236 | 237 | Args: 238 | instance - A hostAddr instance 239 | binlog - A binlog filename 240 | 241 | Returns: 242 | A path in S3 where the file should be stored. 243 | """ 244 | # At some point in the near future we will probably use reduced 245 | # retention for pinlater 246 | return os.path.join(BINLOG_S3_BASE_DIR, 247 | STANDARD_RETENTION_BINLOG_S3_DIR, 248 | instance.hostname_prefix, 249 | instance.hostname, 250 | str(instance.port), 251 | ''.join((os.path.basename(binlog), 252 | '.lzo'))) 253 | 254 | 255 | if __name__ == "__main__": 256 | environment_specific.initialize_logger() 257 | main() 258 | -------------------------------------------------------------------------------- /backup_tester.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import logging 4 | import multiprocessing 5 | import pprint 6 | 7 | from lib import backup 8 | from lib import environment_specific 9 | from lib import host_utils 10 | import launch_replacement_db_host 11 | 12 | log = logging.getLogger(__name__) 13 | 14 | AGE_START_TESTING = 55 15 | AGE_ALARM = 60 16 | MAX_LAUNCHED = 10 17 | 18 | 19 | def main(): 20 | parser = argparse.ArgumentParser() 21 | parser.add_argument('--dry_run', 22 | help=('Do not actually launch servers'), 23 | default=False, 24 | action='store_true') 25 | args = parser.parse_args() 26 | launch_restores_as_needed(dry_run=args.dry_run) 27 | 28 | 29 | def launch_restores_as_needed(dry_run=True): 30 | """ Launch a bunch of hosts to test restore process 31 | 32 | Args: 33 | dry_run - Don't actully launch hosts 34 | """ 35 | zk = host_utils.MysqlZookeeper() 36 | launched = 0 37 | pool = multiprocessing.Pool(processes=multiprocessing.cpu_count()) 38 | results = pool.map(backup.get_age_last_restore, 39 | zk.get_all_mysql_replica_sets()) 40 | restore_age = dict() 41 | for result in results: 42 | if not result: 43 | continue 44 | if result[0] not in restore_age: 45 | restore_age[result[0]] = set() 46 | restore_age[result[0]].add(result[1]) 47 | 48 | launched = 0 49 | min_launches = min_test_launches() 50 | log.info('Current restore age: {}' 51 | ''.format(pprint.pformat(restore_age))) 52 | for days in sorted(restore_age.keys(), reverse=True): 53 | for replica_set in restore_age[days]: 54 | launch = False 55 | if launched > MAX_LAUNCHED: 56 | raise Exception('Cowardly refusing to 
consider launching ' 57 | 'servers as we have launched {launched} which ' 58 | 'is greater than the limit of {max_launched}' 59 | ''.format(launched=launched, 60 | max_launched=MAX_LAUNCHED)) 61 | elif days > AGE_START_TESTING: 62 | launch = True 63 | log.info('Will replace a host in {rs} as days since last restore ' 64 | 'is {days} days and we will always launch after ' 65 | '{always} days' 66 | ''.format(rs=replica_set, 67 | days=days, 68 | always=AGE_START_TESTING)) 69 | elif launched < min_launches: 70 | launch = True 71 | log.info('Will replace a host in {rs} as launched ' 72 | '{launched} < min {min}' 73 | ''.format(rs=replica_set, 74 | launched=launched, 75 | min=min_launches)) 76 | 77 | if launch: 78 | launched = launched + 1 79 | if not dry_run: 80 | try: 81 | launch_a_slave_replacement(replica_set) 82 | except Exception as e: 83 | log.error('Could not launch replacement due to error: ' 84 | '{e}'.format(e=e)) 85 | 86 | 87 | def launch_a_slave_replacement(replica_set): 88 | """ Choose a slave to replace and launch it 89 | 90 | Args: 91 | replica - A MySQL replica set 92 | """ 93 | zk = host_utils.MysqlZookeeper() 94 | instance = zk.get_mysql_instance_from_replica_set(replica_set, 95 | host_utils.REPLICA_ROLE_DR_SLAVE) 96 | if not instance: 97 | instance = zk.get_mysql_instance_from_replica_set(replica_set, 98 | host_utils.REPLICA_ROLE_SLAVE) 99 | launch_replacement_db_host.launch_replacement_db_host(instance, 100 | reason='restore age', 101 | replace_again=True) 102 | 103 | 104 | def min_test_launches(): 105 | """ Figure out what is the least number of test launches we should run 106 | 107 | Returns an int of the most test launches we should run 108 | """ 109 | zk = host_utils.MysqlZookeeper() 110 | # So the idea here is that often an upgrade will cause a large burst of 111 | # replacements which will then potentially cause not many servers to be 112 | # launched for a while. This will smooth out the number of services launch. 113 | return len(zk.get_all_mysql_replica_sets()) / AGE_ALARM 114 | 115 | if __name__ == "__main__": 116 | environment_specific.initialize_logger() 117 | main() 118 | -------------------------------------------------------------------------------- /binlog_rotator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import datetime 4 | import logging 5 | import os 6 | 7 | from lib import environment_specific 8 | from lib import host_utils 9 | from lib import mysql_lib 10 | 11 | MAX_AGE = 600 12 | 13 | log = logging.getLogger(__name__) 14 | 15 | def main(): 16 | parser = argparse.ArgumentParser(description='Binlog rotator') 17 | parser.add_argument('-p', 18 | '--port', 19 | help='Port of instance to backup. Default is 3306', 20 | default=3306) 21 | parser.add_argument('--dry_run', 22 | help='Do not upload binlogs, just display output', 23 | default=False, 24 | action='store_true') 25 | args = parser.parse_args() 26 | rotate_binlogs_if_needed(args.port, args.dry_run) 27 | 28 | 29 | def rotate_binlogs_if_needed(port, dry_run): 30 | instance = host_utils.HostAddr(':'.join((host_utils.HOSTNAME, 31 | str(port)))) 32 | log_bin_dir = host_utils.get_cnf_setting('log_bin', port) 33 | binlog = os.path.join(os.path.dirname(log_bin_dir), 34 | mysql_lib.get_master_status(instance)['File']) 35 | # We don't update access time, so this is creation time. 
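# (Assumption: access times are never updated on these hosts, e.g. noatime
# mounts, so st_atime below effectively reflects when the binlog was created.
# Note that timedelta.seconds only carries the sub-day remainder, so the age
# compared against MAX_AGE is effectively the age modulo 24 hours.)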
36 | creation = datetime.datetime.fromtimestamp(os.stat(binlog).st_atime) 37 | age = (datetime.datetime.utcnow() - creation).seconds 38 | if age > MAX_AGE: 39 | log.info('Age of current binlog is {age} which is greater than ' 40 | 'MAX_AGE ({MAX_AGE})'.format(age=age, 41 | MAX_AGE=MAX_AGE)) 42 | if not dry_run: 43 | log.info('Flushing bin log') 44 | mysql_lib.flush_master_log(instance) 45 | else: 46 | log.info('Age of current binlog is {age} which is less than ' 47 | 'MAX_AGE ({MAX_AGE})'.format(age=age, 48 | MAX_AGE=MAX_AGE)) 49 | 50 | 51 | if __name__ == "__main__": 52 | environment_specific.initialize_logger() 53 | main() 54 | -------------------------------------------------------------------------------- /check_mysql_replication.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | 4 | from lib import host_utils 5 | from lib import mysql_lib 6 | 7 | 8 | def main(): 9 | parser = argparse.ArgumentParser(description='MySQL replication checker') 10 | parser.add_argument('replica', 11 | help='Replica MySQL instance to sanity check ' 12 | 'hostname[:port]') 13 | parser.add_argument('-w', 14 | '--watch_for_catch_up', 15 | help='Watch replication for catch up ', 16 | default=False, 17 | action='store_true') 18 | args = parser.parse_args() 19 | slave_hostaddr = host_utils.HostAddr(args.replica) 20 | 21 | if args.watch_for_catch_up: 22 | mysql_lib.wait_for_catch_up(slave_hostaddr) 23 | else: 24 | ret = mysql_lib.calc_slave_lag(slave_hostaddr) 25 | print "Heartbeat_seconds_behind: {sbm}".format(sbm=ret['sbm']) 26 | print "Slave_IO_Running: {Slave_IO_Running} ".format(Slave_IO_Running=ret['ss']['Slave_IO_Running']) 27 | print "IO_lag_bytes: {io_bytes}".format(io_bytes=ret['io_bytes']) 28 | print "IO_lag_binlogs: {io_binlogs}".format(io_binlogs=ret['io_binlogs']) 29 | print "Slave_SQL_Running: {Slave_IO_Running} ".format(Slave_IO_Running=ret['ss']['Slave_SQL_Running']) 30 | print "SQL_lag_bytes: {sql_bytes}".format(sql_bytes=ret['sql_bytes']) 31 | print "SQL_lag_binlogs: {sql_binlogs}".format(sql_binlogs=ret['sql_binlogs']) 32 | 33 | 34 | if __name__ == "__main__": 35 | main() 36 | -------------------------------------------------------------------------------- /check_shard_mappings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import json 4 | 5 | from lib import host_utils 6 | from lib import mysql_lib 7 | 8 | from lib.environment_specific import TEST_MYSQL_SHARDS_CONFIG_LOCAL_PATH 9 | from lib.environment_specific import MYSQL_SHARDS_CONFIG_LOCAL_PATH 10 | 11 | DEFAULT = 'default' 12 | OUTPUT_FORMAT = ('{replica_set:/dev/null'] 109 | 110 | pipeline = list() 111 | pipeline.append(dict(args=' '.join(binlog_cmd), shell=True)) 112 | pipeline.append(dict(args='/bin/egrep created', shell=True)) 113 | procs = pipe_runner(pipeline) 114 | results = pipe_wait(procs) 115 | 116 | try: 117 | (date, time) = results.split()[-2:] 118 | timestamp = dt.datetime.strptime('{} {}'.format(date, time), 119 | BINLOG_DT_FORMAT) 120 | return timestamp 121 | except Exception as e: 122 | log.error("Invalid value/format for binlog create time: {}".format(e)) 123 | raise 124 | 125 | 126 | def check_one_binlog(timestamp, binlog_file, instance, username, password): 127 | """ See if there are any GTIDs in the supplied binlog 128 | that match the timestamp we're looking for. 
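The check shells out to mysqlbinlog, restricted to a window of one second on
either side of the supplied timestamp, and returns the first GTID_NEXT value
found in that range.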
129 | 130 | Args: 131 | timestamp: the timestamp to look for 132 | binlog_file: the binlog file 133 | instance: a hostaddr object 134 | username: the username to connect as 135 | password: the password to connect as 136 | Returns: 137 | A GTID if we've found one matching our timestamp 138 | or None if we haven't. 139 | """ 140 | ts_minus_one = (timestamp - DELTA_ONE_SECOND).strftime(MYSQL_DT_FORMAT) 141 | ts_plus_one = (timestamp + DELTA_ONE_SECOND).strftime(MYSQL_DT_FORMAT) 142 | 143 | binlog_cmd = ['/usr/bin/mysqlbinlog', 144 | '--read-from-remote-master=BINLOG-DUMP-GTIDS', 145 | '--host={}'.format(instance.hostname), 146 | '--user={}'.format(username), 147 | '--password={}'.format(password), 148 | '--start-datetime="{}"'.format(ts_minus_one), 149 | '--stop-datetime="{}"'.format(ts_plus_one), 150 | binlog_file, '2>/dev/null'] 151 | 152 | log.debug(' '.join(binlog_cmd)) 153 | 154 | pipeline = list() 155 | pipeline.append(dict(args=' '.join(binlog_cmd), shell=True)) 156 | pipeline.append(dict(args='/bin/egrep -A1 GTID.*commit', shell=True)) 157 | pipeline.append(dict(args='/bin/egrep GTID_NEXT', shell=True)) 158 | pipeline.append(dict(args='head -1', shell=True)) 159 | 160 | procs = pipe_runner(pipeline) 161 | results = pipe_wait(procs) 162 | if results: 163 | return results.split("'")[1] 164 | 165 | 166 | def find_gtid_for_timestamp(instance, timestamp): 167 | """ Find the GTID for the supplied timestamp on the specified 168 | instance. 169 | 170 | Args: 171 | instance: a HostAddr object 172 | timestamp: the timestamp to search for 173 | Returns: 174 | If the instance doesn't support GTID, return None. 175 | If no GTID was found in the binlogs for the supplied 176 | timestamp, return a blank string. 177 | Otherwise, return a GTID. 178 | """ 179 | vars = mysql_lib.get_global_variables(instance) 180 | 181 | # we are not generating GTIDs / no GTID support 182 | if vars['gtid_mode'] == 'OFF' or vars['gtid_deployment_step'] == 'ON': 183 | log.warning('This replica set does not currently support GTID') 184 | return None 185 | 186 | # go in reverse order, because odds are that the log we want 187 | # is closer to the end than the beginning. 188 | master_logs = list(reversed(mysql_lib.get_master_logs(instance))) 189 | 190 | (username, password) = mysql_lib.get_mysql_user_for_role('replication') 191 | for binlog in master_logs: 192 | # if the timestamp we want is prior to the first entry in the 193 | # binlog, it can't possibly be in there. 194 | log_start = get_binlog_start(binlog['Log_name'], instance, username, 195 | password) 196 | if timestamp < log_start: 197 | log.debug('Skipping binlog {bl} because desired {ts} < ' 198 | '{ls}'.format(bl=binlog['Log_name'], ts=timestamp, 199 | ls=log_start)) 200 | continue 201 | 202 | # The binlog that we end up checking, if we check one at all, 203 | # is the first one that could possibly contain our GTID, so 204 | # if it isn't in this one, we're not going to find anything. 205 | log.debug('Checking for matching GTID in {}'.format(binlog['Log_name'])) 206 | gtid = check_one_binlog(timestamp, binlog['Log_name'], 207 | instance, username, password) 208 | if gtid: 209 | return gtid 210 | else: 211 | break 212 | 213 | log.warning("No matching GTID was found for that timestamp.") 214 | return '' 215 | 216 | 217 | def main(): 218 | parser = argparse.ArgumentParser(description=DESCRIPTION) 219 | parser.add_argument('-i', 220 | '--instance', 221 | help='The instance to query. 
This should ' 222 | 'be the master of a replica set, but ' 223 | 'if you supply a non-master, the script ' 224 | 'will query the master anyway.') 225 | parser.add_argument('timestamp', 226 | help='The timestamp to rewind to. This must ' 227 | 'be in MySQL format: YYYY-MM-DD HH:MM:SS') 228 | args = parser.parse_args() 229 | try: 230 | instance = host_utils.HostAddr(args.instance) 231 | zk = host_utils.MysqlZookeeper() 232 | rt = zk.get_replica_type_from_instance(instance) 233 | if rt != host_utils.REPLICA_ROLE_MASTER: 234 | instance = zk.get_mysql_instance_from_replica_set( 235 | zk.get_replica_set_from_instance(instance), 236 | host_utils.REPLICA_ROLE_MASTER) 237 | log.info('Detected master of {i} as {m}'.format(i=args.instance, 238 | m=instance)) 239 | timestamp = dt.datetime.strptime(args.timestamp, MYSQL_DT_FORMAT) 240 | except Exception as e: 241 | log.error("Error in argument parsing: {}".format(e)) 242 | 243 | gtid = find_gtid_for_timestamp(instance, timestamp) 244 | if gtid: 245 | print gtid 246 | else: 247 | sys.exit(255) 248 | 249 | 250 | if __name__ == "__main__": 251 | environment_specific.initialize_logger() 252 | main() 253 | -------------------------------------------------------------------------------- /find_shard_mismatches.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import logging 4 | 5 | from lib import environment_specific 6 | from lib import host_utils 7 | from lib import mysql_lib 8 | 9 | log = logging.getLogger(__name__) 10 | 11 | 12 | def main(): 13 | description = ("MySQL orpahned shard detector\n\n" 14 | "This utility will attempt to find orphaned databases " 15 | "across sharded MySQL systems") 16 | 17 | parser = argparse.ArgumentParser(description=description, 18 | formatter_class=argparse.RawTextHelpFormatter) 19 | parser.add_argument('-i', 20 | '--instance', 21 | help='Check a single instance rather than all', 22 | default=False) 23 | args = parser.parse_args() 24 | 25 | if args.instance: 26 | instance = host_utils.HostAddr(args.instance) 27 | else: 28 | instance = False 29 | 30 | orphaned, orphaned_but_used, missing = find_shard_mismatches(instance) 31 | 32 | for o in orphaned: 33 | log.info('Orphan dbs: {host} {dbs}'.format( 34 | host=o, dbs=','.join(orphaned[o]))) 35 | 36 | for obu in orphaned_but_used: 37 | log.info('Orphan, but still used, dbs: {host} {dbs}'.format( 38 | host=obu, dbs=','.join(orphaned_but_used[obu]))) 39 | 40 | for m in missing: 41 | log.info('Missing dbs:{host} {dbs}'.format( 42 | host=m, dbs=','.join(missing[m]))) 43 | 44 | if not (orphaned or orphaned_but_used or missing): 45 | log.info('No problems found!') 46 | 47 | 48 | def find_shard_mismatches(instance=False): 49 | """ Find shards that are missing or unexpected in a sharded dataset 50 | 51 | Args: 52 | instance - If supplied, only check this instance. 53 | 54 | Returns: 55 | orphaned - A dict of unexpected and (according to table statistics) 56 | unused dbs. Key is master instance, value is a set. 57 | orphaned_but_used - A dict of unexpected and but used dbs. 58 | Data structure is the same as orphaned. 59 | missing - A dict of expected but missing dbs. 60 | Data structure is the same as orphaned. 
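Roughly, each returned dict looks like the following (hypothetical host and
db names):
    orphaned = {HostAddr('sharddb-42:3306'): set(['olddata0001'])}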
61 | """ 62 | orphaned = dict() 63 | orphaned_but_used = dict() 64 | missing_dbs = dict() 65 | 66 | zk = host_utils.MysqlZookeeper() 67 | rs_dbs_map = zk.get_sharded_dbs_by_replica_set() 68 | 69 | if instance: 70 | rs = zk.get_replica_set_from_instance(instance) 71 | rs_dbs_map = {rs: rs_dbs_map[rs]} 72 | 73 | for rs in rs_dbs_map: 74 | # non-sharded replica sets 75 | if not len(rs_dbs_map[rs]): 76 | continue 77 | 78 | expected_dbs = rs_dbs_map[rs] 79 | instance = zk.get_mysql_instance_from_replica_set(rs) 80 | 81 | activity = mysql_lib.get_dbs_activity(instance) 82 | actual_dbs = mysql_lib.get_dbs(instance) 83 | unexpected_dbs = actual_dbs.difference(expected_dbs) 84 | missing = expected_dbs.difference(actual_dbs) 85 | if missing: 86 | missing_dbs[instance] = expected_dbs.difference(actual_dbs) 87 | 88 | for db in unexpected_dbs: 89 | if activity[db]['ROWS_CHANGED'] != 0: 90 | if instance not in orphaned_but_used: 91 | orphaned_but_used[instance] = set() 92 | orphaned_but_used[instance].add(db) 93 | else: 94 | if instance not in orphaned: 95 | orphaned[instance] = set() 96 | orphaned[instance].add(db) 97 | 98 | return orphaned, orphaned_but_used, missing_dbs 99 | 100 | 101 | if __name__ == "__main__": 102 | environment_specific.initialize_logger() 103 | main() 104 | -------------------------------------------------------------------------------- /find_unused_db_servers.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import re 4 | import datetime 5 | 6 | import boto.utils 7 | 8 | from lib import environment_specific 9 | from lib import host_utils 10 | import retirement_queue 11 | 12 | 13 | def main(): 14 | parser = argparse.ArgumentParser() 15 | parser.add_argument('-a', 16 | '--add_retirement_queue', 17 | help="Add the servers to the retirement queue", 18 | action='store_true') 19 | args = parser.parse_args() 20 | 21 | hosts_not_in_zk = find_unused_db_servers() 22 | for host in sorted(hosts_not_in_zk): 23 | if args.add_retirement_queue: 24 | retirement_queue.add_to_queue(hostname=host, dry_run=False) 25 | else: 26 | print host 27 | 28 | 29 | def get_db_host_prefix(hostname): 30 | """ This function finds the host prefix for a db host 31 | 32 | Argument: 33 | hostname - a hostname 34 | 35 | Returns: 36 | a prefix of the hostname 37 | """ 38 | prefix_match = re.match('(.+db)', hostname) 39 | if prefix_match is None: 40 | prefix_match = re.match('([a-z]+)', hostname) 41 | if prefix_match is None: 42 | prefix = None 43 | else: 44 | prefix = prefix_match.group(0) 45 | return prefix 46 | 47 | 48 | def find_unused_db_servers(): 49 | """ Compare zk and AWS to determine which servers are likely not in use 50 | 51 | Returns: 52 | A set of hosts that appear to not be in use 53 | """ 54 | 55 | # First find out what servers we know about from zk, and make a 56 | # of hostname prefixes that we think we own. 
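# For example (hypothetical hostnames), get_db_host_prefix('zenfollowerdb00123')
# returns 'zenfollowerdb'; a host with no 'db' in its name falls back to its
# leading run of lowercase letters.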
57 | zk = host_utils.MysqlZookeeper() 58 | config = zk.get_all_mysql_instances() 59 | zk_servers = set() 60 | zk_prefixes = set() 61 | mysql_aws_hosts = set() 62 | for db in config: 63 | host = db.hostname 64 | zk_servers.add(host) 65 | prefix = get_db_host_prefix(host) 66 | zk_prefixes.add(prefix) 67 | 68 | cmdb_servers = environment_specific.get_all_server_metadata() 69 | for host in cmdb_servers: 70 | match = False 71 | for prefix in zk_prefixes: 72 | if host.startswith(prefix): 73 | match = True 74 | if not match: 75 | continue 76 | 77 | # We need to give servers a chance to build and then add themselves 78 | # to zk, so we will ignore server for a week. 79 | creation = boto.utils.parse_ts(cmdb_servers[host]['launch_time']) 80 | if creation < datetime.datetime.now()-datetime.timedelta(weeks=1): 81 | mysql_aws_hosts.add(host) 82 | 83 | hosts_not_in_zk = mysql_aws_hosts.difference(zk_servers) 84 | hosts_not_protected = hosts_not_in_zk.difference(retirement_queue.get_protected_hosts('set')) 85 | return hosts_not_protected 86 | 87 | if __name__ == "__main__": 88 | main() 89 | -------------------------------------------------------------------------------- /fix_orphaned_shards.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import logging 4 | import sys 5 | 6 | import find_shard_mismatches 7 | from lib import environment_specific 8 | from lib import host_utils 9 | from lib import mysql_lib 10 | 11 | 12 | DB_PREPEND = 'dropme_' 13 | log = logging.getLogger(__name__) 14 | 15 | 16 | def main(): 17 | action_desc = """Action description 18 | 19 | rename - after checking no recent changes and shard not in zk, 20 | create a db with the old name appended to 'dropme_'. Then 21 | copy all tables to the new db 22 | revert_rename - Copy all tables back from a 'dropme_' to their original table 23 | drop - This should be run a few days after a rename. Drop the empty original 24 | db, and drop the 'dropme_' db. 
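Example invocation (hypothetical instance name):
    fix_orphaned_shards.py -a rename -i sharddb-123:3306 --dry_run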
25 | """ 26 | 27 | parser = argparse.ArgumentParser( 28 | description='MySQL shard cleanup utility', 29 | epilog=action_desc, 30 | formatter_class=argparse.RawDescriptionHelpFormatter) 31 | 32 | parser.add_argument('-i', 33 | '--instance', 34 | help='Instance to act on if other than localhost:3306', 35 | default=':'.join((host_utils.HOSTNAME, '3306'))) 36 | parser.add_argument('-a', 37 | '--action', 38 | choices=('rename', 39 | 'revert_rename', 40 | 'drop'), 41 | required=True) 42 | parser.add_argument('-d', 43 | '--dbs', 44 | help="Comma separated list of dbs to act upon") 45 | parser.add_argument('--dry_run', 46 | help="Do not change any state", 47 | default=False, 48 | action='store_true') 49 | 50 | args = parser.parse_args() 51 | dbs = set(args.dbs.split(',')) if args.dbs else None 52 | instance = host_utils.HostAddr(args.instance) 53 | 54 | if args.action == 'rename': 55 | rename_db_to_drop(instance, dbs, args.dry_run) 56 | elif args.action == 'revert_rename': 57 | for db in dbs: 58 | mysql_lib.move_db_contents(instance, 59 | old_db=''.join((DB_PREPEND, db)), 60 | new_db=db, 61 | dry_run=args.dry_run) 62 | elif args.action == 'drop': 63 | drop_db_after_rename(instance, dbs, args.dry_run) 64 | 65 | 66 | def rename_db_to_drop(instance, dbs=None, dry_run=False, skip_check=False): 67 | """ Create a new empty db and move the contents of the original db 68 | into it 69 | 70 | Args: 71 | instance - a hostaddr object 72 | dbs - a set of database names 73 | dry_run - bool, will make no changes to anything 74 | skip_check - Do not verify that db is not in production 75 | """ 76 | 77 | orphaned, _, _ = find_shard_mismatches.find_shard_mismatches(instance) 78 | if not dbs: 79 | if instance not in orphaned: 80 | log.info("No orphaned shards, returning now.") 81 | return 82 | 83 | dbs = orphaned[instance] 84 | log.info('Detected orphaned shareds: {}'.format(dbs)) 85 | 86 | if not skip_check: 87 | # confirm db is not in ZK and not in use. 
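# The dbs requested on the command line must be a subset of the orphans that
# find_shard_mismatches reported for this instance; anything unexpected
# aborts below.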
88 | if not orphaned: 89 | log.info("No orphans detected, returning now.") 90 | return 91 | 92 | instance_orphans = orphaned[instance] 93 | unexpected = dbs.difference(instance_orphans) 94 | if unexpected: 95 | raise Exception('Cowardly refusing to act on the following' 96 | 'dbs: {}'.format(unexpected)) 97 | 98 | # confirm that renames would not be blocked by an existing table 99 | conn = mysql_lib.connect_mysql(instance) 100 | 101 | cursor = conn.cursor() 102 | for db in dbs: 103 | # already dealt with 104 | if db.startswith(DB_PREPEND): 105 | continue 106 | 107 | renamed_db = ''.join((DB_PREPEND, db)) 108 | sql = ''.join(("SELECT CONCAT(t2.TABLE_SCHEMA, \n", 109 | " '.', t2.TABLE_NAME) as tbl \n", 110 | "FROM information_schema.tables t1 \n", 111 | "INNER JOIN information_schema.tables t2 \n", 112 | " USING(TABLE_NAME) \n", 113 | "WHERE t1.TABLE_SCHEMA = %(old_db)s AND \n" 114 | " t2.TABLE_SCHEMA = %(new_db)s;")) 115 | 116 | params = {'old_db': db, 117 | 'new_db': renamed_db} 118 | cursor = conn.cursor() 119 | cursor.execute(sql, params) 120 | dups = cursor.fetchall() 121 | 122 | if dups: 123 | for dup in dups: 124 | log.error('Table rename blocked by {}'.format(dup['tbl'])) 125 | sys.exit(1) 126 | 127 | # We should be safe to create the new db and rename 128 | if not dry_run: 129 | mysql_lib.create_db(instance, renamed_db) 130 | mysql_lib.move_db_contents(instance, 131 | old_db=db, 132 | new_db=renamed_db, 133 | dry_run=dry_run) 134 | 135 | if dbs and not dry_run: 136 | log.info('To finish cleanup, wait a bit and then run:') 137 | log.info('/usr/local/bin/mysql_utils/fix_orphaned_shards.py -a' 138 | 'drop -i {}'.format(instance)) 139 | 140 | 141 | def drop_db_after_rename(instance, dbs=None, dry_run=False): 142 | """ Drop the original empty db and a non-empty rename db 143 | 144 | Args: 145 | instance - a hostaddr object 146 | dbs - a set of database names 147 | dry_run - bool, will make no changes to the servers 148 | """ 149 | if not dbs: 150 | dbs = set() 151 | for db in mysql_lib.get_dbs(instance): 152 | if db.startswith(DB_PREPEND): 153 | dbs.add(db[len(DB_PREPEND):]) 154 | 155 | # confirm db is not in zk and not in use 156 | orphaned, _, _ = find_shard_mismatches.find_shard_mismatches(instance) 157 | instance_orphans = orphaned[instance] 158 | unexpected = dbs.difference(instance_orphans) 159 | if unexpected: 160 | raise Exception('Cowardly refusing to act on the following ' 161 | 'dbs: {}'.format(unexpected)) 162 | 163 | # make sure the original db is empty 164 | for db in dbs: 165 | if mysql_lib.get_tables(instance, db): 166 | raise Exception('Cowardly refusing to drop non-empty ' 167 | 'db: {}'.format(db)) 168 | 169 | for db in dbs: 170 | renamed_db = ''.join((DB_PREPEND, db)) 171 | if dry_run: 172 | log.info('dry_run is enabled, not dropping ' 173 | 'dbs: {db} {renamed}'.format(db=db, renamed=renamed_db)) 174 | else: 175 | mysql_lib.drop_db(instance, db) 176 | mysql_lib.drop_db(instance, renamed_db) 177 | 178 | 179 | if __name__ == "__main__": 180 | environment_specific.initialize_logger() 181 | main() 182 | -------------------------------------------------------------------------------- /get_recent_checksums.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import socket 4 | import sys 5 | from lib import host_utils 6 | from lib import mysql_lib 7 | 8 | LINE_TEMPLATE = ('{master_instance: padding[key]: 92 | line_length += len(str(checksum[key])) - padding[key] 93 | padding[key] = 
len(str(checksum[key])) 94 | 95 | # regenerate the output template based on padding. 96 | format_str = LINE_TEMPLATE.replace( 97 | 'MSPC', str(padding['master_instance'] + 3)).replace( 98 | 'RSPC', str(padding['instance'] + 3)).replace( 99 | 'DBSPC', str(padding['db'] + 3)).replace( 100 | 'TSPC', str(padding['tbl'] + 3)).replace( 101 | 'RCSPC', str(padding['row_count'] + 3)).replace( 102 | 'DCSPC', str(padding['row_diffs'] + 3)) 103 | 104 | return format_str, line_length 105 | 106 | 107 | def get_checksums(instance, db=False): 108 | """ Get recent mysql replication checksums 109 | 110 | Args: 111 | instance - a hostaddr object for what server to pull results for 112 | db - a string of a data to for which to restrict results 113 | 114 | Returns: 115 | A list of dicts from a select * on the relevant rows 116 | """ 117 | 118 | vars_for_query = dict() 119 | vars_for_query['instance'] = instance 120 | 121 | zk = host_utils.MysqlZookeeper() 122 | host_shard_map = zk.get_host_shard_map() 123 | 124 | # extra SQL if this is a sharded data set. 125 | SHARD_DB_IN_SQL = ' AND db in ({sp}) ' 126 | 127 | if db is False: 128 | cnt = 0 129 | shard_param_set = set() 130 | try: 131 | for entry in host_shard_map[instance.__str__()]: 132 | key = ''.join(('shard', str(cnt))) 133 | vars_for_query[key] = entry 134 | shard_param_set.add(key) 135 | cnt += 1 136 | shard_param = ''.join(('%(', 137 | ')s,%('.join(shard_param_set), 138 | ')s')) 139 | except KeyError: 140 | # if this is not a sharded data set, don't use this. 141 | shard_param = None 142 | 143 | else: 144 | shard_param = '%(shard1)s' 145 | vars_for_query['shard1'] = db 146 | 147 | # connect to the instance we care about and get some data. 148 | conn = mysql_lib.connect_mysql(instance, 'dbascript') 149 | 150 | # We only care about the most recent checksum 151 | cursor = conn.cursor() 152 | 153 | sql_base = ("SELECT detail.master_instance, " 154 | " detail.instance, " 155 | " detail.db, " 156 | " detail.tbl, " 157 | " detail.reported_at, " 158 | " detail.checksum_status, " 159 | " detail.rows_checked, " 160 | " detail.row_count, " 161 | " detail.row_diffs " 162 | "FROM " 163 | " (SELECT master_instance," 164 | " instance, " 165 | " db, " 166 | " tbl, " 167 | " MAX(reported_at) AS reported_at " 168 | " FROM test.checksum_detail " 169 | " WHERE master_instance=%(instance)s " 170 | " {in_db}" 171 | " GROUP BY 1,2,3,4 " 172 | " ) AS most_recent " 173 | "JOIN test.checksum_detail AS detail " 174 | "USING(master_instance, instance, db, " 175 | "tbl, reported_at) ") 176 | 177 | # and then fill in the variables. 
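# For a sharded replica set the WHERE clause below ends up looking roughly
# like " AND db in (%(shard0)s,%(shard1)s,...) ", with one bind variable per
# shard; non-sharded sets (shard_param is None) skip the IN clause entirely.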
178 | if shard_param: 179 | sql = sql_base.format(in_db=SHARD_DB_IN_SQL.format(sp=shard_param)) 180 | else: 181 | sql = sql_base.format(in_db='') 182 | 183 | cursor.execute(sql, vars_for_query) 184 | checksums = cursor.fetchall() 185 | return checksums 186 | 187 | 188 | if __name__ == "__main__": 189 | main() 190 | -------------------------------------------------------------------------------- /kill_backups.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import logging 3 | import os 4 | import subprocess 5 | 6 | from lib import backup 7 | from lib import host_utils 8 | from lib import mysql_lib 9 | from lib import environment_specific 10 | 11 | TOUCH_STOP_KILLING = '/etc/mysql/no_backup_killing' 12 | 13 | log = logging.getLogger(__name__) 14 | 15 | 16 | def main(): 17 | instance = host_utils.HostAddr(host_utils.HOSTNAME) 18 | if os.path.isfile(TOUCH_STOP_KILLING): 19 | log.info('Found {path}. Will not kill backups.\n' 20 | 'Exiting now.'.format(path=TOUCH_STOP_KILLING)) 21 | return 22 | kill_mysql_backup(instance) 23 | kill_xtrabackup() 24 | 25 | 26 | def kill_mysql_backup(instance): 27 | """ Kill sql, csv and xtrabackup backups 28 | 29 | Args: 30 | instance - Instance to kill backups, does not apply to csv or sql 31 | """ 32 | (username, _) = mysql_lib.get_mysql_user_for_role(backup.USER_ROLE_MYSQLDUMP) 33 | mysql_lib.kill_user_queries(instance, username) 34 | kill_xtrabackup() 35 | 36 | 37 | def kill_xtrabackup(): 38 | """ Kill any running xtrabackup processes """ 39 | subprocess.Popen('pkill -f xtrabackup', shell=True).wait() 40 | subprocess.Popen('pkill -f gof3r', shell=True).wait() 41 | 42 | 43 | if __name__ == "__main__": 44 | environment_specific.initialize_logger() 45 | main() 46 | -------------------------------------------------------------------------------- /launch_amazon_mysql_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import inspect 4 | 5 | import boto.ec2 6 | 7 | import launch_replacement_db_host 8 | from lib import host_utils 9 | from lib import mysql_lib 10 | from lib import environment_specific 11 | 12 | log = environment_specific.setup_logging_defaults(__name__) 13 | 14 | 15 | def main(): 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument('--hostname', 18 | required=True) 19 | parser.add_argument('--instance_type', 20 | choices=sorted(environment_specific.SUPPORTED_HARDWARE, 21 | reverse=True), 22 | required=True) 23 | parser.add_argument('--vpc_security_group', 24 | default=None, 25 | choices=environment_specific.VPC_SECURITY_GROUPS.keys()) 26 | parser.add_argument('--availability_zone', 27 | choices=environment_specific.SUPPORTED_AZ, 28 | required=True) 29 | parser.add_argument('--ssh_group', 30 | default=None, 31 | choices=environment_specific.SSH_IAM_MAPPING.keys()) 32 | parser.add_argument('--mysql_major_version', 33 | choices=environment_specific.SUPPORTED_MYSQL_MAJOR_VERSIONS, 34 | default=launch_replacement_db_host.DEFAULT_MYSQL_MAJOR_VERSION, 35 | help='Default: {}'.format(launch_replacement_db_host.DEFAULT_MYSQL_MAJOR_VERSION)) 36 | parser.add_argument('--mysql_minor_version', 37 | choices=environment_specific.SUPPORTED_MYSQL_MINOR_VERSIONS, 38 | default=launch_replacement_db_host.DEFAULT_MYSQL_MINOR_VERSION, 39 | help='Default: {}'.format(launch_replacement_db_host.DEFAULT_MYSQL_MINOR_VERSION)) 40 | parser.add_argument('--os_flavor', 41 | 
choices=environment_specific.SUPPORTED_OS_FLAVORS, 42 | default=launch_replacement_db_host.DEFAULT_OS_FLAVOR, 43 | help='Default: {}'.format(launch_replacement_db_host.DEFAULT_OS_FLAVOR)) 44 | parser.add_argument('--dry_run', 45 | help=('Do not actually launch an instance, just show ' 46 | 'the intended configuration'), 47 | default=False, 48 | action='store_true') 49 | 50 | args = parser.parse_args() 51 | 52 | launch_amazon_mysql_server(hostname=args.hostname, 53 | instance_type=args.instance_type, 54 | vpc_security_group=args.vpc_security_group, 55 | availability_zone=args.availability_zone, 56 | ssh_group=args.ssh_group, 57 | mysql_major_version=args.mysql_major_version, 58 | mysql_minor_version=args.mysql_minor_version, 59 | os_flavor=args.os_flavor, 60 | dry_run=args.dry_run) 61 | 62 | 63 | def launch_amazon_mysql_server(hostname, instance_type, vpc_security_group, 64 | availability_zone, ssh_group, mysql_major_version, 65 | mysql_minor_version, os_flavor, dry_run, 66 | skip_name_check=False): 67 | """ Launch a mysql server in aws 68 | 69 | Args: 70 | hostname - hostname of new server 71 | instance_type - hardware type 72 | vpc_security_group - VPC firewall rules. 73 | availability_zone - AWS availability zone 74 | ssh_group - What IAM/SSH zone to use 75 | mysql_major_version - MySQL major version. Example 5.5 or 5.6 76 | mysql_minor_version - Which "branch" to use. Values are 'stable', 'staging' 77 | and 'latest'. 78 | os_flavor - Which OS to target - 'precise' or 'trusty' at the moment 79 | dry_run - Do not actually launch a host, just show the expected config. 80 | skip_name_check - Do not check if a hostname has already been used or log 81 | usage. The assumption is the caller has already done this 82 | 83 | Returns: 84 | An amazon instance id. 
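Example (hypothetical values, dry run only):
    launch_amazon_mysql_server('sharddb-1-1', 'i2.2xlarge', 'prod_sg',
                               'us-east-1a', None, '5.6', 'stable',
                               'trusty', dry_run=True)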
85 | """ 86 | args, _, _, values = inspect.getargvalues(inspect.currentframe()) 87 | for param in args: 88 | log.info("Requested {param} = {value}".format(param=param, 89 | value=values[param])) 90 | 91 | if host_utils.get_security_role() not in environment_specific.ROLE_TO_LAUNCH_INSTANCE: 92 | raise Exception(environment_specific.ROLE_ERROR_MSG) 93 | 94 | config = {'key_name': environment_specific.PEM_KEY, 95 | 'placement': availability_zone, 96 | 'instance_profile_name': environment_specific.INSTANCE_PROFILE_NAME, 97 | 'image_id': environment_specific.SUPPORTED_HARDWARE[instance_type]['ami'][os_flavor], 98 | 'instance_type': instance_type} 99 | 100 | (subnet_name, config['subnet_id']) = get_subnet_from_sg(vpc_security_group, 101 | availability_zone) 102 | 103 | ssh_security = environment_specific.SSH_SECURITY_MAP[subnet_name]['ssh'] 104 | config['instance_profile_name'] = environment_specific.SSH_SECURITY_MAP[subnet_name]['iam'] 105 | config['security_group_ids'] = [environment_specific.VPC_SECURITY_GROUPS[vpc_security_group]] 106 | 107 | if ssh_group: 108 | if ssh_group >= ssh_security and ssh_group in environment_specific.SSH_IAM_MAPPING.keys(): 109 | ssh_security = ssh_group 110 | config['instance_profile_name'] = environment_specific.SSH_IAM_MAPPING[ssh_group] 111 | else: 112 | raise Exception("We are not allowed to provision a host in {0} env " 113 | "with a weaker access policy than {1} it's existing or default " 114 | "config".format(ssh_group, ssh_security)) 115 | 116 | hiera_config = environment_specific.HIERA_FORMAT.format( 117 | ssh_security=ssh_security, 118 | mysql_major_version=mysql_major_version.replace('.', ''), 119 | mysql_minor_version=mysql_minor_version) 120 | 121 | if hiera_config not in environment_specific.SUPPORTED_HIERA_CONFIGS: 122 | raise Exception('Hiera config {hiera_config} is not supported.' 123 | 'Supported configs are: {supported}' 124 | ''.format(hiera_config=hiera_config, 125 | supported=environment_specific.SUPPORTED_HIERA_CONFIGS)) 126 | 127 | config['user_data'] = ('#cloud-config\n' 128 | 'pinfo_team: {pinfo_team}\n' 129 | 'pinfo_env: {pinfo_env}\n' 130 | 'pinfo_role: {hiera_config}\n' 131 | 'hostname: {hostname}\n' 132 | 'raid: true\n' 133 | 'raid_fs: xfs\n' 134 | 'raid_mount: {raid_mount}' 135 | ''.format(pinfo_team=environment_specific.PINFO_TEAM, 136 | pinfo_env=environment_specific.PINFO_ENV, 137 | raid_mount=environment_specific.RAID_MOUNT, 138 | hiera_config=hiera_config, 139 | hostname=hostname)) 140 | 141 | log.info('Config for new server:\n{config}'.format(config=config)) 142 | conn = mysql_lib.get_mysqlops_connections() 143 | if not skip_name_check and not launch_replacement_db_host.is_hostname_new(hostname, conn): 144 | raise Exception('Hostname {hostname} has already been used!' 
145 | ''.format(hostname=hostname)) 146 | if dry_run: 147 | log.info('In dry run mode, returning now') 148 | return 149 | else: 150 | conn = boto.ec2.connect_to_region(environment_specific.EC2_REGION) 151 | instance_id = conn.run_instances(**config).instances[0].id 152 | log.info('Launched instance {id}'.format(id=instance_id)) 153 | return instance_id 154 | 155 | 156 | def get_subnet_from_sg(sg, az): 157 | """ Given a VPC security group and a availiability zone 158 | return a subnet 159 | 160 | Args: 161 | sg - A security group 162 | az - An availibilty zone 163 | 164 | Returns - An AWS subnet 165 | """ 166 | vpc_subnet = None 167 | for entry in environment_specific.VPC_SUBNET_SG_MAP.keys(): 168 | if sg in environment_specific.VPC_SUBNET_SG_MAP[entry]: 169 | vpc_subnet = entry 170 | 171 | if not vpc_subnet: 172 | raise Exception('Could not determine subnet for sg:{sg}'.format(sg=sg)) 173 | vpc_az_subnet = environment_specific.VPC_AZ_SUBNET_MAP[vpc_subnet][az] 174 | 175 | log.info('Will use subnet "{vpc_az_subnet}" in "{vpc_subnet}" based upon ' 176 | 'security group {sg} and availibility zone {az}' 177 | ''.format(vpc_az_subnet=vpc_az_subnet, 178 | vpc_subnet=vpc_subnet, 179 | sg=sg, 180 | az=az)) 181 | return (vpc_subnet, vpc_az_subnet) 182 | 183 | if __name__ == "__main__": 184 | main() 185 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pinterest/mysql_utils/7ab237699b85de8b503b09f36e0309ac807689fe/lib/__init__.py -------------------------------------------------------------------------------- /lib/mysql_connect.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import json 4 | 5 | AUTH_FILE = '/var/config/config.services.mysql_auth' 6 | MYSQL_DS_ZK = '/var/config/config.services.dataservices.mysql_databases' 7 | MYSQL_GEN_ZK = '/var/config/config.services.general_mysql_databases_config' 8 | MASTER = 'master' 9 | SLAVE = 'slave' 10 | DR_SLAVE = 'dr_slave' 11 | REPLICA_ROLES = [MASTER, SLAVE, DR_SLAVE] 12 | 13 | 14 | def main(): 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('replica_set_name', 17 | help='The replica_set to pull hostname/port/username/password') 18 | parser.add_argument('user_role', 19 | help=('Which user role to pull username and password for.' 20 | 'Default is scriptro/scriptrw'), 21 | default=None, 22 | nargs='?') 23 | parser.add_argument('--writeable', 24 | help='Should the user have a writeable connection?', 25 | default=False, 26 | action='store_true') 27 | parser.add_argument('--replica_set_role', 28 | help='Pull the hostname/password for a server other than the master', 29 | default=None, 30 | choices=REPLICA_ROLES) 31 | args = parser.parse_args() 32 | 33 | ret = get_mysql_connection(args.replica_set_name, writeable=args.writeable, 34 | user_role=args.user_role, replica_set_role=args.replica_set_role) 35 | print "{hostname} {port} {username} {password}".format(hostname=ret[0], 36 | port=ret[1], 37 | username=ret[2], 38 | password=ret[3]) 39 | 40 | 41 | def get_mysql_connection(replica_set_name, writeable=False, 42 | user_role=None, replica_set_role=None): 43 | """ Get MySQL connection information. This code also exists in 44 | the wiki and is copied numberous places across the pinterest code base. 
45 | 46 | Args: 47 | replica_set_name - The name of a replica set in zk, ie db00047 48 | or blackopsdb001. 49 | writeable - If the connection should be writeable. 50 | user_role - A named user role to pull. If this is supplied, writeable 51 | is not respected. 52 | replica_set_role - Default role is master, can also be slave or dr_slave. 53 | If this is supplied, writeable is not respected. 54 | 55 | Returns: 56 | hostname - (str) The master host of the named replica set 57 | port - The port of the named replica set. Please do not assume 3306. 58 | username - The MySQL username to be used. 59 | password - The password that corrosponds to the username 60 | """ 61 | hostname = None 62 | port = None 63 | password = None 64 | username = None 65 | 66 | if user_role is None: 67 | if (replica_set_role == MASTER or writeable is True): 68 | user_role = 'scriptrw' 69 | else: 70 | user_role = 'scriptro' 71 | 72 | if replica_set_role is None: 73 | replica_set_role = MASTER 74 | 75 | with open(MYSQL_DS_ZK) as f: 76 | ds = json.loads(f.read()) 77 | 78 | for entry in ds.iteritems(): 79 | if replica_set_name == entry[0]: 80 | hostname = entry[1][replica_set_role]['host'] 81 | port = entry[1][replica_set_role]['port'] 82 | 83 | if hostname is None or port is None: 84 | with open(MYSQL_GEN_ZK) as f: 85 | gen = json.loads(f.read()) 86 | 87 | for entry in gen.iteritems(): 88 | if replica_set_name == entry[0]: 89 | hostname = entry[1][replica_set_role]['host'] 90 | port = entry[1][replica_set_role]['port'] 91 | 92 | if hostname is None or port is None: 93 | err = ("Replica set '{rs}' does not exist in zk" 94 | ''.format(rs=replica_set_name)) 95 | raise NameError(err) 96 | 97 | with open(AUTH_FILE) as f: 98 | grants = json.loads(f.read()) 99 | 100 | for entry in grants.iteritems(): 101 | if user_role == entry[0]: 102 | for user in entry[1]['users']: 103 | if user['enabled'] is True: 104 | username = user['username'] 105 | password = user['password'] 106 | 107 | if username is None or password is None: 108 | err = ("Userrole '{role}' does not exist in zk" 109 | ''.format(role=user_role)) 110 | raise NameError(err) 111 | return hostname, port, username, password 112 | 113 | 114 | if __name__ == "__main__": 115 | main() 116 | -------------------------------------------------------------------------------- /lib/timeout.py: -------------------------------------------------------------------------------- 1 | import signal 2 | from contextlib import contextmanager 3 | 4 | 5 | @contextmanager 6 | def timeout(seconds): 7 | """ Wrapper for signals handling a timeout for being 8 | used as a decorator. 
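Example usage as a context manager (hypothetical callee):
    with timeout(5):
        possibly_slow_call()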
""" 9 | def timeout_handler(signum, frame): 10 | pass 11 | 12 | original_handler = signal.signal(signal.SIGALRM, timeout_handler) 13 | 14 | try: 15 | signal.alarm(seconds) 16 | yield 17 | finally: 18 | signal.alarm(0) 19 | signal.signal(signal.SIGALRM, original_handler) 20 | -------------------------------------------------------------------------------- /maxwell-3306.conf: -------------------------------------------------------------------------------- 1 | description "Maxwell binlog-to-kafka bridge" 2 | 3 | start on filesystem and net-device-up IFACE=lo 4 | respawn limit 15 5 5 | 6 | env CONF="/etc/mysql/maxwell-3306.conf" 7 | 8 | pre-start script 9 | [ -r $CONF ] || { 10 | echo "I can't find or read $CONF">&2 11 | exit 1 12 | } 13 | end script 14 | 15 | console log 16 | chdir /usr/local/bin/maxwell/maxwell-1.8.1 17 | 18 | script 19 | echo $$ > /var/run/maxwell-3306.pid 20 | exec /usr/local/bin/maxwell/maxwell-1.8.1/bin/maxwell --config /etc/mysql/maxwell-3306.conf 21 | end script 22 | 23 | post-stop script 24 | rm -f /var/run/maxwell-3306.pid 25 | end script 26 | -------------------------------------------------------------------------------- /modify_mysql_zk.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import argparse 3 | import copy 4 | import pprint 5 | import simplejson 6 | 7 | from lib import host_utils 8 | from lib import mysql_lib 9 | from lib import environment_specific 10 | 11 | log = environment_specific.setup_logging_defaults(__name__) 12 | chat_handler = environment_specific.BufferingChatHandler() 13 | log.addHandler(chat_handler) 14 | 15 | 16 | def main(): 17 | parser = argparse.ArgumentParser() 18 | parser.add_argument('action', 19 | help=("What modification to make. If 'auto', the host " 20 | "replacement log will be used to determine what " 21 | "what role to use. 
Default is auto."), 22 | choices=['add_slave', 'add_dr_slave', 23 | 'swap_master_and_slave', 24 | 'swap_slave_and_dr_slave'], 25 | default='auto') 26 | parser.add_argument('instance', 27 | help='What instance to act upon') 28 | parser.add_argument('--dry_run', 29 | help=('Do not actually modify zk, just show ' 30 | 'what would be modify'), 31 | default=False, 32 | action='store_true') 33 | parser.add_argument('--dangerous', 34 | help=('If you need to swap_master_and_slave in zk' 35 | 'outside of the failover script, that is ' 36 | 'dangerous and you will need this flag.'), 37 | default=False, 38 | action='store_true') 39 | args = parser.parse_args() 40 | action = args.action 41 | instance = host_utils.HostAddr(args.instance) 42 | 43 | if args.dry_run: 44 | log.removeHandler(chat_handler) 45 | 46 | if action == 'add_slave': 47 | add_replica_to_zk(instance, host_utils.REPLICA_ROLE_SLAVE, 48 | args.dry_run) 49 | elif action == 'add_dr_slave': 50 | add_replica_to_zk(instance, host_utils.REPLICA_ROLE_DR_SLAVE, 51 | args.dry_run) 52 | elif action == 'swap_master_and_slave': 53 | if args.dangerous: 54 | swap_master_and_slave(instance, args.dry_run) 55 | else: 56 | raise Exception('To swap_master_and_slave in zk outside of the ' 57 | 'failover script is very dangerous and the ' 58 | '--dangerous flag was not supplied.') 59 | elif action == 'swap_slave_and_dr_slave': 60 | swap_slave_and_dr_slave(instance, args.dry_run) 61 | else: 62 | raise Exception('Invalid action: {action}'.format(action=action)) 63 | 64 | 65 | def auto_add_instance_to_zk(port, dry_run): 66 | """ Try to do right thing in adding a server to zk 67 | 68 | Args: 69 | port - The port of replacement instance on localhost 70 | dry_run - If set, do not modify zk 71 | """ 72 | instance = host_utils.HostAddr(':'.join([host_utils.HOSTNAME, str(port)])) 73 | try: 74 | conn = mysql_lib.get_mysqlops_connections() 75 | log.info('Determining replacement for port {}'.format(port)) 76 | instance_id = host_utils.get_local_instance_id() 77 | role = determine_replacement_role(conn, instance_id) 78 | log.info('Adding server as role: {role}'.format(role=role)) 79 | except Exception, e: 80 | log.exception(e) 81 | raise 82 | add_replica_to_zk(instance, role, dry_run) 83 | 84 | if not dry_run: 85 | log.info('Updating host_replacement_log') 86 | update_host_replacement_log(conn, instance_id) 87 | 88 | 89 | def determine_replacement_role(conn, instance_id): 90 | """ Try to determine the role an instance should be placed into 91 | 92 | Args: 93 | conn - A connection to the reporting server 94 | instance - The replacement instance 95 | 96 | Returns: 97 | The replication role which should be either 'slave' or 'dr_slave' 98 | """ 99 | zk = host_utils.MysqlZookeeper() 100 | cursor = conn.cursor() 101 | sql = ("SELECT old_host " 102 | "FROM mysqlops.host_replacement_log " 103 | "WHERE new_instance = %(new_instance)s ") 104 | params = {'new_instance': instance_id} 105 | cursor.execute(sql, params) 106 | log.info(cursor._executed) 107 | result = cursor.fetchone() 108 | if result is None: 109 | raise Exception('Could not determine replacement host') 110 | 111 | old_host = host_utils.HostAddr(result['old_host']) 112 | log.info('Host to be replaced is {old_host}' 113 | ''.format(old_host=old_host.hostname)) 114 | 115 | repl_type = zk.get_replica_type_from_instance(old_host) 116 | 117 | if repl_type == host_utils.REPLICA_ROLE_MASTER: 118 | raise Exception('Corwardly refusing to replace a master!') 119 | elif repl_type is None: 120 | raise Exception('Could not 
determine replacement role') 121 | else: 122 | return repl_type 123 | 124 | 125 | def get_zk_node_for_replica_set(kazoo_client, replica_set): 126 | """ Figure out what node holds the configuration of a replica set 127 | 128 | Args: 129 | kazoo_client - A kazoo_client 130 | replica_set - A name for a replica set 131 | 132 | Returns: 133 | zk_node - The node that holds the replica set 134 | parsed_data - The deserialized data from json in the node (the znode version is also returned, for versioned writes) 135 | """ 136 | for zk_node in [environment_specific.DS_ZK, environment_specific.GEN_ZK]: 137 | znode_data, meta = kazoo_client.get(zk_node) 138 | parsed_data = simplejson.loads(znode_data) 139 | if replica_set in parsed_data: 140 | return (zk_node, parsed_data, meta.version) 141 | raise Exception('Could not find replica_set {replica_set} ' 142 | 'in zk_nodes'.format(replica_set=replica_set)) 143 | 144 | 145 | def remove_auth(zk_record): 146 | """ Remove passwords from zk records 147 | 148 | Args: 149 | zk_record - A dict which may or may not have a passwd or user field. 150 | 151 | Returns: 152 | A dict which if a passwd or user field is present will have the 153 | values redacted 154 | """ 155 | ret = copy.deepcopy(zk_record) 156 | if 'passwd' in ret: 157 | ret['passwd'] = 'REDACTED' 158 | 159 | if 'user' in ret: 160 | ret['user'] = 'REDACTED' 161 | 162 | return ret 163 | 164 | 165 | def add_replica_to_zk(instance, replica_type, dry_run): 166 | """ Add a replica to zk 167 | 168 | Args: 169 | instance - A hostaddr object of the replica to add to zk 170 | replica_type - Either 'slave' or 'dr_slave'. 171 | dry_run - If set, do not modify zk 172 | """ 173 | try: 174 | if replica_type not in [host_utils.REPLICA_ROLE_DR_SLAVE, 175 | host_utils.REPLICA_ROLE_SLAVE]: 176 | raise Exception('Invalid value "{}" for argument ' 177 | 'replica_type'.format(replica_type)) 178 | 179 | log.info('Instance is {}'.format(instance)) 180 | mysql_lib.assert_replication_sanity(instance) 181 | mysql_lib.assert_replication_unlagged( 182 | instance, 183 | mysql_lib.REPLICATION_TOLERANCE_NORMAL) 184 | master = mysql_lib.get_master_from_instance(instance) 185 | 186 | zk_local = host_utils.MysqlZookeeper() 187 | kazoo_client = environment_specific.get_kazoo_client() 188 | if not kazoo_client: 189 | raise Exception('Could not get a zk connection') 190 | 191 | if master not in zk_local.get_all_mysql_instances_by_type( 192 | host_utils.REPLICA_ROLE_MASTER): 193 | raise Exception('Instance {} is not a master in zk' 194 | ''.format(master)) 195 | 196 | log.info('Detected master of {instance} ' 197 | 'as {master}'.format(instance=instance, 198 | master=master)) 199 | 200 | replica_set = zk_local.get_replica_set_from_instance(master) 201 | log.info('Detected replica_set as {}'.format(replica_set)) 202 | old_instance = zk_local.get_mysql_instance_from_replica_set( 203 | replica_set, 204 | repl_type=replica_type) 205 | 206 | if replica_type == host_utils.REPLICA_ROLE_SLAVE: 207 | (zk_node, 208 | parsed_data, version) = get_zk_node_for_replica_set(kazoo_client, 209 | replica_set) 210 | log.info('Replica set {replica_set} is held in zk_node ' 211 | '{zk_node}'.format(zk_node=zk_node, 212 | replica_set=replica_set)) 213 | log.info('Existing config:') 214 | log.info(pprint.pformat(remove_auth(parsed_data[replica_set]))) 215 | new_data = copy.deepcopy(parsed_data) 216 | new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['host'] = \ 217 | instance.hostname 218 | new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE]['port'] = \ 219 | instance.port 220 | log.info('New config:') 221 | 
log.info(pprint.pformat(remove_auth(new_data[replica_set]))) 222 | 223 | if new_data == parsed_data: 224 | raise Exception('No change would be made to zk, ' 225 | 'will not write new config') 226 | elif dry_run: 227 | log.info('dry_run is set, therefore not modifying zk') 228 | else: 229 | log.info('Pushing new configuration for ' 230 | '{replica_set}:'.format(replica_set=replica_set)) 231 | kazoo_client.set(zk_node, simplejson.dumps(new_data), version) 232 | elif replica_type == host_utils.REPLICA_ROLE_DR_SLAVE: 233 | znode_data, dr_meta = kazoo_client.get(environment_specific.DR_ZK) 234 | parsed_data = simplejson.loads(znode_data) 235 | new_data = copy.deepcopy(parsed_data) 236 | if replica_set in parsed_data: 237 | log.info('Existing dr config:') 238 | log.info(pprint.pformat(remove_auth(parsed_data[replica_set]))) 239 | else: 240 | log.info('Replica set did not previously have a dr slave') 241 | 242 | new_data[replica_set] = \ 243 | {host_utils.REPLICA_ROLE_DR_SLAVE: {'host': instance.hostname, 244 | 'port': instance.port}} 245 | log.info('New dr config:') 246 | log.info(pprint.pformat(remove_auth(new_data[replica_set]))) 247 | 248 | if new_data == parsed_data: 249 | raise Exception('No change would be made to zk, ' 250 | 'will not write new config') 251 | elif dry_run: 252 | log.info('dry_run is set, therefore not modifying zk') 253 | else: 254 | log.info('Pushing new dr configuration for ' 255 | '{replica_set}:'.format(replica_set=replica_set)) 256 | kazoo_client.set(environment_specific.DR_ZK, 257 | simplejson.dumps(new_data), dr_meta.version) 258 | else: 259 | # we should raise an exception above rather than getting to here 260 | pass 261 | if not dry_run: 262 | log.info('Stopping replication and event scheduler on {} ' 263 | 'being taken out of use'.format(old_instance)) 264 | try: 265 | mysql_lib.stop_replication(old_instance) 266 | mysql_lib.stop_event_scheduler(old_instance) 267 | except: 268 | log.info('Could not stop replication on {}' 269 | ''.format(old_instance)) 270 | 271 | except Exception, e: 272 | log.exception(e) 273 | raise 274 | 275 | 276 | def swap_master_and_slave(instance, dry_run): 277 | """ Swap a master and slave in zk. Warning: this does not sanity checks 278 | and does nothing more than update zk. YOU HAVE BEEN WARNED! 279 | 280 | Args: 281 | instance - An instance in the replica set. This function will figure 282 | everything else out. 283 | dry_run - If set, do not modify configuration. 
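    Usage note (added for clarity): this is normally reached via
    `modify_mysql_zk.py swap_master_and_slave <host:port> --dangerous`;
    main() refuses to run this action without the --dangerous flag.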
284 | """ 285 | zk_local = host_utils.MysqlZookeeper() 286 | kazoo_client = environment_specific.get_kazoo_client() 287 | if not kazoo_client: 288 | raise Exception('Could not get a zk connection') 289 | 290 | log.info('Instance is {}'.format(instance)) 291 | 292 | replica_set = zk_local.get_replica_set_from_instance(instance) 293 | log.info('Detected replica_set as {}'.format(replica_set)) 294 | 295 | (zk_node, 296 | parsed_data, 297 | version) = get_zk_node_for_replica_set(kazoo_client, replica_set) 298 | log.info('Replica set {replica_set} is held in zk_node ' 299 | '{zk_node}'.format(zk_node=zk_node, 300 | replica_set=replica_set)) 301 | 302 | log.info('Existing config:') 303 | log.info(pprint.pformat(remove_auth(parsed_data[replica_set]))) 304 | new_data = copy.deepcopy(parsed_data) 305 | new_data[replica_set][host_utils.REPLICA_ROLE_MASTER] = \ 306 | parsed_data[replica_set][host_utils.REPLICA_ROLE_SLAVE] 307 | new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE] = \ 308 | parsed_data[replica_set][host_utils.REPLICA_ROLE_MASTER] 309 | 310 | log.info('New config:') 311 | log.info(pprint.pformat(remove_auth(new_data[replica_set]))) 312 | 313 | if new_data == parsed_data: 314 | raise Exception('No change would be made to zk, ' 315 | 'will not write new config') 316 | elif dry_run: 317 | log.info('dry_run is set, therefore not modifying zk') 318 | else: 319 | log.info('Pushing new configuration for ' 320 | '{replica_set}:'.format(replica_set=replica_set)) 321 | kazoo_client.set(zk_node, simplejson.dumps(new_data), version) 322 | 323 | 324 | def swap_slave_and_dr_slave(instance, dry_run): 325 | """ Swap a slave and a dr_slave in zk 326 | 327 | Args: 328 | instance - An instance that is either a slave or dr_slave 329 | """ 330 | zk_local = host_utils.MysqlZookeeper() 331 | kazoo_client = environment_specific.get_kazoo_client() 332 | if not kazoo_client: 333 | raise Exception('Could not get a zk connection') 334 | 335 | log.info('Instance is {}'.format(instance)) 336 | replica_set = zk_local.get_replica_set_from_instance(instance) 337 | 338 | log.info('Detected replica_set as {}'.format(replica_set)) 339 | (zk_node, 340 | parsed_data, 341 | version) = get_zk_node_for_replica_set(kazoo_client, replica_set) 342 | log.info('Replica set {replica_set} is held in zk_node ' 343 | '{zk_node}'.format(zk_node=zk_node, 344 | replica_set=replica_set)) 345 | 346 | log.info('Existing config:') 347 | log.info(pprint.pformat(remove_auth(parsed_data[replica_set]))) 348 | new_data = copy.deepcopy(parsed_data) 349 | 350 | dr_znode_data, dr_meta = kazoo_client.get(environment_specific.DR_ZK) 351 | dr_parsed_data = simplejson.loads(dr_znode_data) 352 | new_dr_data = copy.deepcopy(dr_parsed_data) 353 | if replica_set not in parsed_data: 354 | raise Exception('Replica set {replica_set} is not present ' 355 | 'in dr_node'.format(replica_set=replica_set)) 356 | log.info('Existing dr config:') 357 | log.info(pprint.pformat(remove_auth(dr_parsed_data[replica_set]))) 358 | 359 | new_data[replica_set][host_utils.REPLICA_ROLE_SLAVE] = \ 360 | dr_parsed_data[replica_set][host_utils.REPLICA_ROLE_DR_SLAVE] 361 | new_dr_data[replica_set][host_utils.REPLICA_ROLE_DR_SLAVE] = \ 362 | parsed_data[replica_set][host_utils.REPLICA_ROLE_SLAVE] 363 | 364 | log.info('New config:') 365 | log.info(pprint.pformat(remove_auth(new_data[replica_set]))) 366 | 367 | log.info('New dr config:') 368 | log.info(pprint.pformat(remove_auth(new_dr_data[replica_set]))) 369 | 370 | if dry_run: 371 | log.info('dry_run is set, therefore not modifying 
zk') 372 | else: 373 | log.info('Pushing new configuration for ' 374 | '{replica_set}:'.format(replica_set=replica_set)) 375 | kazoo_client.set(zk_node, simplejson.dumps(new_data), version) 376 | try: 377 | kazoo_client.set(environment_specific.DR_ZK, 378 | simplejson.dumps(new_dr_data), dr_meta.version) 379 | except: 380 | raise Exception('DR node is incorrect due to a different change ' 381 | 'blocking this change. Manual intervention ' 382 | 'is required.') 383 | 384 | 385 | def update_host_replacement_log(conn, instance_id): 386 | """ Mark a replacement as completed 387 | 388 | conn - A connection to the reporting server 389 | instance - The replacement instance 390 | """ 391 | cursor = conn.cursor() 392 | sql = ("UPDATE mysqlops.host_replacement_log " 393 | "SET is_completed = 1 " 394 | "WHERE new_instance = %(new_instance)s ") 395 | params = {'new_instance': instance_id} 396 | cursor.execute(sql, params) 397 | log.info(cursor._executed) 398 | conn.commit() 399 | 400 | 401 | if __name__ == "__main__": 402 | main() 403 | -------------------------------------------------------------------------------- /mysql_backup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import os 4 | import time 5 | 6 | from lib import backup 7 | from lib import environment_specific 8 | from lib import host_utils 9 | from lib import mysql_lib 10 | 11 | log = environment_specific.setup_logging_defaults(__name__) 12 | 13 | 14 | def main(): 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument('-p', 17 | '--port', 18 | help='Port to backup on localhost (default: 3306)', 19 | default='3306') 20 | parser.add_argument('-b', 21 | '--backup_type', 22 | help='Type of backup to run.', 23 | default=backup.BACKUP_TYPE_XBSTREAM, 24 | choices=(backup.BACKUP_TYPE_LOGICAL, 25 | backup.BACKUP_TYPE_XBSTREAM)) 26 | args = parser.parse_args() 27 | instance = host_utils.HostAddr(':'.join((host_utils.HOSTNAME, args.port))) 28 | mysql_backup(instance, args.backup_type) 29 | 30 | 31 | def mysql_backup(instance, backup_type=backup.BACKUP_TYPE_XBSTREAM, 32 | initial_build=False, lock_handle=None): 33 | """ Run a file based backup on a supplied local instance 34 | 35 | Args: 36 | instance - A hostaddr object 37 | backup_type - backup.BACKUP_TYPE_LOGICAL or backup.BACKUP_TYPE_XBSTREAM 38 | initial_build - Boolean, if this is being created right after the server 39 | was built 40 | lock_handle - A lock handle, if we have one from the caller. 41 | """ 42 | 43 | if backup_type == backup.BACKUP_TYPE_XBSTREAM and \ 44 | os.path.isfile(backup.XTRABACKUP_SKIP_FILE): 45 | log.info('Found {}. Skipping xtrabackup ' 46 | 'run.'.format(backup.XTRABACKUP_SKIP_FILE)) 47 | return 48 | 49 | log.info('Confirming sanity of replication (if applicable)') 50 | zk = host_utils.MysqlZookeeper() 51 | try: 52 | replica_type = zk.get_replica_type_from_instance(instance) 53 | except: 54 | # instance is not in production 55 | replica_type = None 56 | 57 | if replica_type and replica_type != host_utils.REPLICA_ROLE_MASTER: 58 | mysql_lib.assert_replication_sanity(instance) 59 | 60 | log.info('Logging initial status to mysqlops') 61 | start_timestamp = time.localtime() 62 | backup_id = mysql_lib.start_backup_log(instance, backup_type, 63 | start_timestamp) 64 | 65 | # Take a lock to prevent multiple backups from running concurrently 66 | # unless we already have a lock from the caller. 
This means we 67 | # also don't have to release the lock at the end; either we 68 | # exit the script entirely, and it gets cleaned up or the caller 69 | # maintains it. 70 | if lock_handle is None: 71 | log.info('Taking backup lock') 72 | lock_handle = host_utils.bind_lock_socket(backup.STD_BACKUP_LOCK_SOCKET) 73 | else: 74 | log.info('Not acquiring backup lock, we already have one.') 75 | 76 | # Actually run the backup 77 | log.info('Running backup') 78 | if backup_type == backup.BACKUP_TYPE_XBSTREAM: 79 | backup_file = backup.xtrabackup_instance(instance, start_timestamp, 80 | initial_build) 81 | elif backup_type == backup.BACKUP_TYPE_LOGICAL: 82 | # We don't need a backup-skip file here since this isn't 83 | # regularly scheduled. 84 | backup_file = backup.logical_backup_instance(instance, start_timestamp, 85 | initial_build) 86 | else: 87 | raise Exception('Unsupported backup type {}'.format(backup_type)) 88 | 89 | # Update database with additional info now that backup is done. 90 | if backup_id: 91 | log.info("Updating database log entry with final backup info") 92 | mysql_lib.finalize_backup_log(backup_id, backup_file) 93 | else: 94 | log.info("The backup is complete, but we were not able to " 95 | "write to the central log DB.") 96 | 97 | 98 | if __name__ == "__main__": 99 | main() 100 | -------------------------------------------------------------------------------- /mysql_backup_logical.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import mysql_backup 4 | from lib import backup 5 | from lib import host_utils 6 | 7 | if __name__ == "__main__": 8 | instance = host_utils.HostAddr(host_utils.HOSTNAME) 9 | mysql_backup.mysql_backup(instance, backup.BACKUP_TYPE_LOGICAL) 10 | -------------------------------------------------------------------------------- /mysql_backup_xtrabackup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import mysql_backup 4 | from lib import backup 5 | from lib import host_utils 6 | 7 | if __name__ == "__main__": 8 | instance = host_utils.HostAddr(host_utils.HOSTNAME) 9 | mysql_backup.mysql_backup(instance, backup.BACKUP_TYPE_XBSTREAM) 10 | -------------------------------------------------------------------------------- /mysql_cli.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import logging 4 | import socket 5 | import string 6 | import subprocess 7 | 8 | from lib import environment_specific 9 | from lib import host_utils 10 | from lib import mysql_lib 11 | 12 | MYSQL_CLI = ('/usr/bin/mysql -A -h {host} -P {port} {sql_safe} ' 13 | '--user={user} --password={password} ' 14 | '--prompt="\h:\p \d \u> " {db}') 15 | 16 | # if we just want to run a command and disconnect, no 17 | # point in setting a prompt. 
18 | MYSQL_CLI_EX = ('/usr/bin/mysql -A -h {host} -P {port} {sql_safe} ' 19 | '--user={user} --password={password} ' 20 | '{db} -e "{execute}"') 21 | 22 | DEFAULT_ROLE = 'read-only' 23 | 24 | log = logging.getLogger(__name__) 25 | 26 | def main(): 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument('db', 29 | help='What server, shard or replica set to connect to ' 30 | '(e.g., sharddb-21-2[:3306], db00003, pbdata03862, ' 31 | 'zenfollowermysql_zendata001002, ' 32 | 'zenshared_video_zendata000002, etc.)') 33 | parser.add_argument('-p', 34 | '--privileges', 35 | help=''.join(('Default is ', DEFAULT_ROLE)), 36 | default=DEFAULT_ROLE, 37 | choices=environment_specific.CLI_ROLES.keys()) 38 | parser.add_argument('-l', 39 | '--longquery', 40 | default=False, 41 | action='store_true', 42 | help='For standard read or write access, use this ' 43 | 'flag if you expect the query to take more than ' 44 | '10 seconds.') 45 | parser.add_argument('--trust_me_im_a_doctor', 46 | default=False, 47 | action='store_true', 48 | help='If this is set, we bypass any paranoid replica ' 49 | 'set checks. User assumes all risk.') 50 | parser.add_argument('-e', 51 | '--execute', 52 | help='An optional SQL command to run.', 53 | default=False) 54 | args = parser.parse_args() 55 | zk = host_utils.MysqlZookeeper() 56 | host = None 57 | db = '' 58 | 59 | role_modifier = 'default' 60 | long_query = '' 61 | if args.longquery: 62 | role_modifier = 'long' 63 | long_query = '(long queries enabled)' 64 | 65 | # check if db exists in dns, if so the supplied argument will be considered 66 | # a hostname, otherwise a replica set. 67 | try: 68 | socket.gethostbyname(args.db) 69 | host = host_utils.HostAddr(args.db) 70 | log.info('{} appears to be a hostname'.format(args.db)) 71 | except: 72 | log.info('{} appears not to be a hostname'.format(args.db)) 73 | 74 | # Maybe it is a replica set 75 | if not host: 76 | try: 77 | host = zk.get_mysql_instance_from_replica_set(args.db) 78 | log.info('{} appears to be a replica set'.format(args.db)) 79 | except: 80 | log.info('{} appears not to be a replica set'.format(args.db)) 81 | 82 | # Perhaps a shard? 83 | if not host: 84 | try: 85 | (replica_set, db) = zk.map_shard_to_replica_and_db(args.db) 86 | host = zk.get_mysql_instance_from_replica_set(replica_set) 87 | log.info('{} appears to be a shard'.format(args.db)) 88 | except: 89 | log.info('{} appears not to be a shard'.format(args.db)) 90 | raise 91 | 92 | if not host: 93 | raise Exception('Could not determine what host to connect to') 94 | 95 | log.info('Will connect to {host} with {privileges} ' 96 | 'privileges {lq}'.format(host=host, 97 | privileges=args.privileges, 98 | lq=long_query)) 99 | (username, password) = mysql_lib.get_mysql_user_for_role( 100 | environment_specific.CLI_ROLES[args.privileges][role_modifier]) 101 | 102 | # we may or may not know what replica set we're connecting to at 103 | # this point. 104 | sql_safe = '' 105 | try: 106 | replica_set = zk.get_replica_set_from_instance(host) 107 | except Exception as e: 108 | if 'is not in zk' in e.message: 109 | log.warning('SERVER IS NOT IN ZK!!!') 110 | replica_set = None 111 | else: 112 | raise 113 | 114 | if not args.trust_me_im_a_doctor: 115 | try: 116 | # do we need a prompt? 117 | if replica_set in environment_specific.EXTRA_PARANOID_REPLICA_SETS: 118 | warn = environment_specific.EXTRA_PARANOID_ALERTS.get(replica_set) 119 | if args.privileges in ['read-write', 'admin']: 120 | resp = raw_input("You've asked for {priv} access to replica " 121 | "set {rs}. 
Are you sure? (Y/N): ".format( 122 | priv=args.privileges, 123 | rs=replica_set)) 124 | if not resp or resp[0] not in ['Y', 'y']: 125 | raise Exception('Connection aborted by user!') 126 | else: 127 | print warn 128 | 129 | # should we enable safe-updates? 130 | if replica_set in environment_specific.PARANOID_REPLICA_SETS: 131 | if args.privileges in ['read-write', 'admin']: 132 | sql_safe = '--init-command="SET SESSION SQL_SAFE_UPDATES=ON"' 133 | 134 | except Exception as e: 135 | log.error("Unable to continue: {}".format(e)) 136 | return 137 | else: 138 | log.warning("OK, we trust you know what you're doing, but " 139 | "don't say we didn't warn you.") 140 | 141 | if args.execute: 142 | execute_escaped = string.replace(args.execute, '"', '\\"') 143 | cmd = MYSQL_CLI_EX.format(host=host.hostname, 144 | port=host.port, 145 | db=db, 146 | user=username, 147 | password=password, 148 | sql_safe=sql_safe, 149 | execute=execute_escaped) 150 | else: 151 | cmd = MYSQL_CLI.format(host=host.hostname, 152 | port=host.port, 153 | db=db, 154 | user=username, 155 | password=password, 156 | sql_safe=sql_safe) 157 | log.info(cmd) 158 | proc = subprocess.Popen(cmd, shell=True) 159 | proc.wait() 160 | 161 | if __name__ == "__main__": 162 | environment_specific.initialize_logger() 163 | main() 164 | -------------------------------------------------------------------------------- /mysql_cnf_config/5.5: -------------------------------------------------------------------------------- 1 | [mysqld3306] 2 | remove_innodb_io_capacity_max=100 3 | remove_innodb_lru_scan_depth=100 4 | innodb_file_per_table=1 5 | -------------------------------------------------------------------------------- /mysql_cnf_config/5.6: -------------------------------------------------------------------------------- 1 | [mysqld3306] 2 | innodb_checksum_algorithm = CRC32 3 | innodb_flush_neighbors = 0 4 | innodb_io_capacity_max = 800 5 | innodb_lru_scan_depth = 1000 6 | master_info_repository = FILE 7 | relay_log_info_repository = TABLE 8 | loose_rpl_semi_sync_master_enabled = 1 9 | loose_rpl_semi_sync_slave_enabled = 1 10 | loose_rpl_semi_sync_master_timeout = 60000 11 | loose_rpl_semi_sync_master_wait_no_slave = OFF 12 | table_open_cache_instances = 8 13 | metadata_locks_hash_instances = 256 14 | slave_pending_jobs_size_max = 48M 15 | 16 | # this is intentionally empty due to changes as of 5.6.34 17 | secure_file_priv = 18 | 19 | remove_enforce_storage_engine = stop_doing_this 20 | default_storage_engine = InnoDB 21 | -------------------------------------------------------------------------------- /mysql_cnf_config/5.7: -------------------------------------------------------------------------------- 1 | [mysqld3306] 2 | remove_performance_schema = WE_SHOULD_START_USING_THIS 3 | remove_innodb_file_format = BARRACUDA_IS_NOW_DEFAULT 4 | remove_innodb_purge_threads = 4_IS_NOW_DEFAULT 5 | remove_innodb_log_block_size = NOT_USED_ANYMORE 6 | remove_enforce_storage_engine = DISABLE_THEM_INSTEAD 7 | remove_log_warnings_suppress = HAS_BEEN_REMOVED 8 | remove_innodb_max_dirty_pages_pct = 75_IS_DEFAULT_NOW 9 | 10 | default_storage_engine = InnoDB 11 | disabled_storage_engines = "MyISAM,FEDERATED,ARCHIVE,MRG_MyISAM,CSV" 12 | sql_mode="NO_ENGINE_SUBSTITUTION,NO_AUTO_CREATE_USER" 13 | 14 | log_error_verbosity = 3 15 | log_statements_unsafe_for_binlog = 0 16 | innodb_page_cleaners = 16 17 | innodb_flush_neighbors = 0 18 | innodb_io_capacity_max = 800 19 | 20 | master_info_repository = FILE 21 | relay_log_info_repository = TABLE 22 | 
loose_rpl_semi_sync_master_enabled = 1 23 | loose_rpl_semi_sync_slave_enabled = 1 24 | loose_rpl_semi_sync_master_timeout = 60000 25 | loose_rpl_semi_sync_master_wait_no_slave = OFF 26 | 27 | optimizer_trace_max_mem_size = 1M 28 | metadata_locks_hash_instances = 256 29 | 30 | # this is intentionally empty due to changes as of 5.7.7 31 | secure_file_priv = 32 | -------------------------------------------------------------------------------- /mysql_cnf_config/c3.8xlarge: -------------------------------------------------------------------------------- 1 | [mysqld3306] 2 | innodb_io_capacity = 5000 3 | innodb_io_capacity_max = 10000 4 | innodb_buffer_pool_size = 24000M 5 | innodb_log_file_size = 1G 6 | innodb_lru_scan_depth = 2048 7 | max_binlog_files = 300 8 | -------------------------------------------------------------------------------- /mysql_cnf_config/default_my.cnf: -------------------------------------------------------------------------------- 1 | [mysqld] 2 | innodb_data_file_path = ibdata1:10M;ibdata2:10M:autoextend 3 | 4 | [mysqld3306] 5 | # Paths 6 | basedir = /usr 7 | datadir = __ROOT__/mysql/3306/data/ 8 | general_log_file = __ROOT__/mysql/3306/logs/general.log 9 | log_bin = __ROOT__/mysql/3306/binlogs/__HOSTNAME__-bin 10 | log_error = __ROOT__/mysql/3306/logs/mysqld.err 11 | pid_file = /var/run/mysqld/mysqld_3306.pid 12 | slow_query_log_file = __ROOT__/mysql/3306/logs/mysql-slow.log 13 | socket = /var/run/mysqld/mysqld_3306.sock 14 | tmpdir = __ROOT__/mysql/3306/tmp/ 15 | 16 | # InnoDB 17 | innodb_adaptive_hash_index = OFF 18 | innodb_buffer_pool_size = 1000M 19 | innodb_data_file_path = ibdata1:10M;ibdata2:10M:autoextend 20 | innodb_file_format = Barracuda 21 | innodb_flush_log_at_trx_commit = 1 22 | innodb_flush_method = O_DIRECT 23 | innodb_lock_wait_timeout = 25 24 | innodb_log_block_size = 4096 25 | innodb_max_dirty_pages_pct = 75 26 | innodb_io_capacity = 200 27 | innodb_log_buffer_size = 32M 28 | innodb_log_file_size = 1G 29 | innodb_open_files = 65535 30 | innodb_purge_threads = 4 31 | innodb_read_io_threads = 8 32 | innodb_write_io_threads = 8 33 | 34 | # Replication 35 | binlog_format = MIXED 36 | binlog_cache_size = 1M 37 | expire_logs_days = 7 38 | log_slave_updates = ON 39 | max_binlog_size = 100M 40 | report_host = __HOSTNAME__ 41 | server_id = 657952340 42 | relay_log_recovery = ON 43 | slave_net_timeout = 300 44 | sync_binlog = 1 45 | 46 | # Connections 47 | user = mysql 48 | port = 3306 49 | max_user_connections = 4500 50 | max_connections = 5000 51 | max_connect_errors = 1844674407370954752 52 | 53 | # Logging 54 | loose_query_response_time_range_base = 2 55 | loose_query_response_time_stats = ON 56 | loose_log_slow_verbosity = full 57 | loose_slow_query_log_use_global_control = all 58 | slow_query_log = 1 59 | long_query_time = 0.1 60 | userstat = on 61 | log_warnings_suppress='1592' 62 | 63 | # because the audit log plugin eats disk 64 | loose_audit_log_policy = NONE 65 | 66 | 67 | # Miscellaneous 68 | performance_schema = off 69 | query_cache_size = 0 70 | skip_name_resolve = 1 71 | max_allowed_packet = 32M 72 | table_open_cache = 32768 73 | table_definition_cache = 8192 74 | max_heap_table_size = 128M 75 | tmp_table_size = 128M 76 | wait_timeout = 600 77 | 78 | [client] 79 | socket = /var/run/mysqld/mysqld_3306.sock 80 | -------------------------------------------------------------------------------- /mysql_cnf_config/i2.2xlarge: -------------------------------------------------------------------------------- 1 | [mysqld3306] 2 | innodb_io_capacity = 5000 
3 | innodb_io_capacity_max = 10000 4 | innodb_buffer_pool_size = 30000M 5 | innodb_log_file_size = 2G 6 | innodb_lru_scan_depth = 2048 7 | max_binlog_files = 2000 8 | -------------------------------------------------------------------------------- /mysql_cnf_config/i2.4xlarge: -------------------------------------------------------------------------------- 1 | [mysqld3306] 2 | innodb_io_capacity = 8000 3 | innodb_io_capacity_max = 10000 4 | innodb_buffer_pool_size = 85000M 5 | innodb_buffer_pool_instances = 16 6 | innodb_log_file_size = 4G 7 | innodb_lru_scan_depth = 2048 8 | max_binlog_files = 4000 9 | -------------------------------------------------------------------------------- /mysql_cnf_config/maxwell.template: -------------------------------------------------------------------------------- 1 | log_level=info 2 | 3 | host={master_host} 4 | port={master_port} 5 | user={username} 6 | password={password} 7 | 8 | replication_host={instance_host} 9 | replication_port={instance_port} 10 | replication_user={username} 11 | replication_password={password} 12 | 13 | client_id={client_id} 14 | 15 | schema_database=test 16 | 17 | producer={output} 18 | output_file=/dev/null 19 | binlog_connector=true 20 | 21 | kafka_topic_generator={generator} 22 | kafka.bootstrap.servers={kafka_servers} 23 | kafka_topic={namespace}.{kafka_topic} 24 | kafka.retries=5 25 | kafka.max.request.size=3145728 26 | kafka.max.in.flight.requests.per.connection=1 27 | zen_service_name={zen_service} 28 | 29 | output_binlog_position=true 30 | output_gtid_position={gtid_mode} 31 | gtid_mode={gtid_mode} 32 | exclude_dbs={excludes} 33 | 34 | # Maxwell collects metrics via dropwizard. These can be exposed through the 35 | # base logging mechanism (slf4j), JMX or HTTP. 36 | # Options: [jmx, slf4j, http] 37 | # Supplying multiple is allowed. 38 | metrics_reporting_type=jmx 39 | # When metrics_reporting_type includes slf4j this is the frequency metrics are emitted to the log, in seconds 40 | metrics_reporting_interval=60 41 | # When metrics_reporting_type includes http this is the port the server will bind to. 
42 | metrics_reporting_port=8080 43 | -------------------------------------------------------------------------------- /mysql_cnf_config/modsharddb: -------------------------------------------------------------------------------- 1 | [mysqld3306] 2 | binlog_format = ROW 3 | gtid_mode = ON 4 | enforce_gtid_consistency = ON 5 | binlog_gtid_simple_recovery = 1 6 | slave_parallel_workers=16 7 | -------------------------------------------------------------------------------- /mysql_cnf_config/myzenfollower16db: -------------------------------------------------------------------------------- 1 | [mysqld3306] 2 | # suggested (not the specific values, but changes from default) by https://bugs.mysql.com/bug.php?id=70209 for low-write workloads 3 | innodb_io_capacity = 1000 4 | innodb_io_capacity_max = 2000 5 | innodb_flushing_avg_loops = 100 6 | innodb_max_dirty_pages_pct = 40 7 | -------------------------------------------------------------------------------- /mysql_cnf_config/phabricator: -------------------------------------------------------------------------------- 1 | [mysqld3306] 2 | default_storage_engine=InnoDB 3 | remove_enforce_storage_engine=InnoDB 4 | -------------------------------------------------------------------------------- /mysql_cnf_config/pinlatertestdb: -------------------------------------------------------------------------------- 1 | [mysqld3306] 2 | innodb_flush_log_at_trx_commit = 2 3 | sync_binlog = 0 4 | slave_parallel_workers = 16 5 | innodb_thread_concurrency = 32 6 | max_binlog_files = 500 7 | -------------------------------------------------------------------------------- /mysql_cnf_config/pt_heartbeat.template: -------------------------------------------------------------------------------- 1 | defaults-file={defaults_file} 2 | host=127.0.0.1 3 | pass={password} 4 | user={username} 5 | database={metadata_db} 6 | replace 7 | -------------------------------------------------------------------------------- /mysql_cnf_config/pt_kill.template: -------------------------------------------------------------------------------- 1 | host=127.0.0.1 2 | busy-time={busy_time} 3 | interval=1 4 | kill 5 | print 6 | victims=all 7 | user={username} 8 | password={password} 9 | ignore-user=({ignore_users}) 10 | -------------------------------------------------------------------------------- /mysql_cnf_config/r3.2xlarge: -------------------------------------------------------------------------------- 1 | [mysqld3306] 2 | innodb_io_capacity = 4000 3 | innodb_io_capacity_max = 8000 4 | innodb_buffer_pool_size = 30000M 5 | max_binlog_files = 300 6 | -------------------------------------------------------------------------------- /mysql_cnf_config/r3.xlarge: -------------------------------------------------------------------------------- 1 | [mysqld3306] 2 | innodb_io_capacity = 4000 3 | innodb_io_capacity_max = 8000 4 | innodb_buffer_pool_size = 15000M 5 | max_binlog_files = 100 6 | -------------------------------------------------------------------------------- /mysql_cnf_config/sharddb: -------------------------------------------------------------------------------- 1 | [mysqld3306] 2 | binlog_format = ROW 3 | gtid_mode = ON 4 | enforce_gtid_consistency = ON 5 | binlog_gtid_simple_recovery = 1 6 | -------------------------------------------------------------------------------- /mysql_grants.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import difflib 4 | import pprint 5 | import re 6 | import 
MySQLdb 7 | import sys 8 | from lib import host_utils 9 | from lib import mysql_lib 10 | 11 | 12 | def main(): 13 | action_desc = """Action description: 14 | 15 | stdout - dump grants to stdout 16 | check - check grants on the instance and ouput errors to stdout 17 | import - import grants on to the instance and then check 18 | nuke_then_import - DANGEROUS! Delete all grants, reimport and then recheck 19 | 20 | Note: Grants do *NOT* run through replication. If you need to make a change, 21 | you will need to run it against the entire replica set. 22 | """ 23 | 24 | parser = argparse.ArgumentParser(description='MySQL grant manager', 25 | epilog=action_desc, 26 | formatter_class=argparse.RawDescriptionHelpFormatter) 27 | parser.add_argument('-i', 28 | '--instance', 29 | help='Instance to act on if other than localhost:3306', 30 | default=''.join((host_utils.HOSTNAME, ':3306'))) 31 | parser.add_argument('-a', 32 | '--action', 33 | choices=('stdout', 34 | 'check', 35 | 'import', 36 | 'nuke_then_import'), 37 | required=True) 38 | 39 | args = parser.parse_args() 40 | instance = host_utils.HostAddr(args.instance) 41 | 42 | if args.action == 'stdout': 43 | grants = mysql_lib.get_all_mysql_grants() 44 | for grant in grants.iteritems(): 45 | print format_grant(grant[1]) 46 | else: 47 | problems = manage_mysql_grants(instance, args.action) 48 | if len(problems) > 0: 49 | print "Current problems:" 50 | print '\n'.join(problems) 51 | sys.exit(1) 52 | 53 | 54 | def format_grant(grant): 55 | """ Convert a dict describing mysql grants into a GRANT command 56 | 57 | Args: 58 | grant - dict with keys string privileges, username, source_host, password 59 | and a bool grant_option 60 | 61 | Returns: 62 | sql - A GRANT command in string format 63 | """ 64 | if grant['grant_option']: 65 | grant_option = ' WITH GRANT OPTION' 66 | else: 67 | grant_option = '' 68 | sql_format = "GRANT {privs} ON *.* TO `{user}`@`{host}` " +\ 69 | "IDENTIFIED BY '{password}' {grant_option};" 70 | sql = sql_format.format(privs=grant['privileges'], 71 | user=grant['username'], 72 | host=grant['source_host'], 73 | password=grant['password'], 74 | grant_option=grant_option) 75 | return sql 76 | 77 | 78 | def parse_grant(raw_grant): 79 | """ Convert a MySQL GRANT into a dict 80 | 81 | Args: 82 | sql - A GRANT command in string format 83 | 84 | Returns: 85 | grant - dict with keys string privileges, username, source_host, password 86 | and a bool grant_option 87 | """ 88 | ret = dict() 89 | pattern = "GRANT (?P.+) ON (?:.+) TO '(?P.+)'@'(?P[^']+)'" 90 | match = re.match(pattern, raw_grant) 91 | ret['privileges'] = match.group(1) 92 | ret['username'] = match.group(2) 93 | ret['source_host'] = match.group(3) 94 | 95 | pattern = ".+PASSWORD '(?P[^']+)'(?P WITH GRANT OPTION)?" 
96 | match = re.match(pattern, raw_grant) 97 | if match: 98 | ret['hashed_password'] = match.group(1) 99 | else: 100 | ret['hashed_password'] = "NONE" 101 | 102 | pattern = ".+WITH GRANT OPTION+" 103 | match = re.match(pattern, raw_grant) 104 | if match: 105 | ret['grant_option'] = True 106 | else: 107 | ret['grant_option'] = False 108 | return ret 109 | 110 | 111 | def manage_mysql_grants(instance, action): 112 | """ Nuke/import/check MySQL grants 113 | 114 | Args: 115 | instance - an object identify which host to act upon 116 | action - available options: 117 | check - check grants on the instance and ouput errors to stdout 118 | import - import grants on to the instance and then check 119 | nuke_then_import - delete all grants, reimport and then recheck 120 | 121 | Returns: 122 | problems - a list of problems 123 | 124 | """ 125 | try: 126 | conn = mysql_lib.connect_mysql(instance) 127 | except MySQLdb.OperationalError as detail: 128 | (error_code, msg) = detail.args 129 | if (error_code != mysql_lib.MYSQL_ERROR_HOST_ACCESS_DENIED and 130 | error_code != mysql_lib.MYSQL_ERROR_ACCESS_DENIED): 131 | raise 132 | 133 | if instance.hostname == host_utils.HOSTNAME.split('.')[0]: 134 | print ('Could not connect to instance, but it looks like ' 135 | 'instance is on localhost. Going to try defaults for ' 136 | 'authentication.') 137 | conn = mysql_lib.connect_mysql(instance, 'bootstrap') 138 | else: 139 | raise 140 | 141 | grants = mysql_lib.get_all_mysql_grants() 142 | 143 | # nuke 144 | conn.query("SET SQL_LOG_BIN=0") 145 | if action == 'nuke_then_import': 146 | conn.query("SET SQL_SAFE_UPDATES = 0") 147 | conn.query("delete from mysql.user") 148 | conn.query("delete from mysql.db") 149 | conn.query("delete from mysql.proxies_priv") 150 | # import 151 | if action in ('import', 'nuke_then_import'): 152 | for grant in grants.iteritems(): 153 | sql = format_grant(grant[1]) 154 | conn.query(sql) 155 | conn.query('flush privileges') 156 | # check 157 | if action in ('check', 'import', 'nuke_then_import'): 158 | problems = [] 159 | on_server = dict() 160 | cursor = conn.cursor() 161 | 162 | # PK on (user, host), so this returns all distinct users 163 | cursor.execute("SELECT user, host FROM mysql.user") 164 | users = cursor.fetchall() 165 | for row in users: 166 | user = "`{user}`@`{host}`".format(user=row['user'], 167 | host=row['host']) 168 | sql = "SHOW GRANTS FOR {user}".format(user=user) 169 | try: 170 | cursor.execute(sql) 171 | except MySQLdb.OperationalError as detail: 172 | (error_code, msg) = detail.args 173 | if error_code != mysql_lib.MYSQL_ERROR_NO_DEFINED_GRANT: 174 | raise 175 | 176 | problems.append('Grant {user} is not active, probably due to ' 177 | 'skip-name-resolve being on'.format(user=user)) 178 | continue 179 | returned_grants = cursor.fetchall() 180 | 181 | if len(returned_grants) > 1: 182 | problems.append('Grant for {user} is too complicated, ' 183 | 'ignoring grant'.format(user=user)) 184 | continue 185 | unparsed_grant = returned_grants[0][returned_grants[0].keys()[0]] 186 | on_server[user] = parse_grant(unparsed_grant) 187 | 188 | expected_users = set(grants.keys()) 189 | active_users = set(on_server.keys()) 190 | 191 | missing_users = expected_users.difference(active_users) 192 | for user in missing_users: 193 | problems.append('Missing user: {user}'.format(user=user)) 194 | 195 | unexpected_user = active_users.difference(expected_users) 196 | for user in unexpected_user: 197 | problems.append('Unexpected user: {user}'.format(user=user)) 198 | 199 | # need hashes from 
passwords. We could store this in zk, but it just 200 | # another thing to screw up 201 | for key in grants.keys(): 202 | password = grants[key]['password'] 203 | sql = "SELECT PASSWORD('{pw}') pw".format(pw=password) 204 | cursor.execute(sql) 205 | ret = cursor.fetchone() 206 | grants[key]['hashed_password'] = ret['pw'] 207 | del grants[key]['password'] 208 | 209 | for key in set(grants.keys()).intersection(set(on_server.keys())): 210 | if grants[key] != on_server[key]: 211 | diff = difflib.unified_diff(pprint.pformat(on_server[key]).splitlines(), 212 | pprint.pformat(grants[key]).splitlines()) 213 | problems.append('Grant for user "{user}" does not match:' 214 | '{problem}'.format(user=key, 215 | problem='\n'.join(diff))) 216 | 217 | return problems 218 | 219 | 220 | if __name__ == "__main__": 221 | main() 222 | -------------------------------------------------------------------------------- /mysql_init_server.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import ConfigParser 4 | import glob 5 | import os 6 | import time 7 | 8 | import mysql_backup 9 | import mysql_cnf_builder 10 | import mysql_grants 11 | from lib import backup 12 | from lib import environment_specific 13 | from lib import host_utils 14 | from lib import mysql_lib 15 | 16 | DIRS_TO_CLEAR = ['log_bin', 'datadir', 'tmpdir'] 17 | DIRS_TO_CREATE = ['datadir', 'log_bin', 'log_error', 18 | 'slow_query_log_file', 'tmpdir'] 19 | # in MySQL 5.5+, log_slow_queries is deprecated in favor of 20 | # slow_query_log_file 21 | FILES_TO_CLEAR = ['log_slow_queries', 'log_error', 'slow_query_log_file'] 22 | 23 | # If MySQL 5.7+, don't use mysql_install_db 24 | MYSQL_INSTALL_DB = '/usr/bin/mysql_install_db' 25 | MYSQL_INITIALIZE = '/usr/sbin/mysqld --initialize-insecure' 26 | log = environment_specific.setup_logging_defaults(__name__) 27 | 28 | 29 | def main(): 30 | description = 'Initialize a MySQL serer' 31 | parser = argparse.ArgumentParser(description=description) 32 | parser.add_argument('-p', 33 | '--port', 34 | help='Port to act on, default is 3306', 35 | default='3306') 36 | parser.add_argument('--skip_production_check', 37 | help=('DANGEROUS! Skip check of whether the instance ' 38 | 'to be initialized is already in use'), 39 | default=False, 40 | action='store_true') 41 | parser.add_argument('--skip_backup', 42 | help=('Do not run a backup once the instance is ' 43 | 'setup'), 44 | default=False, 45 | action='store_true') 46 | args = parser.parse_args() 47 | 48 | instance = host_utils.HostAddr(':'.join((host_utils.HOSTNAME, 49 | args.port))) 50 | mysql_init_server(instance, 51 | args.skip_production_check, 52 | skip_backup=args.skip_backup) 53 | 54 | 55 | def mysql_init_server(instance, 56 | skip_production_check=False, 57 | skip_backup=True, lock_handle=None): 58 | """ Remove any data and initialize a MySQL instance 59 | 60 | Args: 61 | instance - A hostaddr object pointing towards localhost to act upon 62 | skip_production_check - Dangerous! will not run safety checks to protect 63 | production data 64 | skip_backup - Don't run a backup after the instance is setup 65 | lock_handle - If the caller already locked the system, pass in the 66 | lock handle, as we may need to release and reacquire 67 | to prevent mysqld from keeping it. 
68 | """ 69 | if lock_handle is None: 70 | # Take a lock to prevent multiple restores from running concurrently 71 | log.info('Taking a lock to block race conditions') 72 | lock_handle = host_utils.bind_lock_socket(backup.STD_BACKUP_LOCK_SOCKET) 73 | else: 74 | log.info('Lock already exists from caller.') 75 | 76 | try: 77 | # sanity check 78 | zk = host_utils.MysqlZookeeper() 79 | if (not skip_production_check and 80 | instance in zk.get_all_mysql_instances()): 81 | raise Exception("It appears {instance} is in use. This is" 82 | " very dangerous!".format(instance=instance)) 83 | 84 | log.info('Checking host for mounts, etc...') 85 | basic_host_sanity() 86 | 87 | log.info('(re)Generating MySQL cnf files') 88 | mysql_cnf_builder.build_cnf() 89 | 90 | log.info('Creating any missing directories') 91 | create_and_chown_dirs(instance.port) 92 | 93 | log.info('Shutting down MySQL (if applicable)') 94 | host_utils.stop_mysql(instance.port) 95 | 96 | log.info('Deleting existing MySQL data') 97 | delete_mysql_data(instance.port) 98 | 99 | log.info('Creating MySQL privileges tables') 100 | init_privileges_tables(instance.port) 101 | 102 | log.info('Clearing innodb log files') 103 | delete_innodb_log_files(instance.port) 104 | 105 | log.info('Starting up instance') 106 | host_utils.start_mysql(instance.port) 107 | 108 | log.info('Importing MySQL users') 109 | mysql_grants.manage_mysql_grants(instance, 'nuke_then_import') 110 | 111 | log.info('Creating test database') 112 | mysql_lib.create_db(instance, 'test') 113 | 114 | log.info('Setting up query response time plugins') 115 | mysql_lib.setup_response_time_metrics(instance) 116 | 117 | log.info('Setting up semi-sync replication plugins') 118 | mysql_lib.setup_semisync_plugins(instance) 119 | 120 | log.info('Setting up audit log plugin') 121 | mysql_lib.setup_audit_plugin(instance) 122 | 123 | log.info('Restarting pt daemons') 124 | host_utils.manage_pt_daemons(instance.port) 125 | 126 | log.info('MySQL initalization complete') 127 | 128 | finally: 129 | # We have to do this, ugly though it may be, to ensure that 130 | # the running MySQL process doesn't maintain a hold on the lock 131 | # socket after the script exits. We reacquire the lock after 132 | # the restart and pass it back to the caller. 
133 | # 134 | if lock_handle: 135 | log.info('Restarting MySQL, releasing lock.') 136 | host_utils.stop_mysql(instance.port) 137 | log.info('Sleeping 5 seconds.') 138 | time.sleep(5) 139 | host_utils.release_lock_socket(lock_handle) 140 | host_utils.start_mysql(instance.port) 141 | log.info('Reacquiring lock.') 142 | lock_handle = host_utils.bind_lock_socket(backup.STD_BACKUP_LOCK_SOCKET) 143 | 144 | if not skip_backup: 145 | log.info('Taking a backup') 146 | mysql_backup.mysql_backup(instance, initial_build=True, 147 | lock_handle=lock_handle) 148 | 149 | return lock_handle 150 | 151 | 152 | def basic_host_sanity(): 153 | """ Confirm basic sanity (mounts, etc) on localhost """ 154 | if host_utils.get_pinfo_cloud() != host_utils.TESTING_PINFO_CLOUD: 155 | for path in host_utils.REQUIRED_MOUNTS: 156 | found = False 157 | for choice in path.split(':'): 158 | if os.path.ismount(choice): 159 | found = True 160 | break 161 | if not found: 162 | raise Exception('No acceptable options for {path} ' 163 | 'are mounted'.format(path=path)) 164 | 165 | for path in host_utils.ZK_CACHE: 166 | if not os.path.isfile(path): 167 | raise Exception('ZK updater path {path} ' 168 | 'is not present'.format(path=path)) 169 | 170 | if not os.path.isfile(MYSQL_INSTALL_DB): 171 | raise Exception('MySQL install script {script} is not present' 172 | ''.format(script=mysql_init_server.MYSQL_INSTALL_DB)) 173 | 174 | 175 | def create_and_chown_dirs(port): 176 | """ Create and chown any missing directories needed for mysql """ 177 | for variable in DIRS_TO_CREATE: 178 | try: 179 | path = os.path.dirname(host_utils.get_cnf_setting(variable, port)) 180 | except ConfigParser.NoOptionError: 181 | # Not defined, so must not matter 182 | return 183 | if not os.path.isdir(path): 184 | log.info('Creating and chowning {path}'.format(path=path)) 185 | os.makedirs(path) 186 | host_utils.change_owner(path, 'mysql', 'mysql') 187 | 188 | 189 | def delete_mysql_data(port): 190 | """ Purge all data on disk for a MySQL instance 191 | 192 | Args: 193 | port - The port on which to act upon on localhost 194 | """ 195 | for dir_key in DIRS_TO_CLEAR: 196 | directory = host_utils.get_cnf_setting(dir_key, port) 197 | if not os.path.isdir(directory): 198 | directory = os.path.dirname(directory) 199 | log.info('Removing contents of {dir}'.format(dir=directory)) 200 | host_utils.clean_directory(directory) 201 | 202 | # This should not bomb if one of the files to truncate 203 | # isn't specified in the config file. 
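    # Added note: if an option below is missing from the cnf, the exception
    # is caught and logged as a warning so that initialization continues
    # rather than aborting.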
204 | for file_keys in FILES_TO_CLEAR: 205 | try: 206 | del_file = host_utils.get_cnf_setting(file_keys, port) 207 | log.info('Truncating {del_file}'.format(del_file=del_file)) 208 | open(del_file, 'w').close() 209 | host_utils.change_owner(del_file, 'mysql', 'mysql') 210 | except Exception: 211 | log.warning('Option {f} not specified ' 212 | 'in my.cnf - continuing.'.format(f=file_keys)) 213 | 214 | 215 | def delete_innodb_log_files(port): 216 | """ Purge ib_log files 217 | 218 | Args: 219 | port - the port on which to act on localhost 220 | """ 221 | try: 222 | ib_logs_dir = host_utils.get_cnf_setting('innodb_log_group_home_dir', 223 | port) 224 | except ConfigParser.NoOptionError: 225 | ib_logs_dir = host_utils.get_cnf_setting('datadir', 226 | port) 227 | glob_path = os.path.join(ib_logs_dir, 'ib_logfile') 228 | final_glob = ''.join((glob_path, '*')) 229 | for del_file in glob.glob(final_glob): 230 | log.info('Clearing {del_file}'.format(del_file=del_file)) 231 | os.remove(del_file) 232 | 233 | 234 | def init_privileges_tables(port): 235 | """ Bootstap a MySQL instance 236 | 237 | Args: 238 | port - the port on which to act upon on localhost 239 | """ 240 | version = mysql_lib.get_installed_mysqld_version() 241 | if version[0:3] < '5.7': 242 | install_command = MYSQL_INSTALL_DB 243 | else: 244 | install_command = MYSQL_INITIALIZE 245 | 246 | datadir = host_utils.get_cnf_setting('datadir', port) 247 | cmd = ('{MYSQL_INSTALL_DB} --datadir={datadir}' 248 | ' --user=mysql'.format(MYSQL_INSTALL_DB=install_command, 249 | datadir=datadir)) 250 | log.info(cmd) 251 | (std_out, std_err, return_code) = host_utils.shell_exec(cmd) 252 | if return_code: 253 | raise Exception("Return {return_code} != 0 \n" 254 | "std_err:{std_err}\n" 255 | "std_out:{std_out}".format(return_code=return_code, 256 | std_err=std_err, 257 | std_out=std_out)) 258 | 259 | 260 | if __name__ == "__main__": 261 | main() 262 | -------------------------------------------------------------------------------- /mysql_record_table_size.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import glob 5 | import logging 6 | import os 7 | import re 8 | 9 | from lib import environment_specific 10 | from lib import host_utils 11 | from lib import mysql_lib 12 | 13 | INNODB_EXTENSION = 'ibd' 14 | TABLE_SIZE_TBL = 'historic_table_size' 15 | TABLE_DEF = ("CREATE TABLE {db}.{tbl} ( " 16 | "`hostname` varchar(90) NOT NULL DEFAULT '', " 17 | "`port` int(11) NOT NULL DEFAULT '0', " 18 | "`db` varchar(64) NOT NULL, " 19 | "`table_name` varchar(64) NOT NULL, " 20 | "`partition_name` varchar(64) NOT NULL DEFAULT '', " 21 | "`reported_at` date NOT NULL, " 22 | "`size_mb` int(10) unsigned NOT NULL, " 23 | "PRIMARY KEY (`hostname`,`port`,`db`,`table_name`, `partition_name`, `reported_at`) " 24 | ") ENGINE=InnoDB DEFAULT CHARSET=latin1") 25 | 26 | log = logging.getLogger(__name__) 27 | 28 | 29 | def main(): 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument('-p', 32 | '--port', 33 | help=('Port on localhost on to record db size. 
' 34 | 'Default 3306.'), 35 | default='3306') 36 | 37 | args = parser.parse_args() 38 | log_table_sizes(args.port) 39 | 40 | 41 | def log_table_sizes(port): 42 | """ Determine and record the size of tables on a MySQL instance 43 | 44 | Args: 45 | port - int 46 | """ 47 | instance = host_utils.HostAddr(':'.join((host_utils.HOSTNAME, port))) 48 | zk = host_utils.MysqlZookeeper() 49 | 50 | replica_set = zk.get_replica_set_from_instance(instance) 51 | master = zk.get_mysql_instance_from_replica_set(replica_set, 52 | host_utils.REPLICA_ROLE_MASTER) 53 | if not mysql_lib.does_table_exist(master, 54 | mysql_lib.METADATA_DB, 55 | TABLE_SIZE_TBL): 56 | create_table_size_table(master) 57 | 58 | sizes = get_all_table_sizes(instance) 59 | conn = mysql_lib.connect_mysql(master, 'dbascript') 60 | for db in sizes: 61 | for table in sizes[db]: 62 | for partition in sizes[db][table]: 63 | cursor = conn.cursor() 64 | sql = ('REPLACE INTO {metadata_db}.{tbl} ' 65 | 'SET ' 66 | 'hostname = %(hostname)s, ' 67 | 'port = %(port)s, ' 68 | 'db = %(db)s, ' 69 | 'table_name = %(table)s, ' 70 | 'partition_name = %(partition)s, ' 71 | 'reported_at = CURDATE(), ' 72 | 'size_mb = %(size)s ') 73 | cursor.execute(sql.format(metadata_db=mysql_lib.METADATA_DB, 74 | tbl=TABLE_SIZE_TBL), 75 | {'hostname': instance.hostname, 76 | 'port': instance.port, 77 | 'db': db, 78 | 'table': table, 79 | 'partition': partition, 80 | 'size': sizes[db][table][partition]}) 81 | conn.commit() 82 | log.info(cursor._executed) 83 | cursor.close() 84 | 85 | 86 | def get_db_size_from_log(instance, db): 87 | """ Get yesterdays db size for an instance 88 | 89 | Args: 90 | instance - A hostaddr object 91 | db - A database that exists on the instance 92 | 93 | Returns: size in MB 94 | """ 95 | conn = mysql_lib.connect_mysql(instance, 'dbascript') 96 | cursor = conn.cursor() 97 | sql = ("SELECT SUM(size_mb) as 'mb', " 98 | " COUNT(1) as 'table_count' " 99 | "FROM {metadata_db}.{tbl} " 100 | "WHERE db = %(db)s " 101 | " AND reported_at=CURDATE() - INTERVAL 1 DAY " 102 | " AND hostname=%(hostname)s and port=%(port)s " 103 | "GROUP BY db;") 104 | params = {'hostname': instance.hostname, 105 | 'port': instance.port, 106 | 'db': db} 107 | cursor.execute(sql.format(metadata_db=mysql_lib.METADATA_DB, 108 | tbl=TABLE_SIZE_TBL), params) 109 | ret = cursor.fetchone() 110 | 111 | expected_tables = mysql_lib.get_tables(instance, db, skip_views=True) 112 | if ret['table_count'] != len(expected_tables): 113 | raise Exception('Size data appears to be missing for {db} on {inst}' 114 | ''.format(db=db, inst=instance)) 115 | return ret['mb'] 116 | 117 | 118 | def create_table_size_table(instance): 119 | """ Create the table_size_historic table 120 | 121 | Args: 122 | a hostAddr object for the master of the replica set 123 | """ 124 | conn = mysql_lib.connect_mysql(instance, 'dbascript') 125 | cursor = conn.cursor() 126 | cursor.execute(TABLE_DEF.format(db=mysql_lib.METADATA_DB, 127 | tbl=TABLE_SIZE_TBL)) 128 | cursor.close() 129 | conn.close() 130 | 131 | 132 | def get_all_table_sizes(instance): 133 | """ Get size of all innodb tables 134 | NOTE: At this point tables should always be innodb 135 | NOTE2: file per table should always be on. 
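    Added note: sizes are derived from the on-disk .ibd file sizes under the
    datadir and are reported in MB (st_size / 1048576).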
136 | 137 | Args: 138 | instance - A hostAddr object 139 | """ 140 | datadir = host_utils.get_cnf_setting('datadir', instance.port) 141 | ret = dict() 142 | for db in mysql_lib.get_dbs(instance): 143 | ret[db] = dict() 144 | db_dir = os.path.join(datadir, db) 145 | for table_path in glob.glob(''.join([db_dir, '/*', INNODB_EXTENSION])): 146 | (table, partition) = parse_table_file_name(table_path) 147 | if table not in ret[db]: 148 | ret[db][table] = dict() 149 | ret[db][table][partition] = os.stat(table_path).st_size / 1048576 150 | 151 | return ret 152 | 153 | 154 | def parse_table_file_name(table_path): 155 | """ Parse a filename into a tablename and partition name 156 | 157 | Args: 158 | filepath - a file path to a innodb table 159 | 160 | Returns: A tuple whose first element is the table name and second 161 | element is the partition name or an empty string for 162 | non-partitioned tables. 163 | """ 164 | res = re.match(''.join(['([^#.]+)(#P#)?(.+)?\.', INNODB_EXTENSION]), 165 | os.path.basename(table_path)) 166 | if res.group(3) is None: 167 | return (res.group(1), '') 168 | else: 169 | return (res.group(1), res.group(3)) 170 | 171 | 172 | if __name__ == "__main__": 173 | environment_specific.initialize_logger() 174 | main() 175 | -------------------------------------------------------------------------------- /mysql_replica_mappings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | 4 | from lib import environment_specific 5 | from lib import host_utils 6 | 7 | OUTPUT_FORMAT = ('{replica_set: max_rs_length: 45 | max_rs_length = len(replica_set) 46 | if args.extended and inst['host'] in servers: 47 | sg = ','.join(servers[inst['host']].get('security_groups', 48 | 'N/A')) 49 | if len(sg) > max_sg_length: 50 | max_sg_length = len(sg) 51 | 52 | max_rs_length += 4 53 | max_sg_length += 4 54 | hostport_length = max_rs_length + 6 55 | 56 | # dynamically generate padding 57 | format_str = OUTPUT_FORMAT.replace( 58 | 'RS', str(max_rs_length)).replace( 59 | 'HP', str(hostport_length)).replace( 60 | 'SGL', str(max_sg_length)) 61 | format_str_extended = OUTPUT_FORMAT_EXTENDED.replace( 62 | 'RS', str(max_rs_length)).replace( 63 | 'HP', str(hostport_length)).replace( 64 | 'SGL', str(max_sg_length)) 65 | 66 | for replica_set in config: 67 | for rtype in host_utils.REPLICA_TYPES: 68 | if rtype in config[replica_set]: 69 | inst = config[replica_set][rtype] 70 | 71 | if args.extended and inst['host'] in servers: 72 | az = servers[inst['host']]['zone'] 73 | id = servers[inst['host']]['instance_id'] 74 | hw = servers[inst['host']]['instance_type'] 75 | try: 76 | sg = ','.join(servers[inst['host']]['security_groups']) 77 | except KeyError: 78 | sg = '??VPC??' 
79 | 80 | output.append(format_str_extended.format( 81 | replica_set=replica_set, 82 | replica_type=rtype, 83 | hostport=':'.join([inst['host'], str(inst['port'])]), 84 | az=az, 85 | hw=hw, 86 | sg=sg, 87 | id=id)) 88 | else: 89 | output.append(format_str.format( 90 | replica_set=replica_set, 91 | replica_type=rtype, 92 | hostport=':'.join([inst['host'], str(inst['port'])]))) 93 | 94 | output.sort() 95 | print '\n'.join(output) 96 | 97 | 98 | if __name__ == "__main__": 99 | main() 100 | -------------------------------------------------------------------------------- /mysql_shard_config.py: -------------------------------------------------------------------------------- 1 | from check_shard_mappings import get_problem_replicasets 2 | from lib.environment_specific import MYSQL_SHARDS_CONFIG_PATH 3 | from lib.environment_specific import TEST_MYSQL_SHARDS_CONFIG_PATH 4 | from lib.environment_specific import MYSQL_SHARDS_S3_PATH_PREFIX 5 | from lib.environment_specific import TEST_MYSQL_SHARDS_S3_PATH_PREFIX 6 | #from lib.environment_specific import MYSQL_SHARDS_CONFIG_LOCAL_PATH 7 | #from lib.environment_specific import TEST_MYSQL_SHARDS_CONFIG_LOCAL_PATH 8 | from lib.environment_specific_dir.config_base import Config 9 | 10 | class MySqlShardConfig(Config): 11 | 12 | SERVICES = "services" 13 | NAMESPACES = "namespaces" 14 | SHARDS = "shards" 15 | 16 | MYSQLDB = "mysqldb" 17 | REPLICA_SET = "replica_set" 18 | 19 | def __init__(self, use_test_config=True): 20 | super(MySqlShardConfig, self).__init__( 21 | zk_path=TEST_MYSQL_SHARDS_CONFIG_PATH if use_test_config else MYSQL_SHARDS_CONFIG_PATH, 22 | s3_path_prefix=TEST_MYSQL_SHARDS_S3_PATH_PREFIX if use_test_config else MYSQL_SHARDS_S3_PATH_PREFIX, 23 | name='test_mysql_shards' if use_test_config else 'mysql_shards') 24 | # localpath = TEST_MYSQL_SHARDS_CONFIG_LOCAL_PATH if use_test_config else MYSQL_SHARDS_CONFIG_LOCAL_PATH 25 | 26 | def migrate_shard(self, service_name, namespace_name, shard_name, old_replica_set, new_replica_set): 27 | "changes repplica set of a shard during shard migration" 28 | service = self.updated_config_dict[self.SERVICES][service_name] 29 | namespace = service[self.NAMESPACES][namespace_name] 30 | shard = namespace[self.SHARDS][shard_name] 31 | if shard[self.REPLICA_SET] == old_replica_set: 32 | shard[self.REPLICA_SET] = new_replica_set 33 | elif shard[self.REPLICA_SET] != new_replica_set: 34 | raise Exception("migrate shard to %s fail: %s %s %s. Old replica set is %s, not %s " % 35 | (new_replica_set, service_name, namespace_name, shard_name, shard[self.REPLICA_SET], 36 | old_replica_set)) 37 | else: 38 | print ("Warning: %s %s %s already on %s, not on %s " % 39 | (service_name, namespace_name, shard_name, new_replica_set, old_replica_set)) 40 | 41 | def add_shard(self, service_name, namespace_name, shard_name, replica_set, dbname): 42 | "adds shard to an existing namespace in existing service" 43 | service = self.updated_config_dict[self.SERVICES][service_name] 44 | namespace = service[self.NAMESPACES][namespace_name] 45 | shard = namespace[self.SHARDS].get(shard_name) 46 | if shard is None: 47 | shard = { self.MYSQLDB : dbname , self.REPLICA_SET : replica_set } 48 | namespace[self.SHARDS][shard_name] = shard 49 | elif shard[self.MYSQLDB] != dbname or shard[self.REPLICA_SET] != replica_set: 50 | raise Exception("add shard failed for: %s %s %s. 
shard exists : %s" % 51 | (service_name, namespace_name, shard_name, str(shard))) 52 | 53 | def add_new_service(self, service_name): 54 | """ add new service and default name space """ 55 | if self.updated_config_dict[self.SERVICES].get(service_name) is None: 56 | self.updated_config_dict[self.SERVICES][service_name] = { 57 | self.NAMESPACES: {'': {self.SHARDS: {}}}} 58 | else: 59 | raise Exception("add service name failed for {}".format( 60 | service_name)) 61 | 62 | def add_new_namespace(self, service_name, namespace_name): 63 | """ add new namespace to existing service """ 64 | try: 65 | service = self.updated_config_dict[self.SERVICES][service_name] 66 | except KeyError as e: 67 | print 'Service name {} is not in ZK mapping yet'.format(e) 68 | raise Exception('service name does not exist') 69 | namespace = service[self.NAMESPACES].get(namespace_name) 70 | if namespace is None: 71 | service[self.NAMESPACES][namespace_name] = {self.SHARDS: {}} 72 | else: 73 | raise Exception("adding new name space failed for {}".format(namespace_name)) 74 | 75 | def check_shard(self, service_name, namespace_name, shard_name, shard): 76 | "checks if shard contents match what is expected" 77 | service = self.updated_config_dict[self.SERVICES][service_name] 78 | namespace = service[self.NAMESPACES][namespace_name] 79 | return namespace[self.SHARDS].get(shard_name) == shard 80 | 81 | def push_config_with_validation(self): 82 | problem_replica_sets = get_problem_replicasets(None, 83 | self.updated_config_dict[self.SERVICES]) 84 | if problem_replica_sets: 85 | raise Exception('Problem in shard mapping {}'.format(problem_replica_sets)) 86 | self.push_config() 87 | -------------------------------------------------------------------------------- /mysql_shard_mappings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | from lib import host_utils 4 | 5 | DEFAULT = 'default' 6 | OUTPUT_FORMAT = ('{replica_set:&2 11 | exit 1 12 | } 13 | end script 14 | 15 | script 16 | exec start-stop-daemon --start --exec /usr/bin/pt-heartbeat -- --config /etc/pt-heartbeat-3306.conf --create-table --update 17 | end script 18 | -------------------------------------------------------------------------------- /pt-kill.conf: -------------------------------------------------------------------------------- 1 | description "Percona MySQL pt-kill tool" 2 | 3 | start on filesystem and net-device-up IFACE=lo 4 | respawn limit 15 5 5 | 6 | script 7 | exec start-stop-daemon --start --exec /usr/bin/pt-kill -- --config /etc/pt-kill.conf >> /var/log/pt-kill.log 2>&1 8 | end script 9 | -------------------------------------------------------------------------------- /restart_daemons.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import argparse 4 | import logging 5 | import psutil 6 | from lib import mysql_lib 7 | from lib import host_utils 8 | from lib import environment_specific 9 | 10 | import mysql_cnf_builder 11 | 12 | log = logging.getLogger(__name__) 13 | 14 | 15 | def restart_maxwell_if_not_exists(instance): 16 | """ Start Maxwell if it isn't currently running. 
17 | Args: 18 | instance: (host_utils.HostAddr): host to check 19 | Returns: 20 | none 21 | """ 22 | zk = host_utils.MysqlZookeeper() 23 | replica_type = zk.get_replica_type_from_instance(instance) 24 | gvars = mysql_lib.get_global_variables(instance) 25 | 26 | client_id = gvars['server_uuid'] 27 | gtid_mode = True if gvars.get('gtid_mode') == 'ON' else False 28 | (username, _) = mysql_lib.get_mysql_user_for_role('maxwell') 29 | 30 | output_target = 'file' 31 | 32 | # master writes to kafka, everything else writes to /dev/null, 33 | # at least for now. 34 | if instance.hostname_prefix in environment_specific.MAXWELL_TARGET_MAP \ 35 | and replica_type == host_utils.REPLICA_ROLE_MASTER: 36 | output_target = 'kafka' 37 | 38 | # we need to rewrite the config each time, because something may 39 | # have changed - i.e., a failover. this is just a stopgap solution 40 | # pending resolution of LP-809 41 | mysql_cnf_builder.create_maxwell_config(client_id, instance, 42 | None, output_target, 43 | gtid_mode) 44 | 45 | # Check for the Maxwell PID file and then see if it belongs to Maxwell. 46 | maxwell_running = False 47 | try: 48 | with open(environment_specific.MAXWELL_PID, "r") as f: 49 | pid = f.read() 50 | 51 | proc = psutil.Process(int(pid)) 52 | cmdline = proc.cmdline() 53 | 54 | if 'java' in cmdline and 'com.zendesk.maxwell.Maxwell' in cmdline: 55 | maxwell_running = True 56 | 57 | except (IOError, psutil.NoSuchProcess, psutil.ZombieProcess): 58 | # No PID file or no process matching said PID, so maxwell is definitely 59 | # not running. If maxwell is a zombie then it's not running either. 60 | pass 61 | 62 | if maxwell_running: 63 | log.debug('Maxwell is already running') 64 | return 65 | 66 | if instance.hostname_prefix in environment_specific.MAXWELL_TARGET_MAP: 67 | host_utils.manage_maxwell(instance.port) 68 | log.info('Started Maxwell process') 69 | 70 | 71 | def restart_pt_kill_if_not_exists(instance): 72 | """ 73 | Restarts ptkill if it isn't currently running 74 | 75 | Args: 76 | instance (host_utils.HostAddr): host to check for ptkill 77 | 78 | Returns: 79 | None 80 | """ 81 | connected_users = mysql_lib.get_connected_users(instance) 82 | ptkill_user, ptkill_pass = mysql_lib.get_mysql_user_for_role('ptkill') 83 | if ptkill_user not in connected_users: 84 | host_utils.manage_pt_kill(instance.port) 85 | log.info('Started Processes ptkill') 86 | 87 | 88 | def manage_pt_heartbeat(instance): 89 | """ 90 | Restarts ptheartbeat if it isn't currently running and the 91 | replica role type is master, or stop it if it is running on 92 | a non-master. 
93 | 94 | Args: 95 | instance (host_utils.HostAddr): host to check for ptheartbeat 96 | 97 | Returns: 98 | None 99 | """ 100 | connected_users = mysql_lib.get_connected_users(instance) 101 | zk = host_utils.MysqlZookeeper() 102 | try: 103 | replica_type = zk.get_replica_type_from_instance(instance) 104 | except: 105 | replica_type = None 106 | pthb_user, pthb_pass = mysql_lib.get_mysql_user_for_role('ptheartbeat') 107 | if replica_type == host_utils.REPLICA_ROLE_MASTER and \ 108 | pthb_user not in connected_users: 109 | host_utils.manage_pt_heartbeat(instance.port) 110 | log.info('Started process pt-heartbeat') 111 | elif replica_type != host_utils.REPLICA_ROLE_MASTER and \ 112 | pthb_user in connected_users: 113 | host_utils.manage_pt_heartbeat(instance.port, action='stop') 114 | log.info('Stopped pt-heartbeat on non-master replica') 115 | 116 | 117 | def main(): 118 | parser = argparse.ArgumentParser( 119 | description='Restarts ptkill and ptheartbeat ' 120 | 'if they aren\'t running under ' 121 | 'the right conditions' 122 | ) 123 | parser.add_argument('action', 124 | help='Action to take', 125 | default='all', 126 | nargs='?', 127 | choices=['ptkill', 'ptheartbeat', 'maxwell', 'all']) 128 | args = parser.parse_args() 129 | 130 | instance = host_utils.HostAddr(host_utils.HOSTNAME) 131 | 132 | if args.action == 'all' or args.action == 'ptkill': 133 | restart_pt_kill_if_not_exists(instance) 134 | 135 | if args.action == 'all' or args.action == 'ptheartbeat': 136 | manage_pt_heartbeat(instance) 137 | 138 | if args.action == 'all' or args.action == 'maxwell': 139 | restart_maxwell_if_not_exists(instance) 140 | 141 | 142 | if __name__ == "__main__": 143 | environment_specific.initialize_logger() 144 | main() 145 | -------------------------------------------------------------------------------- /safe_uploader.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import multiprocessing 3 | import os 4 | import subprocess 5 | import tempfile 6 | import time 7 | import urllib 8 | 9 | import boto 10 | import psutil 11 | 12 | PROGRESS_PROC = 'progress' 13 | PV = ['/usr/bin/pv', '-peafbt'] 14 | PYTHON = 'python' 15 | REPEATER_PROC = 'repeater' 16 | REPEATER_SCRIPT = 'safe_uploader_repeater.py' 17 | S3_SCRIPT = '/usr/local/bin/gof3r' 18 | SLEEP_TIME = .25 19 | TERM_STRING = 'TIME_TO_DIE' 20 | UPLOADER_PROC = 'uploader' 21 | 22 | log = logging.getLogger(__name__) 23 | 24 | 25 | def safe_upload(precursor_procs, stdin, bucket, key, 26 | check_func=None, check_arg=None, verbose=False): 27 | """ For sures, safely upload a file to s3 28 | 29 | Args: 30 | precursor_procs - A dict of procs that will be monitored 31 | stdin - The stdout from the last proc in precursor_procs that will be 32 | uploaded 33 | bucket - The s3 bucket where we should upload the data 34 | key - The name of the key which will be the destination of the data 35 | check_func - An optional function that if supplied will be run after all 36 | procs in precursor_procs have finished. If the uploader should 37 | abort, then an exception should be thrown in the function. 
38 | check_args - The arguments to supply to the check_func 39 | verbose - If True, display upload speeds statistics and destination 40 | """ 41 | upload_procs = dict() 42 | devnull = open(os.devnull, 'w') 43 | term_path = None 44 | try: 45 | term_path = get_term_file() 46 | if verbose: 47 | log.info('Uploading to s3://{buk}/{key}'.format(buk=bucket, 48 | key=key)) 49 | upload_procs[PROGRESS_PROC] = subprocess.Popen( 50 | PV, 51 | stdin=stdin, 52 | stdout=subprocess.PIPE) 53 | stdin = upload_procs[PROGRESS_PROC].stdout 54 | upload_procs[REPEATER_PROC] = subprocess.Popen( 55 | [PYTHON, get_exec_path(), term_path], 56 | stdin=stdin, 57 | stdout=subprocess.PIPE) 58 | upload_procs[UPLOADER_PROC] = subprocess.Popen( 59 | [S3_SCRIPT, 60 | 'put', 61 | '-k', urllib.quote_plus(key), 62 | '-b', bucket], 63 | stdin=upload_procs[REPEATER_PROC].stdout, 64 | stderr=devnull) 65 | 66 | # While the precursor procs are running, we need to make sure 67 | # none of them have errors and also check that the upload procs 68 | # also don't have errors. 69 | while not check_dict_of_procs(precursor_procs): 70 | check_dict_of_procs(upload_procs) 71 | time.sleep(SLEEP_TIME) 72 | 73 | # Once the precursor procs have exited successfully, we will run 74 | # any defined check function 75 | if check_func: 76 | check_func(check_arg) 77 | 78 | # And then create the term file which will cause the repeater and 79 | # uploader to exit 80 | write_term_file(term_path) 81 | 82 | # And finally we will wait for the uploader procs to exit without error 83 | while not check_dict_of_procs(upload_procs): 84 | time.sleep(SLEEP_TIME) 85 | except: 86 | clean_up_procs(upload_procs, precursor_procs) 87 | raise 88 | finally: 89 | if term_path: 90 | os.remove(term_path) 91 | 92 | # Assuming all went well, return the new S3 key. 93 | conn = boto.connect_s3() 94 | bucket_conn = conn.get_bucket(bucket, validate=False) 95 | return bucket_conn.get_key(key) 96 | 97 | 98 | def get_exec_path(): 99 | """ Get the path to this executable 100 | 101 | Returns: 102 | the path as a string of this script 103 | """ 104 | path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 105 | REPEATER_SCRIPT) 106 | return path 107 | 108 | 109 | def get_term_file(): 110 | """ Get a path to a file which can be used to communicate 111 | 112 | Returns 113 | a path to a file created by tempfile.mkstemp 114 | """ 115 | (handle, path) = tempfile.mkstemp() 116 | os.close(handle) 117 | return path 118 | 119 | 120 | def write_term_file(term_path): 121 | """ Create the termination file 122 | 123 | Args: 124 | term_path - Where to write the magic string to terminate the repeater 125 | """ 126 | with open(term_path, 'w') as term_handle: 127 | term_handle.write(TERM_STRING) 128 | 129 | def try_kill(proc): 130 | """ Try to kill a process 131 | 132 | Args: 133 | proc - A process created by subprocess.Popen 134 | """ 135 | if not psutil.pid_exists(proc.pid): 136 | return 137 | 138 | try: 139 | proc.kill() 140 | proc.wait() 141 | except: 142 | pass 143 | 144 | 145 | def check_dict_of_procs(proc_dict): 146 | """ Check a dict of process for exit, error, etc... 
147 | 148 | Args: 149 | A dict of processes 150 | 151 | Returns: True if all processes have completed with return status 0 152 | False is some processes are still running 153 | An exception is generated if any processes have completed with a 154 | returns status other than 0 155 | """ 156 | success = True 157 | for proc in proc_dict: 158 | ret = proc_dict[proc].poll() 159 | if ret is None: 160 | # process has not yet terminated 161 | success = False 162 | elif ret != 0: 163 | if multiprocessing.current_process().name != 'MainProcess': 164 | proc_id = '{}: '.format(multiprocessing.current_process().name) 165 | else: 166 | proc_id = '' 167 | 168 | raise Exception('{proc_id}{proc} encountered an error' 169 | ''.format(proc_id=proc_id, 170 | proc=proc)) 171 | return success 172 | 173 | 174 | def clean_up_procs(upload_procs, precursor_procs): 175 | """ Clean up the pipeline procs in a safe order 176 | 177 | Args: 178 | upload_procs - A dictionary of procs used for the upload 179 | precursor_procs - A dictionary of procs that feed the uploader 180 | """ 181 | # So there has been some sort of a problem. We want to make sure that 182 | # we kill the uploader so that under no circumstances the upload is 183 | # successfull with bad data 184 | if UPLOADER_PROC in upload_procs: 185 | try_kill(upload_procs[UPLOADER_PROC]) 186 | del upload_procs[UPLOADER_PROC] 187 | 188 | # Next the repeater and the pv (if applicable) 189 | for proc in upload_procs: 190 | try_kill(upload_procs[proc]) 191 | 192 | # And finally whatever is feeding the uploader 193 | for proc in precursor_procs: 194 | try_kill(precursor_procs[proc]) 195 | -------------------------------------------------------------------------------- /safe_uploader_repeater.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ This module will is a helper to safe_uploader and should not be called 3 | directly. 4 | """ 5 | import argparse 6 | import os 7 | import re 8 | import subprocess 9 | import sys 10 | import time 11 | 12 | import safe_uploader 13 | 14 | 15 | BLOCK_SIZE = 262144 16 | INIT_PID = 1 17 | SLEEP_TIME = .25 18 | STDIN = 0 19 | STDOUT = 1 20 | 21 | 22 | def main(): 23 | parser = argparse.ArgumentParser() 24 | parser.add_argument("terminate_path", 25 | help='When a file appears at this path, exit') 26 | args = parser.parse_args() 27 | repeater(args.terminate_path) 28 | 29 | 30 | def repeater(terminate_path): 31 | """ Repeat data from stdin to stdout until no more data is present comes 32 | from stdin AND a stop file is populated with a magic value. 33 | 34 | Args: 35 | terminate_path - The path of the stop file 36 | """ 37 | while True: 38 | if os.getppid() == INIT_PID: 39 | kill_stdout_reader() 40 | raise Exception('Safe uploader proc is now somehow the child of ' 41 | 'proc 1. This means that that parent of the ' 42 | 'repeater process no longer is no longer in ' 43 | 'control. 
Lacking any good option, the repeater ' 44 | 'process will terminate.') 45 | 46 | data = sys.stdin.read(BLOCK_SIZE) 47 | if len(data) == 0: 48 | time.sleep(SLEEP_TIME) 49 | 50 | # write empty data to detect broken pipes 51 | sys.stdout.write(data) 52 | 53 | if os.path.exists(terminate_path): 54 | if check_term_file(terminate_path): 55 | sys.exit(0) 56 | else: 57 | sys.stdout.write(data) 58 | 59 | if len(data) < BLOCK_SIZE: 60 | sys.stdout.flush() 61 | 62 | 63 | def check_term_file(term_path): 64 | """ Check to see if a term file has been populated with a magic string 65 | meaning that the repeater code can terminate 66 | 67 | Returns 68 | True if the file has been populated, false otherwise 69 | """ 70 | with open(term_path, 'r') as term_handle: 71 | contents = term_handle.read(len(safe_uploader.TERM_STRING)) 72 | return contents == safe_uploader.TERM_STRING 73 | 74 | 75 | def kill_stdout_reader(): 76 | """ Kill whatever is on the otherside of stdout """ 77 | std_out_fd = '/proc/{pid}/fd/{stdout}'.format(pid=os.getpid(), 78 | stdout=STDOUT) 79 | readlink = os.readlink(std_out_fd) 80 | pipe_node = re.match('pipe:\[([0-9]+)]', readlink).groups()[0] 81 | cmd = ("lsof | " 82 | "awk '{{if($4 == \"{stdin}r\" && $8 == {pipe_node}) print $2}}'" 83 | "".format(stdin=str(STDIN), 84 | pipe_node=pipe_node)) 85 | lsof = subprocess.Popen(cmd, shell=True, 86 | stdout=subprocess.PIPE) 87 | lsof.wait() 88 | stdout_reader_pid = int(lsof.stdout.read()) 89 | try: 90 | os.kill(stdout_reader_pid, 9) 91 | except: 92 | pass 93 | # Nothing really to be done here, it is probalby hopeless to try 94 | # to do anything more. 95 | 96 | 97 | if __name__ == "__main__": 98 | main() 99 | -------------------------------------------------------------------------------- /schema_verifier.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import hashlib 4 | import sys 5 | import difflib 6 | 7 | from lib import environment_specific 8 | from lib import host_utils 9 | from lib import mysql_lib 10 | 11 | 12 | def main(): 13 | zk = host_utils.MysqlZookeeper() 14 | all_sharded_systems = (list(zk.get_sharded_types()) + 15 | environment_specific.FLEXSHARD_DBS.keys()) 16 | parser = argparse.ArgumentParser(description='MySQL schema verifier') 17 | parser.add_argument('instance_type', 18 | help='Type of MySQL instance to verify', 19 | choices=all_sharded_systems) 20 | parser.add_argument('table', 21 | help='Table to check',) 22 | parser.add_argument('seed_instance', 23 | help=('Which host from which to fetch a table ' 24 | ' definition. (format hostname[:port])'),) 25 | parser.add_argument('seed_db', 26 | help=('Which db on --seed_instance from which to fetch' 27 | ' a table definition. 
(ex pbdata012345)')) 28 | args = parser.parse_args() 29 | seed_instance = host_utils.HostAddr(args.seed_instance) 30 | desired = mysql_lib.show_create_table(seed_instance, args.seed_db, args.table) 31 | tbl_hash = hashlib.md5(desired).hexdigest() 32 | print ("Desired table definition:\n{desired}").format(desired=desired) 33 | incorrect = check_schema(args.instance_type, args.table, tbl_hash) 34 | if len(incorrect) == 0: 35 | print "It appears that all schema is synced" 36 | sys.exit(0) 37 | 38 | d = difflib.Differ() 39 | for problem in incorrect.iteritems(): 40 | represenative = list(problem[1])[0].split(' ') 41 | hostaddr = host_utils.HostAddr(represenative[0]) 42 | create = mysql_lib.show_create_table(hostaddr, 43 | represenative[1], 44 | args.table) 45 | diff = d.compare(desired.splitlines(), create.splitlines()) 46 | print 'The following difference has been found:' 47 | print '\n'.join(diff) 48 | print "It is present on the following db's:" 49 | print '\n'.join(list(problem[1])) 50 | sys.exit(1) 51 | 52 | 53 | def check_schema(instance_type, tablename, tbl_hash): 54 | """Verify that a table across an entire tier has the expected schema 55 | 56 | Args: 57 | zk_prefix - The prefix of the key ZK 58 | table - the name of the table to verify 59 | tbl_hash - the md5sum of the desired CREATE TABLE for the table 60 | 61 | Returns: 62 | A dictionary with keys that are the hash of the CREATE TABLE statement 63 | and the values are sets of hostname:port followed by a space and then the 64 | db one which the incorrect schema was found. 65 | """ 66 | incorrect = dict() 67 | zk = host_utils.MysqlZookeeper() 68 | for replica_set in zk.get_replica_sets_by_shard_type(instance_type): 69 | 70 | for role in host_utils.REPLICA_TYPES: 71 | instance = zk.get_mysql_instance_from_replica_set(replica_set, role) 72 | hashes = check_instance_table(instance, tablename, tbl_hash) 73 | for entry in hashes.iteritems(): 74 | if entry[0] not in incorrect: 75 | incorrect[entry[0]] = set() 76 | incorrect[entry[0]] = incorrect[entry[0]].union(entry[1]) 77 | return incorrect 78 | 79 | 80 | def check_instance_table(hostaddr, table, desired_hash): 81 | """ Check that a table on a MySQL instance has the expected schema 82 | 83 | Args: 84 | hostaddr - object describing which mysql instance to connect to 85 | table - the name of the table to verify 86 | desired_hash - the md5sum of the desired CREATE TABLE for the table 87 | 88 | Returns: 89 | A dictionary with keys that are the hash of the CREATE TABLE statement 90 | and the values are sets of hostname:port followed by a space and then the 91 | db one which the incorrect schema was found. 
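        Example of the returned structure (hypothetical host, db, and hash):
            {'<md5 of the CREATE TABLE found>': set(['sharddb-1:3306 db00001'])}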
92 | """ 93 | ret = dict() 94 | for db in mysql_lib.get_dbs(hostaddr): 95 | definition = mysql_lib.show_create_table(hostaddr, db, table) 96 | tbl_hash = hashlib.md5(definition).hexdigest() 97 | if tbl_hash != desired_hash: 98 | if tbl_hash not in ret: 99 | ret[tbl_hash] = set() 100 | ret[tbl_hash].add(''.join((hostaddr.__str__(), 101 | ' ', 102 | db))) 103 | return ret 104 | 105 | if __name__ == "__main__": 106 | main() 107 | -------------------------------------------------------------------------------- /start_shard_migration.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import logging 4 | import time 5 | import uuid 6 | 7 | from lib import mysql_lib 8 | from lib import host_utils 9 | from lib import backup 10 | from lib import environment_specific 11 | import find_shard_mismatches 12 | import mysql_record_table_size 13 | import mysql_restore 14 | 15 | # Do not allow migrate so the destination would be above DISK_LIMIT 16 | # percent full 17 | DISK_LIMIT = .65 18 | # Terrible escaping... Desired outcome is of the form (wrapped for clarity) 19 | # ssh root@testmodsharddb-1-86 "df /raid0/mysql/3306/data/ | 20 | # awk \"{print (0.65*\\\$2 - \\\$3)/1024}\" | tail -n1" 21 | 22 | MIGRATION_SPACE_CMD = ('ssh root@{hostname} "df {datadir} | awk \\\"{{print ' 23 | '({disk_limit}*\\\\\$2 - \\\\\$3)/1024}}\\\" | ' 24 | 'tail -n1"') 25 | STATUS_ABORTED = 'ABORTED' 26 | STATUS_EXPORT_FAILED = 'Export failed' 27 | STATUS_FAILOVER_READY = 'Ready for failover' 28 | STATUS_FINISHED = 'Finished' 29 | STATUS_IMPORTING = 'Importing' 30 | log = logging.getLogger(__name__) 31 | 32 | def main(): 33 | parser = argparse.ArgumentParser() 34 | parser.add_argument('source_replica_set', 35 | help='Which replica set to move shards *FROM*') 36 | parser.add_argument('destination_replica_set', 37 | help='Which replica set to move shards *TO*') 38 | parser.add_argument('databases', 39 | help=('Which databases to move from source_replica_set' 40 | 'to destination_replica_set'), 41 | nargs='+') 42 | parser.add_argument('--dry_run', 43 | help=('Do not actually run a migration, just run ' 44 | 'safety checks, etc...'), 45 | default=False, 46 | action='store_true') 47 | args = parser.parse_args() 48 | 49 | start_shard_migration(args.source_replica_set, 50 | args.destination_replica_set, 51 | set(args.databases)) 52 | 53 | 54 | def start_shard_migration(source_replica_set, destination_replica_set, 55 | mig_dbs): 56 | """ Move shards from one replica set to another 57 | 58 | Args: 59 | source_replica_set - Which replica set to take the shards from 60 | destination_replica_set - Which replica set to put the shards on 61 | mig_dbs - A set of databases to be migrated 62 | """ 63 | # In 2017Q1 shardb and modsharddb will learn how to deal with shard 64 | # migrations. We will block them for now. 65 | if source_replica_set.startswith('db') or \ 66 | source_replica_set.startswith('moddb'): 67 | raise Exception('Sharddb and modsharddb migrations are not yet ' 68 | 'supported') 69 | 70 | if source_replica_set == destination_replica_set: 71 | raise Exception('Source and destination can not be the same!') 72 | # Dealing with failures, potentially due to failovers seems scary 73 | # here. We are intentionally not catching exception as this seems racy 74 | # and it would be far better for the entire process to fail than to mess 75 | # with replication during a failover. 
76 | log.info('Requested to migrate from {s} to {d} databases: {db}' 77 | ''.format(s=source_replica_set, 78 | d=destination_replica_set, 79 | db=', '.join(mig_dbs))) 80 | 81 | zk = host_utils.MysqlZookeeper() 82 | source_master = zk.get_mysql_instance_from_replica_set(source_replica_set) 83 | source_slave = zk.get_mysql_instance_from_replica_set( 84 | source_replica_set, host_utils.REPLICA_ROLE_DR_SLAVE) 85 | 86 | if not source_slave: 87 | source_slave = zk.get_mysql_instance_from_replica_set( 88 | source_replica_set, host_utils.REPLICA_ROLE_SLAVE) 89 | log.info('Source host for dumping data {}'.format(source_slave)) 90 | destination_master = zk.get_mysql_instance_from_replica_set( 91 | destination_replica_set) 92 | log.info('Destination host for restoring data {}' 93 | ''.format(destination_master)) 94 | 95 | expected_dbs_on_source = zk.get_sharded_dbs_by_replica_set()[source_replica_set] 96 | non_mig_dbs = mysql_lib.get_dbs(source_slave).difference(mig_dbs) 97 | unexpected_dbs = mig_dbs.difference(expected_dbs_on_source) 98 | if unexpected_dbs: 99 | raise Exception('Unexpected database supplied for migraton: {}' 100 | ''.format(unexpected_dbs)) 101 | 102 | # Make sure there are no missing or extra shards 103 | precheck_schema(source_master) 104 | precheck_schema(destination_master) 105 | 106 | # Check disk space 107 | required_disk_space = get_required_disk_space(mig_dbs, source_master) 108 | available_disk_space = disk_space_available_for_migration(destination_master) 109 | if available_disk_space < required_disk_space: 110 | raise Exception('Insufficent disk space to migrate, ' 111 | 'available {a}MB, ' 112 | 'requred {r}MB' 113 | ''.format(a=available_disk_space, 114 | r=required_disk_space)) 115 | else: 116 | log.info('Disk space looks ok: ' 117 | 'available {a}MB, ' 118 | 'requred {r}MB' 119 | ''.format(a=available_disk_space, 120 | r=required_disk_space)) 121 | 122 | # Let's take out a lock to make sure we don't have multiple migrations 123 | # running on the same replica sets (either source or destination). 
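    # The lock row stays 'active' through the import; finish_migration_log()
    # clears it if the export fails below, and finish_shard_migration.py /
    # clean_up_unfinished_migration.py (referenced in the log messages) are
    # presumably responsible for releasing it after that.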
124 | lock_id = take_migration_lock(source_replica_set, destination_replica_set, 125 | mig_dbs, non_mig_dbs) 126 | try: 127 | if(non_mig_dbs): 128 | # First we will dump the schema for the shards that are not moving 129 | log.info('Backing up non-migrating schema: {}'.format(non_mig_dbs)) 130 | no_mig_backup = backup.logical_backup_instance( 131 | source_slave, time.localtime(), 132 | blackhole=True, databases=non_mig_dbs) 133 | 134 | time.sleep(1) 135 | # And next the metadata db 136 | log.info('Backing up metadata db: {}'.format(mysql_lib.METADATA_DB)) 137 | metadata_backup = backup.logical_backup_instance( 138 | source_slave, time.localtime(), 139 | databases=[mysql_lib.METADATA_DB]) 140 | 141 | time.sleep(1) 142 | # Next we will backup the data for the shards that are moving 143 | log.info('Backing up migrating schema data: {}'.format(mig_dbs)) 144 | mig_backup = backup.logical_backup_instance( 145 | source_slave, time.localtime(), 146 | databases=mig_dbs) 147 | except: 148 | finish_migration_log(lock_id, STATUS_EXPORT_FAILED) 149 | raise 150 | 151 | if(non_mig_dbs): 152 | # Finally import the backups 153 | log.info('Importing all the blackhole tables') 154 | mysql_restore.logical_restore(no_mig_backup, destination_master) 155 | 156 | log.info('Import metadata') 157 | mysql_restore.logical_restore(metadata_backup, destination_master) 158 | 159 | log.info('Setting up replication') 160 | mysql_lib.change_master(destination_master, source_master, 161 | 'BOGUS', 0, no_start=True, skip_set_readonly=True, 162 | gtid_auto_pos=False) 163 | mysql_restore.logical_restore(mig_backup, destination_master) 164 | 165 | # add start slave, catchup 166 | mysql_lib.start_replication(destination_master) 167 | mysql_lib.wait_for_catch_up(destination_master, migration=True) 168 | 169 | # And update the log/locks 170 | update_migration_status(lock_id, STATUS_FAILOVER_READY) 171 | log.info('The migration is ready to be finished by running:') 172 | log.info('/usr/local/bin/mysql_utils/finish_shard_migration.py {src}' 173 | ''.format(src=source_replica_set)) 174 | 175 | 176 | def take_migration_lock(source_replica_set, destination_replica_set, 177 | mig_dbs, non_mig_dbs): 178 | """ Take a migration lock to ensure no other migration are run concurrenly 179 | 180 | Args: 181 | source_replica_set - Which replica set to take the shards from 182 | destination_replica_set - Which replica set to put the shards on 183 | mig_dbs - The names of the databases which map to the shards which 184 | are being migrated 185 | non_mig_dbs - The names of the databases which are created with blackhole 186 | tables for replication to function. 
187 | 188 | Returns: a lock identifier 189 | """ 190 | conn = mysql_lib.get_mysqlops_connections() 191 | cursor = conn.cursor() 192 | lock_identifier = str(uuid.uuid4()) 193 | log.info('Migration lock identifier is {}'.format(lock_identifier)) 194 | 195 | log.info('Checking existing locks') 196 | existing_lock = check_migration_lock(source_replica_set) 197 | if not existing_lock: 198 | existing_lock = check_migration_lock(destination_replica_set) 199 | if existing_lock: 200 | log.error('Lock is already held by {}'.format(existing_lock)) 201 | log.error('You can abort this migration by running:') 202 | log.error('/usr/local/bin/mysql_utils/clean_up_unfinished_migration.py {}' 203 | ''.format(existing_lock['source_replica_set'])) 204 | raise Exception('Can not take migration lock') 205 | 206 | params = {'lock': lock_identifier, 207 | 'source_replica_set': source_replica_set, 208 | 'destination_replica_set': destination_replica_set, 209 | 'mig_dbs': ', '.join(mig_dbs), 210 | 'non_mig_dbs': ', '.join(non_mig_dbs), 211 | 'status': STATUS_IMPORTING} 212 | 213 | # Todo: turn on locking checking, swich to INSERT 214 | sql = ("INSERT INTO mysqlops.mysql_migration_locks " 215 | "SET " 216 | "lock_identifier = %(lock)s, " 217 | "lock_active = 'active', " 218 | "created_at = NOW(), " 219 | "released = NULL, " 220 | "source_replica_set = %(source_replica_set)s, " 221 | "destination_replica_set = %(destination_replica_set)s, " 222 | "mig_databases = %(mig_dbs)s, " 223 | "non_mig_databases = %(non_mig_dbs)s, " 224 | "status = %(status)s ") 225 | cursor.execute(sql, params) 226 | conn.commit() 227 | log.info(cursor._executed) 228 | return lock_identifier 229 | 230 | 231 | def update_migration_status(lock_identifier, status): 232 | """ Update the migration lock table 233 | 234 | Args: 235 | lock_identifier - a lock id as returned by take_migration_lock 236 | status - The new status 237 | """ 238 | conn = mysql_lib.get_mysqlops_connections() 239 | cursor = conn.cursor() 240 | 241 | params = {'lock': lock_identifier, 242 | 'status': status} 243 | sql = ("UPDATE mysqlops.mysql_migration_locks " 244 | "SET " 245 | "status = %(status)s " 246 | "WHERE " 247 | "lock_identifier = %(lock)s ") 248 | cursor = conn.cursor() 249 | cursor.execute(sql, params) 250 | conn.commit() 251 | log.info(cursor._executed) 252 | 253 | def finish_migration_log(lock_identifier, status): 254 | """ Update the migration lock table and release the lock 255 | 256 | Args: 257 | lock_identifier - a lock id as returned by take_migration_lock 258 | status - The new status 259 | """ 260 | conn = mysql_lib.get_mysqlops_connections() 261 | cursor = conn.cursor() 262 | 263 | params = {'lock': lock_identifier, 264 | 'status': status} 265 | sql = ("UPDATE mysqlops.mysql_migration_locks " 266 | "SET " 267 | "status = %(status)s, " 268 | "lock_active = NULL " 269 | "WHERE " 270 | "lock_identifier = %(lock)s ") 271 | cursor = conn.cursor() 272 | cursor.execute(sql, params) 273 | conn.commit() 274 | log.info(cursor._executed) 275 | 276 | 277 | def check_migration_lock(replica_set): 278 | """ Confirm there are no active locks that would block taking a 279 | migration lock 280 | 281 | Args: 282 | replica_set - A name of a replica set 283 | """ 284 | conn = mysql_lib.get_mysqlops_connections() 285 | cursor = conn.cursor() 286 | params = {'replica_set': replica_set} 287 | sql = ('SELECT lock_identifier, ' 288 | ' source_replica_set, ' 289 | ' destination_replica_set, ' 290 | ' mig_databases, ' 291 | ' non_mig_databases, ' 292 | ' status ' 293 | 'FROM 
mysqlops.mysql_migration_locks ' 294 | "WHERE lock_active = 'active' AND " 295 | "( source_replica_set = %(replica_set)s OR" 296 | " destination_replica_set = %(replica_set)s )") 297 | cursor.execute(sql, params) 298 | row = cursor.fetchone() 299 | log.info(cursor._executed) 300 | return row 301 | 302 | 303 | def precheck_schema(instance): 304 | """ Make sure the existing state is sane 305 | 306 | Args: 307 | instance - a hostAddr instance 308 | """ 309 | orphaned, orphaned_but_used, missing = \ 310 | find_shard_mismatches.find_shard_mismatches(instance) 311 | if (orphaned or orphaned_but_used): 312 | raise Exception('Unexpected shards are on {inst}. You can try to ' 313 | 'clean them up using: ' 314 | '/usr/local/bin/mysql_utils/fix_orphaned_shards.py ' 315 | '-a rename -i {inst}' 316 | ''.format(inst=instance)) 317 | if missing: 318 | raise Exception('Shards are missing on {}. This is really weird ' 319 | 'and needs to be debugged'.format(instance)) 320 | 321 | 322 | def disk_space_available_for_migration(instance): 323 | """ Check the disk space available for migrations on the data dir mount 324 | 325 | Args: 326 | instance - A hostaddr object 327 | 328 | Returns: The number of MB available 329 | """ 330 | datadir = mysql_lib.get_global_variables(instance)['datadir'] 331 | cmd = MIGRATION_SPACE_CMD.format(hostname=instance.hostname, 332 | datadir=datadir, 333 | disk_limit=DISK_LIMIT) 334 | log.info(cmd) 335 | out, err, ret = host_utils.shell_exec(cmd) 336 | return float(out.strip()) 337 | 338 | 339 | def get_required_disk_space(databases, instance): 340 | """ Determine how much disk space is needed for a migration 341 | 342 | Args: 343 | databases - A list of databases to be migrated 344 | instance - A hostaddr object 345 | 346 | Returns - The number of MB needed for the migration 347 | """ 348 | required_disk_space = 0 349 | for db in databases: 350 | try: 351 | required_disk_space += mysql_record_table_size.get_db_size_from_log( 352 | instance, db) 353 | except: 354 | log.info('Exact table size is unavailable for {}, using estimate' 355 | ''.format(db)) 356 | required_disk_space += mysql_lib.get_approx_schema_size(instance, db) 357 | return required_disk_space 358 | 359 | 360 | if __name__ == "__main__": 361 | environment_specific.initialize_logger() 362 | main() 363 | -------------------------------------------------------------------------------- /zdict_gen/README.md: -------------------------------------------------------------------------------- 1 | # Compression lookback dictionary creation tools 2 | 3 | ## Tools 4 | - **zdict_freqs.py** 5 | This tool will accepts two files as arguments. These files should be populated 6 | with example data, with each line having a separate instance of data to be 7 | compared. The script will read through each files and compare the data on the 8 | first line of the first file to the first line of the second file, the second 9 | line of the first file to the second line of the second and so on. 10 | 11 | When the script is finished, it will output a JSON encoded dictionary of 12 | common substrings. 13 | 14 | - **zdict_gen.py** 15 | This tool will accept a file with the output from zdict_freqs.py and an 16 | optional --size argument. The script will then construct a lookback compression 17 | dictionary limited to a size defined by the --size argument. 
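## Example

The string that `zdict_gen.py` writes to stdout is meant to be used as the
pre-defined dictionary (`zdict`) for DEFLATE. A minimal sketch of consuming it
with Python's `zlib` module; the file names `zdict.bin` and `sample.json` are
placeholders for illustration, not files these tools create or require:

```python
import zlib

# Dictionary produced by zdict_gen.py (placeholder file name).
with open('zdict.bin', 'rb') as f:
    zdict = f.read()

# One data object of the kind the dictionary was trained on (placeholder).
with open('sample.json', 'rb') as f:
    data = f.read()

# Compress using the pre-defined dictionary...
compressor = zlib.compressobj(zdict=zdict)
compressed = compressor.compress(data) + compressor.flush()

# ...and decompress with the same dictionary on the receiving side.
decompressor = zlib.decompressobj(zdict=zdict)
assert decompressor.decompress(compressed) == data
```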
18 | 19 | -------------------------------------------------------------------------------- /zdict_gen/test_zdict_freqs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.3 2 | 3 | import zdict_freqs as zdf 4 | import os 5 | import tempfile 6 | import unittest 7 | 8 | test_data1 = '''Lorem ipsum dolor sit amet, eu habitant ac odio integer ac 9 | odio. Cras dolor tempus ultrices, adipiscing commodi in, morbi sed, nec 10 | vestibulum urna. Vel scelerisque amet. Aliquet ridiculus, nec et diam nostra 11 | commodo. Blandit vitae quam maecenas. Rhoncus morbi mauris, faucibus massa 12 | velit sollicitudin sociosqu, in nec adipiscing. Dolor elit phasellus, suscipit 13 | porttitor euismod nunc, vel in ridiculus sem amet turpis massa. Vestibulum 14 | pulvinar consectetuer tortor lobortis, magna dictum libero egestas enim, 15 | lectus ullamcorper ultricies, ipsum cursus vel tempus, ut a in. Mauris ut arcu 16 | qui vestibulum duis, lacinia ultrices non sed, aut dolor nunc, ridiculus id, 17 | amet litora vel diam. Integer est sodales nec faucibus.''' 18 | 19 | test_data2 = '''Lorem ipsum dolor sit amet, eu habitant ac odio integer ac 20 | odio. Cras dolor tempus ultrices, adipiscing commodi in, morbi sed, nec 21 | urna. Vel scelerisque amet. Aliquet ridiculus, nec et diam nostra 22 | commodo. Blandit vitae quam maecenas. Rhoncus morbi mauris, faucibus massa 23 | velit sollici ''' 24 | 25 | test_fname1 = 'Pamplemousse' 26 | test_fname2 = 'LaCroix' 27 | 28 | # generate using http://www.miraclesalad.com/webtools/md5.php 29 | fname_md5 = '259dabbf4c050b3db874eb00d2a5dabb' 30 | 31 | 32 | class TestUpdatePinZDict(unittest.TestCase): 33 | 34 | def __init__(self, *args, **kwargs): 35 | super().__init__(*args, **kwargs) 36 | zdict_set = zdf.updatePinZDict(test_data1, test_data2) 37 | self.zdict_set_str = ''.join(zdict_set) 38 | 39 | def test_simple_match(self): 40 | """ 41 | Common word should be included 42 | """ 43 | self.assertTrue('amet' in self.zdict_set_str) 44 | 45 | def test_no_non_common_match(self): 46 | """ 47 | Word not common to both should not be included 48 | """ 49 | self.assertFalse('litora' in self.zdict_set_str) 50 | 51 | def test_no_self_match(self): 52 | """ 53 | Word should not be counted as matched if it is only present multiple 54 | times in its own dataset and not in the other 55 | """ 56 | self.assertFalse('vestibulum' in self.zdict_set_str) 57 | 58 | 59 | class TestGetMD5(unittest.TestCase): 60 | 61 | def test_correct_md5(self): 62 | """ 63 | MD5 hash method should match expected MD5 64 | """ 65 | self.assertEqual(fname_md5, zdf.getMD5(test_fname1, test_fname2)) 66 | 67 | 68 | class TestRestoreState(unittest.TestCase): 69 | 70 | def test_invalid_file_format(self): 71 | """ 72 | An input state file with invalid format should throw an Exception 73 | """ 74 | tf = tempfile.NamedTemporaryFile(mode='w+', delete=False) 75 | try: 76 | tf.write('{0}\n{1}\n{2}\nPin'.format(fname_md5, 4, "{ }")) 77 | tf.flush() 78 | tf.seek(0) 79 | self.assertRaises(Exception, zdf.restoreState, 80 | test_fname1, test_fname2, tf.name) 81 | finally: 82 | tf.close() 83 | os.remove(tf.name) 84 | 85 | def test_wrong_md5(self): 86 | """ 87 | An input state file with wrong MD5 hash should throw an Exception 88 | """ 89 | tf = tempfile.NamedTemporaryFile(mode='w+', delete=False) 90 | wrong_md5 = '2a4a2782c0782d65f7a9d2ff5fe7a638' # md5 of 'Wrong hash' 91 | try: 92 | tf.write('{0}\n{1}\n{2}'.format(wrong_md5, 4, "{ }")) 93 | tf.flush() 94 | tf.seek(0) 95 | 
self.assertRaises(Exception, zdf.restoreState, 96 | test_fname1, test_fname2, tf.name) 97 | finally: 98 | tf.close() 99 | os.remove(tf.name) 100 | 101 | def test_invalid_line_num(self): 102 | """ 103 | An input state file with an invalid line number should throw an 104 | Exception 105 | """ 106 | tf = tempfile.NamedTemporaryFile(mode='w+', delete=False) 107 | try: 108 | tf.write('{0}\n{1}\n{2}'.format(fname_md5, 'not a num', "{ }")) 109 | tf.flush() 110 | tf.seek(0) 111 | self.assertRaises(Exception, zdf.restoreState, 112 | test_fname1, test_fname2, tf.name) 113 | finally: 114 | tf.close() 115 | os.remove(tf.name) 116 | 117 | def test_invalid_json(self): 118 | """ 119 | An input state file with invalid json should throw an Exception 120 | """ 121 | tf = tempfile.NamedTemporaryFile(mode='w+', delete=False) 122 | try: 123 | tf.write('{0}\n{1}\n{2}'.format(fname_md5, '4', "{ )")) 124 | tf.flush() 125 | tf.seek(0) 126 | self.assertRaises(Exception, zdf.restoreState, 127 | test_fname1, test_fname2, tf.name) 128 | finally: 129 | tf.close() 130 | os.remove(tf.name) 131 | 132 | if __name__ == '__main__': 133 | unittest.main() 134 | -------------------------------------------------------------------------------- /zdict_gen/test_zdict_gen.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.3 2 | 3 | import unittest 4 | import zdict_gen 5 | 6 | test_data1 = '''Lorem ipsum dolor sit amet, eu habitant ac odio integer ac 7 | odio. Cras dolor tempus ultrices, adipiscing commodi in, morbi sed, nec 8 | vestibulum urna. Vel scelerisque amet. Aliquet ridiculus, nec et diam nostra 9 | commodo. Blandit vitae quam maecenas. Rhoncus morbi mauris, faucibus massa 10 | velit sollicitudin sociosqu, in nec adipiscing. Dolor elit phasellus, suscipit 11 | porttitor euismod nunc, vel in ridiculus sem amet turpis massa. Vestibulum 12 | pulvinar consectetuer tortor lobortis, magna dictum libero egestas enim, 13 | lectus ullamcorper ultricies, ipsum cursus vel tempus, ut a in. Mauris ut arcu 14 | qui vestibulum duis, lacinia ultrices non sed, aut dolor nunc, ridiculus id, 15 | amet litora vel diam. Integer est sodales nec faucibus.''' 16 | 17 | test_data2 = '''Lorem ipsum dolor sit amet, eu habitant ac odio integer ac 18 | odio. Cras dolor tempus ultrices, adipiscing commodi in, morbi sed, nec 19 | urna. Vel scelerisque amet. Aliquet ridiculus, nec et diam nostra 20 | commodo. Blandit vitae quam maecenas. 
Rhoncus morbi mauris, faucibus massa 21 | velit sollici ''' 22 | 23 | 24 | class TestGenDictFromFreq(unittest.TestCase): 25 | 26 | def test_is_corrrect_order(self): 27 | """ 28 | Should correctly score and generate dictionary 29 | """ 30 | test_counts = {'corgi': 10, # score 50 31 | 'cat': 5, # score 15 32 | 'lizard': 6, # score 36 33 | 'gecko': 7, # score 35 34 | 'hamster': 4, # score 28 35 | 'parrot': 3} # score 18 36 | full_dict = zdict_gen.genDictFromFreq(test_counts, -1) 37 | expected_dict = 'catparrothamstergeckolizardcorgi' 38 | self.assertEqual(full_dict, expected_dict) 39 | 40 | def test_equal_to_size_b(self): 41 | """ 42 | Output of dictionary should be equal to size_b if size_b is valid and 43 | the unlimited dictionary would have been larger than size_b 44 | """ 45 | test_counts = {'corgi': 10, # score 50 46 | 'cat': 5, # score 15 47 | 'lizard': 6, # score 36 48 | 'gecko': 7, # score 35 49 | 'hamster': 4, # score 28 50 | 'parrot': 3} # score 18 51 | 52 | expected_dict = 'geckolizardcorgi' 53 | expected_size = len(expected_dict) 54 | small_dict = zdict_gen.genDictFromFreq(test_counts, expected_size) 55 | self.assertEqual(small_dict, expected_dict) 56 | 57 | def test_remove_substrings(self): 58 | """ 59 | Doesn't include substring words in the output dictionary 60 | """ 61 | test_counts = {'corgi': 10, # score 50 62 | 'corgicat': 5} # score 40 63 | 64 | expected_dict = 'corgicat' 65 | d = zdict_gen.genDictFromFreq(test_counts, -1) 66 | self.assertEqual(d, expected_dict) 67 | 68 | if __name__ == '__main__': 69 | unittest.main() 70 | -------------------------------------------------------------------------------- /zdict_gen/zdict_freqs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3.3 2 | 3 | from __future__ import print_function 4 | 5 | import argparse 6 | import json 7 | import multiprocessing 8 | import os 9 | import sys 10 | 11 | from collections import OrderedDict 12 | from hashlib import md5 13 | from itertools import islice 14 | 15 | ''' 16 | Zlib's implementation uses 262 bytes of overhead for pre-defined dictionary 17 | thus, the max zdict size is 32KB - 262B = 23506B 18 | ''' 19 | MAX_WINDOW_SIZE = 32506 # 32KB-262B, size of MAX_WBITS - zdict overhead 20 | MAX_LOOKAHEAD_BUFFER_SIZE = 258 # Maximum Match Length in zlib 21 | BATCHING_FACTOR = 3 # Number of times of cores to use as batching size 22 | DEFAULT_ZDICT_SIZE = 32506 # Length of predefined dictionary to generate 23 | 24 | 25 | def printProgress(iteration, total): 26 | """ 27 | Call in a loop to show terminal progress 28 | 29 | Args: 30 | iteration (int): current iteration 31 | total (int): total iterations 32 | 33 | Returns: 34 | None 35 | """ 36 | fraction = min(iteration / float(total), 1.0) 37 | percent = round(100.00 * fraction, 2) 38 | sys.stderr.write('\r%s%s' % (percent, '%')), 39 | sys.stderr.flush() 40 | if fraction == 1.0: 41 | sys.stderr.write('\n') 42 | sys.stderr.flush() 43 | 44 | 45 | def gotoLine(f, n): 46 | """ 47 | Sets the file object to start at nth line (0 indexed) 48 | 49 | Args: 50 | f_obj (file): file object 51 | n (int): nth line to jump to 52 | 53 | Returns: 54 | None 55 | """ 56 | f.seek(0) 57 | if n > 0: 58 | for _ in islice(f, n): 59 | pass 60 | 61 | 62 | def updatePinZDict(pin1, pin2): 63 | """ 64 | Finds common substrings between both pins represenations 65 | 66 | Args: 67 | pin1 (str): data to find common strings against pin2 68 | pin2 (str): data to find common strings against pin1 69 | 70 | Returns: 71 | set(str): Set of 
substrings that are used for lookback in LZ77 72 | among both pins bidirectionally 73 | """ 74 | pin1_dict_set = getSubstringSet(pin1, pin2) 75 | pin2_dict_set = getSubstringSet(pin2, pin1) 76 | pin_dict_set = pin1_dict_set.intersection(pin2_dict_set) 77 | return pin_dict_set 78 | 79 | 80 | def getSubstringSet(input_data, 81 | zdict, 82 | window_size=MAX_WINDOW_SIZE, 83 | buffer_size=MAX_LOOKAHEAD_BUFFER_SIZE): 84 | """ 85 | Finds longest matches for input_data using zdict as a predefined 86 | dictionary (pre-loaded window) 87 | 88 | Args: 89 | input_data (str): 90 | zdict (str): 91 | window_size (Optional[int]): lookback window size 92 | buffer_size (Optional[int]): lookahead buffer size 93 | 94 | Returns: 95 | set(str): Set of substrings that are used for lookback in LZ77 96 | """ 97 | data = zdict # pre-load window 98 | pos = len(zdict) # starting position after pre-defined window 99 | data += input_data 100 | zDict = set() 101 | 102 | while pos < len(data): 103 | match = findLongestMatch(data, pos, window_size, buffer_size) 104 | if match: 105 | (bestMatchLength, bestMatchStr) = match 106 | zDict.add(bestMatchStr) 107 | pos += bestMatchLength 108 | else: 109 | pos += 1 110 | return zDict 111 | 112 | 113 | def findLongestMatch(data, 114 | current_position, 115 | window_size=MAX_WINDOW_SIZE, 116 | buffer_size=MAX_LOOKAHEAD_BUFFER_SIZE): 117 | """ 118 | Finds the longest match to a substring starting at the current_position 119 | in the lookahead buffer from the history window 120 | 121 | Args: 122 | data (str): data to find next longest match 123 | current_position (int): current position (index) in data 124 | window_size (Optional[int]): lookback window size 125 | buffer_size (Optional[int]): lookahead buffer size 126 | 127 | Returns: 128 | tuple(int, str): Tuple of length of best match found and the string 129 | matched, None if no match found in lookahead buffer 130 | """ 131 | end_of_buffer = min(current_position + buffer_size, len(data) + 1) 132 | 133 | best_match_distance = -1 134 | best_match_length = -1 135 | best_match_str = None 136 | 137 | # Optimization: Only consider substrings of length 3 and greater 138 | for j in range(current_position + 3, end_of_buffer): 139 | 140 | start_index = max(0, current_position - window_size) 141 | substring = data[current_position:j] 142 | 143 | for i in range(start_index, current_position): 144 | 145 | repetitions = len(substring) // (current_position - i) 146 | 147 | last = len(substring) % (current_position - i) 148 | 149 | matched_string = (data[i:current_position] * repetitions + 150 | data[i:i+last]) 151 | 152 | if matched_string == substring and \ 153 | len(substring) > best_match_length: 154 | best_match_distance = current_position - i 155 | best_match_length = len(substring) 156 | best_match_str = substring 157 | 158 | if best_match_distance > 0 and best_match_length > 0: 159 | return (best_match_length, best_match_str) 160 | return None 161 | 162 | 163 | def executeBatchFreqs(nlines_per_iter, 164 | pins_file_1, 165 | pins_file_2, 166 | cores=multiprocessing.cpu_count()): 167 | # Grab next n lines from both files 168 | nlines1 = islice(pins_file_1, nlines_per_iter) 169 | nlines2 = islice(pins_file_2, nlines_per_iter) 170 | 171 | # Start processing n lines over args.cores processors 172 | pool = multiprocessing.Pool(processes=cores) 173 | list_sets = pool.starmap(updatePinZDict, 174 | zip(nlines1, 175 | nlines2)) 176 | 177 | return list_sets 178 | 179 | 180 | def getMD5(fbase1, fbase2): 181 | md5_obj = md5() 182 | 
md5_obj.update(fbase1.encode()) 183 | md5_obj.update(fbase2.encode()) 184 | return md5_obj.hexdigest() 185 | 186 | 187 | def saveState(fbase1, fbase2, state_fname, line_num, freq): 188 | """ 189 | Saves state in state file 190 | 191 | State file is a 3 line file as follows 192 | 193 | Line Contents 194 | ---- -------- 195 | 1 MD5 hash of first data file and second data file 196 | 2 Last datafile line read 197 | 3 Frequencies of substrings as json 198 | 199 | Args: 200 | fbase1 (str): base filename of first data file 201 | fbase2 (str): base filename of second data file 202 | state_fname (str): filename of state file 203 | line_num (int): most recent line numebr read from 204 | data files 205 | freqs (dict of str:int): mapping of substring to 206 | occurring frequency 207 | Returns: 208 | None 209 | """ 210 | 211 | md5_hash = getMD5(fbase1, fbase2) 212 | freq_json = json.dumps(freq) 213 | 214 | with open(state_fname, 'w+') as state_file: 215 | state_file_contents = '{0}\n{1}\n{2}'.format(md5_hash, 216 | line_num, 217 | freq_json) 218 | state_file.write(state_file_contents) 219 | 220 | 221 | def restoreState(fbase1, fbase2, state_fname): 222 | """ 223 | Attempts to read in previous state from state file 224 | 225 | State file should be a 3 line file 226 | 227 | Line Contents 228 | ---- -------- 229 | 1 MD5 hash of first data file and second data file 230 | 2 Last datafile line read 231 | 3 Frequencies of substrings as json 232 | 233 | Args: 234 | fbase1 (str): base filename of first data file 235 | fbase2 (str): base filename of second data file 236 | state_fname (str): filename of state file 237 | 238 | Returns: 239 | (int, dict of str: int): line number of last datafile line read, 240 | frequencies of substrings (substr: freq) 241 | if able to parse file else None 242 | """ 243 | md5_hash = getMD5(fbase1, fbase2) 244 | try: 245 | with open(state_fname, 'r+') as state_file: 246 | try: 247 | state_data = state_file.read() 248 | file_md5, line_num, freq_json = state_data.splitlines() 249 | except ValueError: 250 | raise Exception('Invalid statefile format') 251 | 252 | # Verify correct state file 253 | if file_md5 != str(md5_hash): 254 | raise Exception('State file does not correspond to input data') 255 | 256 | # Populate left off line number 257 | try: 258 | line_num = int(line_num) 259 | except ValueError: 260 | raise Exception('Invalid state line number') 261 | 262 | # Initialize frquencies dictionary 263 | try: 264 | freqs = json.loads(freq_json) 265 | except ValueError: 266 | raise Exception('Invalid substring frequency JSON') 267 | 268 | except IOError: 269 | print("State file doesn't exist", file=sys.stderr) 270 | raise 271 | return line_num, freqs 272 | 273 | 274 | def parse(): 275 | """ 276 | Defines a cli and parses command line inputs 277 | 278 | Args: 279 | 280 | Returns: 281 | object(options): The returned object from an 282 | argparse.ArgumentParser().parse_args() call 283 | """ 284 | parser = argparse.ArgumentParser( 285 | description="Takes in two files and looks for common " 286 | "substrings within the lookback window " 287 | "and lookahead buffer pairwise by line.") 288 | parser.add_argument('data1', 289 | action='store', 290 | help='First data file to generate LZ77 dict') 291 | parser.add_argument('data2', 292 | action='store', 293 | help='Second data file to generate LZ77 dict') 294 | parser.add_argument('--cores', 295 | action='store', 296 | type=int, 297 | default=multiprocessing.cpu_count(), 298 | help='Number of cores to utilize. 
                             'Default all cores.')
    parser.add_argument('--state',
                        action='store',
                        help='File to track progress for failure')
    return parser.parse_args()


def main():
    args = parse()

    line_num = 0    # current line number in file
    freqs = dict()  # common substring: frequency

    if args.state:
        _, data1_base_fname = os.path.split(args.data1)
        _, data2_base_fname = os.path.split(args.data2)

        try:
            line_num, freqs = restoreState(data1_base_fname,
                                           data2_base_fname,
                                           args.state)
        except (OSError, ValueError):
            print('Invalid state file. Starting anew.', file=sys.stderr)

    with open(args.data1, 'r') as pins_file_1, \
            open(args.data2, 'r') as pins_file_2:

        # Only process as many lines as the shorter of the two files
        line_count_1 = sum(1 for _ in pins_file_1)
        line_count_2 = sum(1 for _ in pins_file_2)
        nlines = min(line_count_1, line_count_2)

        # Start where the saved state left off; defaults to the beginning
        gotoLine(pins_file_1, line_num)
        gotoLine(pins_file_2, line_num)

        nlines_per_iter = args.cores * BATCHING_FACTOR  # lines per batch

        while line_num < nlines:
            printProgress(line_num, nlines)
            list_sets = executeBatchFreqs(nlines_per_iter,
                                          pins_file_1,
                                          pins_file_2,
                                          args.cores)

            # Aggregate substring counts for this batch
            for s in list_sets:
                for i in s:
                    freqs[i] = freqs.get(i, 0) + 1

            if args.state:
                saveState(data1_base_fname,
                          data2_base_fname,
                          args.state,
                          line_num,
                          freqs)

            line_num += nlines_per_iter
            line_num = min(line_num, nlines)  # batching may go over nlines

    printProgress(line_num, nlines)
    # Output frequencies as JSON, most frequent substrings first
    sorted_freqs = OrderedDict(sorted(freqs.items(),
                                      key=lambda k: k[1],
                                      reverse=True))
    sorted_freqs_json = json.dumps(sorted_freqs,
                                   indent=4,
                                   separators=(',', ': '))
    print(sorted_freqs_json, end='')

    # Clean up the state file upon completion
    if args.state:
        os.remove(args.state)


if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
/zdict_gen/zdict_gen.py:
--------------------------------------------------------------------------------
#!/usr/bin/python3.3

from __future__ import print_function

import argparse
import json


'''
Zlib's implementation uses 262 bytes of overhead for a pre-defined dictionary,
thus the max zdict size is 32KB - 262B = 32506B
'''
MAX_WINDOW_SIZE = 32506  # 32KB - 262B: the 1 << MAX_WBITS window minus the zdict overhead
MAX_LOOKAHEAD_BUFFER_SIZE = 258  # Maximum match length in zlib
BATCHING_FACTOR = 3  # Multiple of the core count to use as the batch size
DEFAULT_ZDICT_SIZE = 32506  # Length of predefined dictionary to generate


'''
TODO

Implement a more optimal shortest common superstring solution.
This would allow more relevant substrings to fit within the pre-defined
dictionary window. In order to generalize it for compression against
larger data objects the solution would have to account for more
commonly occurring substrings being placed towards the end of the
dictionary. As far as I know, the vanilla SCS problem is NP-complete,
even without the previously stated constraint.
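
(Illustrative example of the gap: the greedy containment pass in
genDictFromFreq keeps "pintere" and "terest" as two separate entries because
neither fully contains the other, whereas an SCS-style merge could overlap
them into the single 9-byte string "pinterest".)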
'''


def genDictFromFreq(freq_dict, size_b):
    """
    Creates an LZ77 dictionary (initial lookback window) from a dictionary of
    word: frequency

    Args:
        freq_dict (dict of str: int): A dictionary mapping word to frequency
        size_b (int): output size of the generated dictionary in bytes

    Returns:
        str: An LZ77 dictionary of size_b scored on len(word) * frequency
    """

    # change value from frequency to score
    for word, freq in freq_dict.items():
        freq_dict[word] = len(word) * freq

    """ superstrings swallow substring scores """
    # 1. sort keys on increasing key (word) length
    sorted_keys = sorted(freq_dict, key=lambda k: len(k))

    # 2. add substring scores to superstring value and flag for
    #    removal (set score to 0)
    for i, key_i in enumerate(sorted_keys):
        # highest scoring superstring should consume the substring
        sorted_keys_by_score = sorted(sorted_keys[i+1:],
                                      key=lambda k: freq_dict[k],
                                      reverse=True)
        for key_j in sorted_keys_by_score:
            if key_i in key_j:
                freq_dict[key_j] += freq_dict[key_i]
                freq_dict[key_i] = 0
                break

    # 3. Remove substring items (those with score 0)
    freq_dict = {k: v for k, v in freq_dict.items() if v > 0}

    """ Create LZ77 dictionary string """
    # 1. Join keys (words) on score in ascending order
    # According to the zlib documentation, the most common substrings should be
    # placed at the end of the pre-defined dictionary
    dict_str = ''.join(sorted(freq_dict,
                              key=lambda k: freq_dict[k]))

    # 2. trim to size_b if valid
    if 0 < size_b < len(dict_str):
        dict_str = dict_str[len(dict_str)-size_b:]

    return dict_str


def parse():
    """
    Defines a CLI and parses command line inputs

    Args:

    Returns:
        object(options): The returned object from an
                         argparse.ArgumentParser().parse_args() call
    """
    parser = argparse.ArgumentParser(
        description="Generates a *good* predefined dictionary "
                    "for compressing pin objects with the "
                    "DEFLATE algorithm. Takes in a file of "
                    "commonly occurring substrings and their "
                    "frequencies. "
                    "Common substring occurrences are scored "
                    "from the product of length of substring "
                    "and the frequency with which it occurs. "
                    "Fully contained substrings are swallowed "
                    "and the top scoring strings are "
                    "concatenated up to SIZE bytes.")
    parser.add_argument('freqs_file',
                        action='store',
                        help="File of commonly occurring substrings. Reads in "
                             "json with substring keys and frequency values")
    parser.add_argument('--size',
                        action='store',
                        type=int,
                        default=DEFAULT_ZDICT_SIZE,
                        help='Size of predefined dictionary to generate')
    return parser.parse_args()


def main():
    args = parse()

    with open(args.freqs_file, 'r') as freqs_file:
        counts = json.load(freqs_file)

    # Generate the dictionary from substring frequencies
    zdict_str = genDictFromFreq(counts, args.size)
    print(zdict_str, end='')


if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------
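
A minimal end-to-end sketch of how the two scripts above combine with zlib's
predefined-dictionary support. The file names (pins_a.txt, pins_b.txt,
freqs.json, freqs.state, zdict.bin) and the sample payload are hypothetical
placeholders; the flags are the ones defined by the parsers above, and
compressobj()/decompressobj() accept a zdict argument as of Python 3.3.

# Hypothetical end-to-end flow (placeholder names, not files in this repo):
#
#   python3.3 zdict_freqs.py pins_a.txt pins_b.txt --state freqs.state > freqs.json
#   python3.3 zdict_gen.py freqs.json --size 32506 > zdict.bin
#
# The emitted dictionary is then handed to zlib as a predefined dictionary:
import zlib

with open('zdict.bin', 'rb') as f:
    zdict = f.read()

payload = b'{"id": 1, "board": "recipes", "description": "..."}'  # made-up pin

# Compress with the predefined dictionary pre-loading the LZ77 window
compressor = zlib.compressobj(zdict=zdict)
compressed = compressor.compress(payload) + compressor.flush()

# The decompressor must be given the same dictionary
decompressor = zlib.decompressobj(zdict=zdict)
assert decompressor.decompress(compressed) == payload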