├── libs
│   ├── __init__.py
│   ├── RuleHandler.py
│   ├── MysqlParser.py
│   └── PsqlParser.py
├── logs
│   └── .gitignore
├── output
│   └── .gitignore
├── .gitignore
├── requirements.txt
├── bin
│   ├── install_requirements.sh
│   └── migrate.sh
├── config
│   └── parameters.json.sample
├── rules
│   ├── mysql_raw_dump.json
│   ├── schema_changes.json
│   └── mysql_to_psql.json
├── LICENSE
├── dumperAuxFuncs.py
├── README.md
└── main.py

/libs/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/logs/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/output/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | tools/cleaner/output/*
2 | output/*
3 | vendor/*
4 | config/parameters.json
5 | logs/*
6 | *.pyc
7 | .vscode/*
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.0.1
2 | PyMySQL==1.1.0
3 | httplib2==0.18.0
4 | netifaces==0.11.0
5 | psycopg2cffi==2.9.0
6 | html5lib==0.9999999
7 | chardet==2.3.0
8 | 
--------------------------------------------------------------------------------
/bin/install_requirements.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | echo "Installing project library dependencies"
3 | #pip install --use-pep517 -r requirements.txt --upgrade
4 | pip install -r requirements.txt --upgrade
--------------------------------------------------------------------------------
/config/parameters.json.sample:
--------------------------------------------------------------------------------
 1 | {
 2 |   "mysql": {
 3 |     "host": "127.0.0.1",
 4 |     "port": 3306,
 5 |     "user": "root",
 6 |     "password": "root",
 7 |     "charset": "utf8",
 8 |     "use_unicode": false
 9 |   },
10 |   "psql": {
11 |     "host": "127.0.0.1",
12 |     "port": 5432,
13 |     "user": "root",
14 |     "password": "root"
15 |   },
16 |   "threads": 0,
17 |   "prefix": false,
18 |   "v1_schema_name" : false
19 | }
--------------------------------------------------------------------------------
/rules/mysql_raw_dump.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "column": {
 3 |     "type": [
 4 |       {"value": "boolean", "method": "convertStrBoolean"},
 5 |       {"value": "bytea", "method": "makeItEmpty"},
 6 |       {"value": "TIME", "method": "makeItTime"}
 7 |     ],
 8 |     "fullType": [
 9 |       {"value": "datetime", "method": "notNullableDatetime"},
10 |       {"value": "date", "method": "notNullableDate"}
11 |     ],
12 |     "reference": [
13 |       {"value": "notNone", "method": "refToNullable"}
14 |     ]
15 |   }
16 | }
--------------------------------------------------------------------------------
/rules/schema_changes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "tables": {
 3 |     "reservation_reminder": {
 4 |       "_PRE_SQL_": [
 5 |         "DELETE IGNORE FROM reservation_reminder WHERE resa_id NOT IN (SELECT id FROM reservation)"
 6 |       ],
 7 |       "name": "reminder",
 8 |       "columns": {
 9 |         "resa_id": {
10 |           "name": "reservation_id",
11 |           "reference": "reservation (id)"
"reservation (id)" 12 | }, 13 | "user_id": { 14 | "nullable": true 15 | }, 16 | "client_id": { 17 | "reference": "client (id)" 18 | } 19 | } 20 | }, 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Gabriel Garrido 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /rules/mysql_to_psql.json: -------------------------------------------------------------------------------- 1 | { 2 | "column": { 3 | "type": [ 4 | {"from": "datetime", "to": "timestamp"}, 5 | {"from": "tinyint", "to": "smallint", 6 | "_IF_": [ 7 | {"attr": "fullType", "val": "tinyint(1)", "to": "boolean"} 8 | ] 9 | }, 10 | {"from": "smallint", "to": "smallint", 11 | "_IF_": [ 12 | {"attr": "fullType", "val": "smallint(1)", "to": "boolean"} 13 | ] 14 | }, 15 | {"from": "int", "to": "int", 16 | "_IF_": [ 17 | {"attr": "fullType", "val": "int(1)", "to": "boolean"} 18 | ] 19 | }, 20 | {"from": "mediumint", "to": "int"}, 21 | {"from": "mediumtext", "to": "text"}, 22 | {"from": "longtext", "to": "text"}, 23 | {"from": "longblob", "to": "bytea"}, 24 | {"from": "blob", "to": "bytea"}, 25 | {"from": "double", "to": "decimal"}, 26 | {"from": "enum", "to": "set"} 27 | ], 28 | "default": [ 29 | {"from": "CURRENT_TIMESTAMP", "to": "current_timestamp"} 30 | ] 31 | }, 32 | "table": { 33 | "engine": [ 34 | {"from": "MyISAM", "to": "INNODB"} 35 | ] 36 | } 37 | } -------------------------------------------------------------------------------- /dumperAuxFuncs.py: -------------------------------------------------------------------------------- 1 | __author__ = 'ggarrido' 2 | 3 | import datetime 4 | import re 5 | 6 | validTimeRE = re.compile(u'\d\d:\d\d') 7 | 8 | def convertStrBoolean(value, col_attrs=None): 9 | if not value or not (isinstance(value, str) or isinstance(value, int)): 10 | return False 11 | return False if value == 0 or int(value) == 0 else True 12 | 13 | def defaultDate(value, format, defaultValue, nullable): 14 | if value is None or value[:4] == '0000': 15 | return None if nullable else '1900-01-01' 16 | return value 17 | 18 | def notNullableDate(value, col_attrs=None): 19 | nullable = col_attrs['nullable'] if col_attrs else False 20 | format, defaultValue = "%d%m%Y", "01011900" 21 | return defaultDate(value, format, defaultValue, 
nullable) 22 | 23 | 24 | def notNullableDatetime(value, col_attrs=None): 25 | nullable = col_attrs['nullable'] if col_attrs else False 26 | format, defaultValue = "%d%m%Y %H:%M:%S", "01011900 00:00:00" 27 | return defaultDate(value, format, defaultValue, nullable) 28 | 29 | def refToNullable(value, col_attrs=None): 30 | nullable = col_attrs['nullable'] if col_attrs else False 31 | if (value == 0 or value == '0') and nullable: return None 32 | return value 33 | 34 | def makeItEmpty(value, col_attrs=None): 35 | nullable = col_attrs['nullable'] if col_attrs else False 36 | return None if nullable else '' 37 | 38 | def makeItTime(value, col_attrs=None): 39 | nullable = col_attrs['nullable'] if col_attrs else False 40 | if value is not None and re.match(validTimeRE, value): return value 41 | return None if nullable else '00:00' 42 | -------------------------------------------------------------------------------- /bin/migrate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | readonly YELLOW="\033[0;33m" 3 | readonly RED="\033[0;31m" 4 | readonly GREEN="\033[0;32m" 5 | readonly NC="\033[0;0m" 6 | readonly BASH_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 7 | 8 | function error { 9 | MSG="$1" 10 | echo -e "${RED}ERROR: ${MSG}${NC}\nExitting" 11 | exit 1 12 | } 13 | 14 | PORT=5432 15 | for ((i=1;i<=$#;i++)); 16 | do 17 | if [ ${!i} = "-p" ] 18 | then ((i++)) 19 | PORT=${!i}; 20 | 21 | elif [ ${!i} = "-d" ]; 22 | then ((i++)) 23 | DB_NAME=${!i}; 24 | 25 | elif [ ${!i} = "-U" ]; 26 | then ((i++)) 27 | USER=${!i}; 28 | 29 | elif [ ${!i} = "-Wf" ]; 30 | then ((i++)) 31 | export PGPASSWORD=${!i}; 32 | 33 | elif [ ${!i} = "-W" ]; 34 | then ((i++)) 35 | echo -en "${YELLOW}Root password${NC}\n" 36 | read -s password 37 | export PGPASSWORD=$password 38 | fi 39 | done; 40 | 41 | readonly ERRLOG="/tmp/pg_migration_$(date +%s).err" 42 | touch "${ERRLOG}" 43 | readonly HOST="127.0.0.1" 44 | 45 | echo -e "${YELLOW}************************${NC}" 46 | echo -e "${YELLOW} MIGRATION MYSQL > PG ${NC}" 47 | echo -e "${YELLOW}************************${NC}" 48 | 49 | # touch ${DUMPLOG} 50 | # echo -e "${YELLOW}Logs are being redirect to:${NC} \n\tQueries:\t${DUMPLOG}\n\tErrors:\t\t${ERRLOG}${NC}" 51 | 52 | echo -en "${YELLOW}Creating db${NC}..." 53 | psql --set ON_ERROR_STOP=on -U ${USER} -p ${PORT} -o ${ERRLOG} -c "DROP DATABASE IF EXISTS \"${DB_NAME}\"" || error "Dropping old db" 54 | psql --set ON_ERROR_STOP=on -U ${USER} -p ${PORT} -o ${ERRLOG} -c "CREATE DATABASE \"${DB_NAME}\"" || error "Creating db" 55 | echo "Done ($SECONDS)" 56 | 57 | SECONDS=0 58 | SQL="./output/${DB_NAME}/psql_tables.sql" 59 | echo -en "${YELLOW}Creating v2 tables${NC}..." 60 | psql --set ON_ERROR_STOP=on -U ${USER} -p ${PORT} -f ${SQL} -o ${ERRLOG} -d ${DB_NAME} || error "Creating tables" 61 | echo "Done ($SECONDS)" 62 | 63 | SECONDS=0 64 | SQL="./output/${DB_NAME}/psql_data.sql" 65 | echo -en "${YELLOW}Inserting data${NC}..." 66 | psql --set ON_ERROR_STOP=on -U ${USER} -p ${PORT} -f ${SQL} -o ${ERRLOG} -d ${DB_NAME} || error "Inserting data" 67 | echo "Done ($SECONDS)" 68 | 69 | SECONDS=0 70 | SQL="./output/${DB_NAME}/psql_views.sql" 71 | echo -en "${YELLOW}Creating views${NC}..." 72 | psql --set ON_ERROR_STOP=on -U ${USER} -p ${PORT} -f ${SQL} -o ${ERRLOG} -d ${DB_NAME} || error "Creating views" 73 | echo "Done ($SECONDS)" 74 | 75 | SECONDS=0 76 | SQL="./output/${DB_NAME}/psql_index_fk.sql" 77 | echo -en "${YELLOW}Creating indexes and fk${NC}..." 
78 | psql --set ON_ERROR_STOP=on -U ${USER} -p ${PORT} -f ${SQL} -o ${ERRLOG} -d ${DB_NAME} || error "Creating indexes and constraints"
79 | echo "Done ($SECONDS)"
80 | 
81 | echo -e "${GREEN}Migration to PG was completed SUCCESSFULLY${NC}"
82 | exit 0
83 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Migrate MySQL db to PostgreSQL (by rules)
 2 | 
 3 | Migrate your current MySQL databases into Postgres in a single command.
 4 | 
 5 | This tool lets you migrate your local MySQL databases to Postgres and even modify your current database structure and names, achieving better
 6 | consistency by defining new foreign keys and cleaning up the data using `_PRE_SQL_` queries.
 7 | 
 8 | # How to use it
 9 | 
10 | ### Mode1: Migrate a single database
11 |     $ python main.py {db_name}
12 | 
13 | ### Mode2: Migrate a few tables from a single database
14 |     $ python main.py {db_name} [{table_name1} {table_name2} ..]
15 | 
16 | ### Mode3: Migrate your whole MySQL schema
17 |     $ python main.py all-databases
18 | (it is recommended to use `prefix` from parameters.json, read more below)
19 | 
20 | # Documentation
21 | ## OS Dependencies
22 | ### Install pip
23 |     sudo apt-get install python-pip python-dev build-essential python-psycopg2 python-mysqldb libpq-dev libmysqlclient-dev
24 | 
25 | ## Environment dependencies
26 | ### Install Python libraries and vendors
27 |     sudo bash ./bin/install_requirements.sh
28 | 
29 | ## Set it up for yourself
30 | #### Step1: Set up db config
31 | Set up your database configuration in `./config/parameters.json`
32 | * `mysql`: MySQL connection values
33 | * `psql`: Postgres connection values
34 | * `threads`: In case of 'all-databases', the number of threads to run in parallel (max. number of CPUs). Use 0 for non-parallel execution
35 | * `prefix`: In case of 'all-databases', only databases whose name starts with this prefix are migrated. Otherwise use false
36 | * `v1_schema_name`: If you want to keep the old schema available as a separate Postgres schema (views), define its name here. Otherwise use false
37 | 
38 | #### Step2: Version schema names
39 | Set up your schema names for version1 and version2 in `./config/parameters.json`
40 | 
41 | #### Step3: Define the model rules you want to apply
42 | * Open `./rules/schema_changes.json`
43 | * Define your own schema rules in it. These rules are used to redefine the new db structure; any table or column without rules will be migrated as-is from MySQL
44 | 
45 | #### Step4: Define PostgreSQL conversion rules from MySQL ones
46 | * Open `./rules/mysql_to_psql.json`
47 | * Define MySQL-to-Postgres conversions; most of the rules are already defined by default, but some might still be missing
48 | 
49 | #### Step5: Define data conversion
50 | * Open `./rules/mysql_raw_dump.json`
51 | * Define data conversions according to column type; you might prefer different conversions depending on your own model. Functions for conversion are defined in `dumperAuxFuncs.py`, feel free to add your own customized ones (see the sketch below).
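
For instance, a minimal sketch of a custom converter, following the same `(value, col_attrs)` signature as the existing functions (the `emptyStrToNull` name and the rule entry below are hypothetical, not part of the shipped rules):

    # added to dumperAuxFuncs.py
    def emptyStrToNull(value, col_attrs=None):
        nullable = col_attrs['nullable'] if col_attrs else False
        # Turn empty or whitespace-only strings into NULL when the column allows it
        if isinstance(value, str) and value.strip() == '':
            return None if nullable else ''
        return value

It is then wired up by adding an entry to the matching attribute list (here `type`) in `./rules/mysql_raw_dump.json`:

    {"value": "varchar", "method": "emptyStrToNull"}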
52 | 
53 | ========================
54 | 
55 | ## Outputs
56 | These are the files generated during the migration process:
57 | 
58 | * `mysql_schema.json`: Original MySQL schema exported in JSON format
59 | * `mysql_schema_v2.json`: MySQL schema after the model rules were applied
60 | * `mysql_data.sql`: INSERT INTO statements in MySQL syntax
61 | 
62 | * `psql_schema.json`: Postgres schema
63 | * `psql_tables.sql`: CREATE TABLE statements, generated from psql_schema.
64 | * `psql_data.sql`: INSERT INTO statements, generated from psql_schema. Raw data will be placed under the `tables` folder
65 | 
66 | ## Manual migration
67 | ### Mode1: Manually
68 | #### Create tables
69 |     psql -h server -d database_name -U username < ./output/{database}/psql_tables.sql
70 | #### Insert data
71 |     psql -h server -d database_name -U username < ./output/{database}/psql_data.sql
72 | #### Insert indexes and fks
73 |     psql -h server -d database_name -U username < ./output/{database}/psql_index_fk.sql
74 | #### Create views (just in case you want to keep views over the previous schema)
75 |     psql -h server -d database_name -U username < ./output/{database}/psql_views.sql
76 | 
77 | ### Mode2: Single command
78 |     $ bash ./bin/migrate.sh [-p {port}] -U {username} -d {database} -Wf {password}
79 | 
--------------------------------------------------------------------------------
/libs/RuleHandler.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'ggarrido'
 2 | 
 3 | 
 4 | class RuleHandler:
 5 |     """
 6 |     Apply a list of rules to the given json schema. A value is overwritten whenever a rule targets the same location
 7 |     """
 8 | 
 9 |     STR_SKIP = '_SKIP_'
10 | 
11 |     def __init__(self, rules=None, node_rules=None):
12 |         """
13 |         :param rules: Schema rules (same nesting level replacement)
14 |         :param node_rules: Node level rules (column, table)
15 |         :return:
16 |         """
17 |         self.rules = rules if rules is not None else {}
18 |         self.node_rules = node_rules if node_rules is not None else {}
19 | 
20 |     def obtain_modified_schema(self, schema):
21 |         """
22 |         Iterate over every table in schema, replacing its attrs in case there is a rule for it
23 |         :param schema: Db schema
24 |         :return: schema after rules were applied
25 |         """
26 |         res_schema = schema.copy()
27 |         self._apply_rules(res_schema)
28 |         self._apply_node_rules(res_schema)
29 |         return res_schema
30 | 
31 |     def _apply_rules(self, schema):
32 |         if self.rules is None or 'tables' not in self.rules:
33 |             return
34 | 
35 |         for table_name, table_attrs in self.rules['tables'].items():
36 |             # If the table doesn't exist on the schema, skip iter
37 |             if 'tables' not in schema or table_name not in schema['tables']:
38 |                 continue
39 | 
40 |             # If table_attr is the SKIP string, it means the table is removed from the schema
41 |             elif table_attrs == self.STR_SKIP:
42 |                 del schema['tables'][table_name]
43 |                 continue
44 | 
45 |             # Otherwise, apply schema changes
46 |             schema_part = schema['tables'][table_name]
47 |             self._apply_rule_table(schema_part, table_attrs)
48 | 
49 |     def _apply_rule_table(self, schema, table_attrs):
50 |         for table_attr_key, table_name_val in table_attrs.iteritems():
51 |             if table_attr_key != 'columns':
52 |                 schema[table_attr_key] = table_name_val
53 |                 continue
54 |             # If the table doesn't have any column declared, skip iter
55 |             elif 'columns' not in schema:
56 |                 continue
57 |             # In case of columns, iterate over all of them, replacing values
58 |             schema_part = schema['columns']
59 |             self._apply_rule_col(schema_part, table_name_val)
60 | 
61 |     def _apply_rule_col(self, 
schema, col_attrs): 62 | for col_name, col_attrs in col_attrs.iteritems(): 63 | # If schema doesn't have col_name defined, skip iter 64 | if col_name not in schema: 65 | continue 66 | elif col_attrs == self.STR_SKIP: 67 | schema[col_name][self.STR_SKIP] = True 68 | # del schema[col_name] 69 | continue 70 | for col_attr_key, col_attr_value in col_attrs.iteritems(): 71 | schema[col_name][col_attr_key] = col_attr_value 72 | if col_attr_key == 'type' and 'size' not in col_attrs: 73 | schema[col_name]['size'] = None 74 | 75 | def _apply_node_rules(self, schema): 76 | for table_name, table_attrs in schema['tables'].items(): 77 | self._apply_table_node_rule(schema['tables'][table_name], table_attrs) 78 | 79 | def _apply_table_node_rule(self, schema, table_attrs): 80 | for table_attr_key, table_attr_value in table_attrs.items(): 81 | # In case it is a table attr and there is rules for them 82 | if table_attr_key != 'columns' and table_attr_key in self.node_rules.get('table', {}): 83 | for node_attr_fromto in self.node_rules['table'][table_attr_key]: 84 | if table_attr_value == node_attr_fromto['from']: 85 | schema[table_attr_key] = node_attr_fromto['to'] 86 | # In case of columns 87 | elif table_attr_key == 'columns': 88 | # Replace in case from value matches current column attr value 89 | for col_name, col_attrs in table_attr_value.items(): 90 | self._apply_col_node_rule(schema['columns'][col_name], col_attrs) 91 | 92 | def _apply_col_node_rule(self, schema, col_attrs): 93 | for col_attr_key, col_attr_value in col_attrs.items(): 94 | if col_attr_key not in self.node_rules.get('column', {}): 95 | continue 96 | for node_attr_fromto in self.node_rules['column'][col_attr_key]: 97 | if col_attr_value == node_attr_fromto['from']: 98 | # Check if there are cases depending of other attr values 99 | schema[col_attr_key] = node_attr_fromto['to'] 100 | if '_IF_' in node_attr_fromto: 101 | for if_cond in node_attr_fromto['_IF_']: 102 | if schema[if_cond['attr']] == if_cond['val']: 103 | schema[col_attr_key] = if_cond['to'] 104 | 105 | @staticmethod 106 | def get_skip_colums(schema_changes): 107 | skipped_cols = [] 108 | for table_name, table_attrs in schema_changes['tables'].tems(): 109 | if 'columns' not in table_attrs: 110 | continue 111 | for col_name, col_attrs in table_attrs['columns'].items(): 112 | if RuleHandler.STR_SKIP == col_attrs: 113 | skipped_cols.append((table_attrs.get('name', table_name), col_name)) 114 | return skipped_cols -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | __author__ = 'ggarrido' 2 | 3 | import json 4 | import sys 5 | import os 6 | import time 7 | 8 | from libs.MysqlParser import MysqlParser 9 | from libs.RuleHandler import RuleHandler 10 | from libs.PsqlParser import PsqlParser 11 | from multiprocessing import Pool 12 | import multiprocessing 13 | import subprocess 14 | import traceback 15 | 16 | try: import psycopg2 17 | except ImportError: import psycopg2cffi as psycopg2 18 | 19 | 20 | path = os.path.dirname(os.path.realpath(__file__)) 21 | MAX_THREADS, skip_pre_sql, pending_dbs, terminate = multiprocessing.cpu_count(), False, None, False 22 | information_schema = 'information_schema' 23 | 24 | def migrate(config, psql_conn_params, database, tables, skip_pre_sql, log_file=None): 25 | """ 26 | Main executor, generate psql no-data dump file base on indicated db/tables from mysql connection 27 | :param config: Mysql db connection params and others 28 
| :param database: Database to migrate 29 | :param tables: Table to migrate 30 | :return: 31 | """ 32 | db_name = database 33 | output_path = os.path.join(path, 'output', db_name) 34 | tables_path = os.path.join(output_path, 'tables') 35 | 36 | # can trigger a race condition 37 | if not os.path.exists(output_path): os.mkdir(output_path, 755) 38 | if not os.path.exists(tables_path): os.mkdir(tables_path, 755) 39 | 40 | pg_conn = psycopg2.connect(**psql_conn_params) 41 | pg_cursor = pg_conn.cursor() 42 | 43 | mysql_conn_params = config['mysql'] 44 | mysql_conn_params['db'] = db_name 45 | mysql_parser = MysqlParser(mysql_conn_params, information_schema) 46 | mysql_parser.set_skip_pre_sql(skip_pre_sql) 47 | mysql_schema = mysql_parser.get_schema(db_name, tables) 48 | 49 | # Generate psql schema, parsing psql rules(types, defaults...) 50 | # Write result into output/psql_schema.json 51 | psql_parser = PsqlParser(pg_cursor, pg_conn) 52 | 53 | try: 54 | with open(os.path.join(output_path, 'mysql_schema.json'), 'w') as outfile: 55 | json.dump(mysql_schema, outfile, indent=4, sort_keys=True) 56 | 57 | # Applying model rules (renaming, new defaults, ....) 58 | # Write result into output/mysql_schema_v2.json 59 | schema_changes = json.loads(open('./rules/schema_changes.json').read()) 60 | rule_handler = RuleHandler(schema_changes) 61 | mysql_schema_v2 = rule_handler.obtain_modified_schema(mysql_schema) 62 | with open(os.path.join(output_path, 'mysql_schema_v2.json'), 'w') as outfile: 63 | json.dump(mysql_schema_v2, outfile, indent=4, sort_keys=True) 64 | outfile.close() 65 | 66 | psql_schema = psql_parser.get_schema_from_mysql(mysql_schema_v2) 67 | with open(os.path.join(output_path, 'psql_schema.json'), 'w') as outfile: 68 | json.dump(psql_schema, outfile, indent=4, sort_keys=True) 69 | outfile.close() 70 | 71 | # Generate psql create table queries from psql schema generated on previous step 72 | # Write result into output/psql_tables.sql 73 | timeS, msg = time.time(), "Generating Schema... " 74 | print(msg) 75 | if log_file: log_file.write(msg) 76 | psql_parser.generate_sql_schema(psql_schema, 'public', os.path.join(output_path, 'psql_tables.sql')) 77 | print(time.time() - timeS) 78 | if log_file: log_file.write(str(time.time() - timeS)+'\n') 79 | 80 | 81 | # Generate mysql dump file 82 | timeS, msg = time.time(), "Generating raw data...(might take few minutes)" 83 | print(msg) 84 | if log_file: log_file.write(msg) 85 | psql_parser.generate_dump_from_raw(mysql_parser, db_name, psql_schema, 'public', 86 | os.path.join(output_path, 'psql_data.sql'), tables_path, schema_changes) 87 | 88 | print(time.time() - timeS) 89 | if log_file: log_file.write(str(time.time() - timeS)+'\n') 90 | 91 | 92 | timeS, msg = time.time(), "Generating indexes and fk... " 93 | print(msg) 94 | if log_file: log_file.write(msg) 95 | psql_parser.generate_psql_index_fk(mysql_schema_v2, os.path.join(output_path, 'psql_index_fk.sql')) 96 | print(time.time() - timeS) 97 | if log_file: log_file.write(str(time.time() - timeS)+'\n') 98 | 99 | 100 | # Generate vies in case it is a client db 101 | if 'v1_schema_name' in config and config['v1_schema_name'] and len(config['v1_schema_name']) > 0: 102 | timeS, msg = time.time(), "Generating views... 
" 103 | print(msg) 104 | if log_file: log_file.write(msg) 105 | psql_parser.generate_psql_views(mysql_schema_v2, config['v1_schema_name'], 'public', 106 | os.path.join(output_path, 'psql_views.sql')) 107 | else: 108 | open(os.path.join(output_path, 'psql_views.sql'), 'w').close() 109 | print(time.time() - timeS) 110 | if log_file: log_file.write(str(time.time() - timeS)+'\n') 111 | # except: 112 | # e = sys.exc_info()[0] 113 | # print "ERROR: %s" % str(e) 114 | finally: 115 | mysql_parser.close() 116 | pg_cursor.close() 117 | 118 | def get_all_databases(config): 119 | mysql_conn_params = config['mysql'] 120 | mysql_parser = MysqlParser(mysql_conn_params, information_schema) 121 | dbs = mysql_parser.get_all_databases(config['prefix']) 122 | mysql_parser.close() 123 | return dbs 124 | 125 | def migrate_db(params, psql_conn_params, database, tables=[], skip_pre_sql=False): 126 | print('-------------------------------------') 127 | print('\t %s ' % (database)) 128 | print('-------------------------------------') 129 | 130 | log_file_path = os.path.join(path, 'logs', database+'.log') 131 | log_file = open(log_file_path, 'w') 132 | try: 133 | migrate(config, psql_conn_params, database, tables, skip_pre_sql, log_file) 134 | timeS = time.time() 135 | print("Running ./bin/migrate.sh .....logs in " + log_file_path) 136 | subprocess.check_call(['bash', path+'/bin/migrate.sh', '-d', database, '-Wf', config['psql']['password'], '-p', 137 | str(config['psql']['port']), '-U', config['psql']['user']] 138 | , stderr=log_file, stdout=log_file) 139 | print(time.time() - timeS) 140 | except Exception: 141 | e = sys.exc_info()[0] 142 | print("ERROR: %s" % str(e)) 143 | log_file.write("Python exception during generating\n") 144 | log_file.write("ERROR: %s" % e) 145 | print(traceback.format_exc()) 146 | finally: 147 | log_file.close() 148 | return database 149 | 150 | def migration_completed(database): 151 | global pending_dbs; pending_dbs -= 1 152 | log_file_path = os.path.join(path, 'logs', database+'.log') 153 | log_file = open(log_file_path, 'r') 154 | print(log_file.read()) 155 | log_file.close() 156 | return database 157 | 158 | def test_f(params, pg_cursor, database, tables=[]): 159 | print(params, pg_cursor, database, tables) 160 | return database 161 | 162 | if __name__ == '__main__': 163 | database = sys.argv[1] 164 | tables = sys.argv[2:] 165 | 166 | config = json.loads(open('./config/parameters.json').read()) 167 | databases = [database] if database != "all-databases" else get_all_databases(config) 168 | pending_dbs = len(databases) 169 | isThreading, n_threads = False, 0 170 | if pending_dbs > 1 and 'threads' in config and int(config['threads']) > 0: 171 | isThreading = True 172 | if int(config['threads']) > MAX_THREADS: print("WARNING: Max number of threads are %d" % config['threads']) 173 | n_threads = MAX_THREADS if (int(config['threads']) > MAX_THREADS) else config['threads'] 174 | 175 | psql_conn_params = config['psql'] 176 | psql_conn_params['dbname'] = 'postgres' 177 | if isThreading: pool = Pool(processes=n_threads) 178 | 179 | for database in databases: 180 | if isThreading: 181 | pool.apply_async(migrate_db, [config, psql_conn_params, database, tables, skip_pre_sql], callback=migration_completed) 182 | else: 183 | migrate_db(config, psql_conn_params, database, tables, skip_pre_sql) 184 | migration_completed(database) 185 | try: 186 | while isThreading and pending_dbs>0: print("Pending dbs %s...") % pending_dbs; sys.stdout.flush(); time.sleep(5) 187 | except KeyboardInterrupt: 
terminate=True; print("Interrupt!!!") 188 | 189 | if isThreading: 190 | if terminate: pool.terminate() 191 | else: 192 | print("Waiting threads to complete"); pool.close() 193 | print("Waiting threads to wrap-up"); pool.join() -------------------------------------------------------------------------------- /libs/MysqlParser.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | __author__ = 'ggarrido' 4 | 5 | import subprocess 6 | from collections import OrderedDict 7 | import pymysql.cursors 8 | from pymysql.converters import decoders, through 9 | 10 | 11 | RED, GREEN, YELLOW, NC = '\033[0;31m', '\033[0;32m', '\033[0;33m', '\033[0m' 12 | 13 | class MysqlParser(): 14 | """ 15 | Generate MySql schema on JSON format given a Mysql Connectior, defined DbName and Tables 16 | """ 17 | 18 | information_schema = 'information_schema' 19 | 20 | def __init__(self, conn_params, information_schema=None): 21 | # replace date, datetime, timstamp decoders so they're not decoded into date objects 22 | custom_decoders = decoders.copy() 23 | custom_decoders[7] = through 24 | custom_decoders[10] = through 25 | custom_decoders[11] = through 26 | custom_decoders[12] = through 27 | 28 | conn_params['conv'] = custom_decoders 29 | self.connection = pymysql.connect(**conn_params) 30 | self.cursor = self.connection.cursor() 31 | self.skip_pre_sql = False 32 | if information_schema is not None: self.information_schema = information_schema 33 | 34 | def close(self): 35 | self.cursor.close() 36 | self.connection.close() 37 | 38 | def set_skip_pre_sql(self, skip=False): 39 | self.skip_pre_sql = skip 40 | 41 | def get_schema(self, db_name, tables=[]): 42 | """ 43 | Iterate over every table from give DbName, filter by passed tables array 44 | :param db_name: DbName to generate schema from 45 | :param tables: Filtered tables 46 | :return: Json schema 47 | """ 48 | mysql_schema = { 49 | 'tables': self._get_db_tables_schema(db_name, tables) 50 | } 51 | 52 | return mysql_schema 53 | 54 | @staticmethod 55 | def mysqldump_data(config, db_name, tables, output_path): 56 | cmd = "mysqldump -h%s -u%s %s %s --compatible=postgresql --no-create-info --compact " \ 57 | "--extended-insert=FALSE --default-character-set=utf8 --complete-insert %s %s > %s "\ 58 | % (config['host'], config['user'], ('-p'+config['password'] if 'password' in config else ''), 59 | ('-P'+str(config['port']) if 'port' in config else ''), db_name, ' '.join(tables), output_path) 60 | subprocess.call(cmd, shell=True) 61 | 62 | @staticmethod 63 | def mysqldump_tables(config, db_name, tables, output_path): 64 | cmd = "mysqldump -h%s -u%s %s %s --compatible=postgresql --no-data --compact %s %s > %s "\ 65 | % (config['host'], config['user'], ('-p'+config['password'] if 'password' in config else ''), 66 | ('-P'+str(config['port']) if 'port' in config else ''), db_name, ' '.join(tables), output_path) 67 | subprocess.call(cmd, shell=True) 68 | 69 | 70 | def run_pre_sql(self, db_name, table, table_attrs, schema_changes): 71 | # def sql_fix_violates_fk(col_name, col_reference): 72 | # reg = re.compile(ur'^[\\]?["]?(\w*)[\\]?["]?\s*\((\w*)\)$') 73 | # reg_match = re.search(reg, col_reference) 74 | # ref_table, ref_col = reg_match.group(1), reg_match.group(2) 75 | # # Getting original table name from new schema 76 | # ref_table_attr = [ cg_table_name for cg_table_name, cg_table_attrs in schema_changes['tables'].iteritems() \ 77 | # if cg_table_name == ref_table or ('name' in cg_table_attrs and cg_table_attrs['name'] == ref_table) 78 | 
# ] 79 | # 80 | # if len(ref_table_attr) != 0: ref_table = ref_table_attr[0] 81 | # 82 | # return """UPDATE IGNORE `%s` AS x 83 | # LEFT JOIN `%s` y ON (y.%s = x.%s) 84 | # SET x.%s = 0 WHERE y.%s IS NULL""" % \ 85 | # (table, ref_table, ref_col, col_name, col_name, ref_col) 86 | if self.skip_pre_sql: return 0 87 | 88 | def get_utc_pre_sql(tableName, columns): 89 | if tableName == 'channel_log_resa': return [] 90 | return ["UPDATE IGNORE `%s` SET `%s` = `%s` - INTERVAL 2 HOUR WHERE `%s` IS NOT NULL" % (tableName, col_name, col_name, col_name) \ 91 | for col_name, col_attr in columns.iteritems() \ 92 | if col_attr['type'] == 'datetime' or col_attr['type'] == 'timestamp'] 93 | 94 | if not '_PRE_SQL_' in table_attrs: table_attrs['_PRE_SQL_'] = [] 95 | 96 | table_attrs['_PRE_SQL_'] += get_utc_pre_sql(table, table_attrs['columns']) 97 | for pre_sql in table_attrs['_PRE_SQL_']: 98 | try: 99 | res = self.cursor.execute(pre_sql) 100 | if res is not None and res != 0: print("%s (Affected rows %d)" % (pre_sql, res)) 101 | except Exception as e: 102 | print(RED + ("ERROR: %s\n MSG: %s" % (pre_sql, str(e))) + NC) 103 | 104 | 105 | def get_table_raw_data(self, db_name, table, cols, table_attrs, schema_changes): 106 | """ 107 | Return raw data from passed table cols, applying conversion rules 108 | :param table: 109 | :param cols: 110 | :param export_rules: 111 | :return: 112 | """ 113 | 114 | def append_join(idx, ref_alias, join_attrs): 115 | alias = 'j'+str(idx) 116 | return ' INNER JOIN %s.%s AS %s ON (%s.%s = %s.%s) ' % ( 117 | db_name, 118 | join_attrs['table'], 119 | alias, 120 | ref_alias, 121 | join_attrs['col'], 122 | alias, 123 | join_attrs['col_ref'] 124 | ) 125 | 126 | # Generate SELECT SQL to export raw data 127 | sql, res = '', None 128 | alias = 't'; sql = "SELECT t.`%s` FROM %s.%s as %s" % ('`, t.`'.join(cols), db_name, table, alias) 129 | if '_JOIN_' in table_attrs: 130 | if not isinstance(table_attrs, list): table_attrs['_JOIN_'] = [table_attrs['_JOIN_']] 131 | for idx, join_attrs in enumerate(table_attrs['_JOIN_']): 132 | sql += append_join(idx, alias, join_attrs) 133 | 134 | if '_WHERE_' in table_attrs: 135 | sql += ' WHERE ' + table_attrs['_WHERE_'] 136 | 137 | if len(sql) > 0: self.cursor.execute(sql); res = self.cursor.fetchall() 138 | return res 139 | 140 | def _get_db_tables_schema(self, db_name, tables=[]): 141 | """ 142 | Iterate over every table(filtered) and obtain information from information_schema 143 | :param db_name: DbName where table belongs 144 | :param tables: Tables to filter 145 | :return: Json with every table from given dbname 146 | """ 147 | output = OrderedDict() 148 | sql = """ 149 | SELECT 150 | T.table_name, T.engine, T.table_collation, T.auto_increment 151 | FROM 152 | %s.tables as T 153 | WHERE 154 | T.table_schema = '%s' 155 | """ % (self.information_schema, db_name) 156 | 157 | self.cursor.execute(sql) 158 | res = self.cursor.fetchall() 159 | for table_info in res: 160 | # Due to an update in the PyMysql lib it return list instead of dict 161 | if isinstance(table_info, tuple): 162 | table_info = dict(zip(('table_name', 'engine', 'table_collation', 'auto_increment') 163 | , list(table_info))) 164 | if len(tables) > 0 and table_info['table_name'] not in tables: 165 | continue 166 | output[table_info['table_name']] = { 167 | 'name': table_info['table_name'], 168 | 'collation': table_info['table_collation'], 169 | 'engine': table_info['engine'], 170 | 'autoIncrement': table_info['auto_increment'], 171 | 'columns': self._get_table_columns_schema(db_name, 
table_info['table_name']), 172 | 'indexes': self._get_table_indexes_schema(db_name, table_info['table_name']) 173 | } 174 | 175 | return output 176 | 177 | def _get_table_columns_schema(self, db_name, table_name): 178 | """ 179 | Iterate over every column from given DbName and Table 180 | :param db_name: DbName where columns belongs to 181 | :param table_name: Table where columns belongs to 182 | :return: Json with every column from given table 183 | """ 184 | columns = OrderedDict() 185 | sql = """ 186 | SELECT 187 | C.column_name, C.is_nullable, C.data_type, C.column_default, C.column_type, 188 | C.character_maximum_length as size, C.column_key as isPk, C.extra, 189 | CONCAT('"', K.referenced_table_name, '"("', referenced_column_name, '")') as reference, 190 | CONCAT_WS(',', C.numeric_precision, C.numeric_scale) as dsize 191 | FROM 192 | %s.columns C 193 | LEFT JOIN %s.KEY_COLUMN_USAGE K ON ( 194 | K.column_name = C.column_name and K.constraint_schema = C.table_schema and C.table_name = K.table_name 195 | ) 196 | WHERE 197 | C.table_name = '%s' 198 | AND C.table_schema = '%s' 199 | """ % (self.information_schema, self.information_schema, table_name, db_name) 200 | 201 | self.cursor.execute(sql) 202 | res = self.cursor.fetchall() 203 | for column_info in res: 204 | # Due to an update in the PyMysql lib it return list instead of dict 205 | if isinstance(column_info, tuple): 206 | column_info = dict( 207 | zip(('column_name', 'is_nullable', 'data_type', 'column_default', 'column_type','size', 208 | 'isPk', 'extra', 'reference', 'dsize'), 209 | list(column_info)) 210 | ) 211 | 212 | columns[column_info['column_name']] = { 213 | 'name': column_info['column_name'], 214 | 'type': column_info['data_type'], 215 | 'nullable': column_info['is_nullable'] == 'YES', 216 | 'size': column_info['size'] if column_info['data_type'] not in ['double', 'decimal'] else column_info['dsize'], 217 | 'default': column_info['column_default'], 218 | 'extra': column_info['extra'], 219 | 'isPk': column_info['isPk'] == 'PRI', 220 | 'fullType': column_info['column_type'], 221 | 'reference': column_info['reference'], 222 | } 223 | 224 | return columns 225 | 226 | def _get_table_indexes_schema(self, db_name, table_name): 227 | """ 228 | Iterate over every column from given DbName and Table 229 | :param db_name: DbName where columns belongs to 230 | :param table_name: Table where columns belongs to 231 | :return: Json with every column from given table 232 | """ 233 | indexes = OrderedDict() 234 | sql = """ 235 | SELECT table_name AS `table_name`, 236 | index_name AS `index_name`, 237 | GROUP_CONCAT(column_name ORDER BY seq_in_index) AS `columns` 238 | FROM %s.statistics S 239 | WHERE S.table_schema = '%s' 240 | AND S.table_name = '%s' 241 | AND S.index_name <> 'PRIMARY' 242 | GROUP BY 1,2; 243 | """ % (self.information_schema, db_name, table_name) 244 | 245 | self.cursor.execute(sql) 246 | res = self.cursor.fetchall() 247 | for index_info in res: 248 | # Due to an update in the PyMysql lib it return list instead of dict 249 | if isinstance(index_info, tuple): 250 | index_info = dict( 251 | zip(('table_name', 'index_name', 'columns'), 252 | list(index_info)) 253 | ) 254 | 255 | indexes[index_info['index_name']] = { 256 | 'name': index_info['index_name'], 257 | 'columns': index_info['columns'].split(',') 258 | } 259 | 260 | return indexes 261 | 262 | 263 | def get_user_user_pass(self, opcode): 264 | sql = """ 265 | SELECT cl.db_login as login, 266 | cl.db_pass as pass 267 | FROM base7_config.client_db as cl 268 | WHERE 
cl.identifier = '%s' 269 | """ % (opcode) 270 | 271 | self.cursor.execute(sql) 272 | res = self.cursor.fetchone() 273 | 274 | if res is None: return None, None 275 | return res[0], res[1] 276 | 277 | 278 | def get_all_databases(self, prefix): 279 | sql = """ 280 | SELECT TABLE_SCHEMA as db_name 281 | FROM """+self.information_schema+""".tables 282 | WHERE TABLE_SCHEMA <> 'mysql' 283 | AND TABLE_SCHEMA <> 'information_schema'""" 284 | 285 | if prefix and len(prefix)>0: 286 | sql += ' AND TABLE_SCHEMA LIKE \''+prefix+'%\'' 287 | 288 | sql += ' GROUP BY TABLE_SCHEMA' 289 | 290 | self.cursor.execute(sql) 291 | res = self.cursor.fetchall() 292 | return [str(db[0]) for db in res] 293 | -------------------------------------------------------------------------------- /libs/PsqlParser.py: -------------------------------------------------------------------------------- 1 | __author__ = 'ggarrido' 2 | 3 | import os 4 | import re 5 | import json 6 | import time 7 | import copy 8 | from libs.RuleHandler import RuleHandler 9 | from libs.MysqlParser import MysqlParser 10 | import dumperAuxFuncs 11 | from decimal import Decimal 12 | 13 | REGEX_TYPE = type(re.compile('')) 14 | PGSQL_BLOCK = 1000 15 | 16 | def merge_dicts(*dict_args): 17 | ''' 18 | Given any number of dicts, shallow copy and merge into a new dict, 19 | precedence goes to key value pairs in latter dicts. 20 | ''' 21 | result = {} 22 | for dictionary in dict_args: 23 | result.update(dictionary) 24 | return result 25 | 26 | class PsqlParser(): 27 | 28 | def __init__(self, cursor, conn): 29 | self.cur = cursor 30 | self.conn = conn 31 | self.raw_dump_rules = json.loads(open('./rules/mysql_raw_dump.json').read()) 32 | self.rules = json.loads(open('./rules/mysql_to_psql.json').read()) 33 | 34 | def close(self): 35 | self.cur.close() 36 | self.conn.close() 37 | 38 | def get_schema_from_mysql(self, mysql_schema): 39 | pq_schema = mysql_schema.copy() 40 | rule_handler = RuleHandler(None, {'table': self.rules['table'], 'column': self.rules['column']}) 41 | return rule_handler.obtain_modified_schema(pq_schema) 42 | 43 | def generate_sql_user(self, mysql_parser, opcode, psql_users_path): 44 | output = open(psql_users_path, 'w') 45 | if opcode is not None and len(opcode) > 0 and opcode != 'empty': 46 | users_sql = self._get_sql_user(opcode, mysql_parser) 47 | output.write(users_sql) 48 | output.close() 49 | 50 | 51 | def generate_sql_schema(self, schema, schema_name, psql_tables_path): 52 | """ 53 | Generate sql queries from given schema 54 | :param schema: Psql schema 55 | :return: Psql queries to generate tables 56 | """ 57 | psql_tables = open(psql_tables_path, 'w') 58 | psql_tables.write("SET client_min_messages TO WARNING;\n") 59 | psql_tables.write("DROP SCHEMA IF EXISTS %s CASCADE;\n" % schema_name) 60 | psql_tables.write("CREATE SCHEMA IF NOT EXISTS %s;\n" % schema_name) 61 | psql_tables.write("SET SCHEMA '%s';\n" % schema_name) 62 | psql_tables.write("CREATE EXTENSION \"unaccent\";\n\n") 63 | 64 | for table_name, table_attr in schema['tables'].items(): 65 | psql_tables.write("\n-- CREATE TABLE %s \n %s \n %s \n" % ( 66 | table_attr['name'], self._get_sql_drop_table(table_attr), 67 | self._get_sql_create_table(table_attr) 68 | )) 69 | 70 | psql_tables.close() 71 | 72 | # @deprecated 73 | def generate_dump_from_mysql_dump(self, schema_changes, schema_name, mysql_dump_path, psql_dump_path): 74 | """ 75 | Read an mysql dump file and convert it into psql syntax 76 | :param schema_changes: changes applied into mysql schema 77 | :param schema_name: 
psql schema name 78 | :param mysql_dump_path: mysql source file 79 | :param psql_dump_path: desc file for dump 80 | """ 81 | mysql_dump = open(mysql_dump_path, 'r') 82 | psql_dump = open(psql_dump_path, 'w') 83 | psql_dump.write(self._get_dump_initial_statements()) 84 | psql_dump.write("\n\n") 85 | 86 | for i, line in enumerate(mysql_dump): 87 | insert_sql = self._convert_mysql_insert_to_psql(schema_changes, line) 88 | if insert_sql is not None: 89 | psql_dump.write(insert_sql) 90 | psql_dump.write("\n") 91 | 92 | psql_dump.write("\n\n") 93 | psql_dump.write(self._get_dump_final_statements()) 94 | for skip in RuleHandler.get_skip_colums(schema_changes): 95 | psql_dump.write("ALTER TABLE \"%s\" DROP COLUMN IF EXISTS \"%s\";\n" % skip) 96 | 97 | mysql_dump.close() 98 | psql_dump.close() 99 | 100 | 101 | def generate_dump_from_raw(self, mysql_parser, db_name, pg_schema, schema_name, psql_dump_path, tables_path, schema_changes): 102 | """ 103 | Obtain raw data from mysql connection and convert into INSERT INTOs 104 | :param mysql_parser 105 | :param schema: 106 | :param schema_name: 107 | :param psql_dump_path: 108 | :type mysql_parser: MysqlParser 109 | """ 110 | psql_dump = open(psql_dump_path, 'w') 111 | 112 | pre_sql_tables = { table_name: pg_schema['tables'][table_name] \ 113 | for table_name in schema_changes['tables'].keys() \ 114 | if '_PRE_SQL_' in schema_changes['tables'][table_name] and table_name in pg_schema['tables']} 115 | 116 | for table_name, table_attrs in pre_sql_tables.items(): 117 | mysql_parser.run_pre_sql(db_name, table_name, table_attrs, schema_changes) 118 | 119 | for table_name, table_attrs in pg_schema['tables'].items(): 120 | print("Parsing table '%s' data...." % table_name) 121 | table_name_to = table_attrs if not table_attrs.get('name', {}) else table_attrs['name'] 122 | table_filename = os.path.join(tables_path, "%s.sql" % (table_name_to)) 123 | table_dump = open(table_filename, 'w+') 124 | 125 | cols_from = [col_name for col_name, col_attr in table_attrs['columns'].items() 126 | if not col_attr.get('_SKIP_', False)] 127 | cols_to = [col_name if not col_attr.get('name', {}) else col_attr['name'] 128 | for col_name, col_attr in table_attrs['columns'].items() 129 | if not col_attr.get('_SKIP_', False)] 130 | 131 | start_time = time.time() 132 | rows = mysql_parser.get_table_raw_data(db_name, table_name, cols_from, table_attrs, schema_changes) 133 | table_raw_rules = self._get_table_raw_dump_rules(table_name, cols_from, table_attrs['columns']) 134 | sql_copy_data_template = ','.join(['%s' for x in range(0, len(cols_to))]) + '\n' 135 | columns = '", "'.join(cols_to) 136 | psql_dump.write("\copy \"%s\" (\"%s\") FROM '%s' WITH (FORMAT CSV, QUOTE '''', DELIMITER ',', NULL 'NULL');\n" 137 | % (table_name_to, columns, table_filename)) 138 | 139 | for row_data in rows: 140 | row_data = list(row_data) 141 | self._apply_raw_dump_rules(row_data, table_raw_rules) 142 | csv_row_data = sql_copy_data_template % tuple(map(self._supaFilta, row_data)) 143 | table_dump.write(csv_row_data) 144 | 145 | table_dump.close() 146 | psql_dump.close() 147 | 148 | def generate_psql_index_fk(self, schema, output_file): 149 | output = open(output_file, 'w') 150 | output.write("SET client_min_messages TO ERROR;\n") 151 | output.write("SET SCHEMA 'public';\n") 152 | 153 | output.write("\n\n") 154 | for table_name, table_attr in schema['tables'].items(): 155 | output.write(self._get_sql_sequence(table_attr)) 156 | output.write(self._get_sql_fkeys(table_attr)) 157 | 
output.write(self._get_sql_indexes(table_attr)) 158 | 159 | output.close() 160 | 161 | 162 | def generate_psql_views(self, schema, schema_name_v1, schema_name_v2, psql_views_path): 163 | """ 164 | Generate view to be able to query on old db schema trough new v2 db schema 165 | :param schema: 166 | :param schema_name_v1: 167 | :param schema_name_v2: 168 | :param psql_views_path: 169 | :return: 170 | """ 171 | psql_views = open(psql_views_path, 'w') 172 | psql_views.write("SET client_min_messages TO ERROR;\n") 173 | psql_views.write("DROP SCHEMA IF EXISTS %s CASCADE;\n\n" % schema_name_v1) 174 | psql_views.write("CREATE SCHEMA IF NOT EXISTS %s;\n\n" % schema_name_v1) 175 | 176 | for table_name_v1, table_attr in schema['tables'].iteritems(): 177 | table_name_v2 = table_attr['name'] 178 | columns_pri, columns_ref, columns, columns_ignore = \ 179 | PsqlParser._get_categorized_columns(table_attr['columns']) 180 | 181 | columns = merge_dicts(columns_pri, columns_ref, columns) 182 | 183 | columns_v2 = [ '"'+col_attr['name']+'"' for col_name_v1, col_attr in columns.iteritems() ] 184 | columns_v2 += [ 'NULL' for col_name_v1, col_attr in columns_ignore.iteritems() ] 185 | 186 | columns_v1 = [ '"'+col_name_v1+'"' for col_name_v1, col_attr in columns.iteritems()] 187 | columns_v1 += [ '"'+col_name_v1+'"' for col_name_v1, col_attr in columns_ignore.iteritems() ] 188 | 189 | view_sql = ('CREATE VIEW %s (%s) AS \n SELECT %s FROM %s WITH CASCADED CHECK OPTION;\n\n' % ( 190 | "%s.%s" % (schema_name_v1, table_name_v1), 191 | ', '.join(columns_v1), 192 | ', '.join(columns_v2), 193 | "%s.%s" % (schema_name_v2, table_name_v2) 194 | )) 195 | 196 | psql_views.write(view_sql + "\n") 197 | psql_views.close() 198 | 199 | 200 | def _get_table_raw_dump_rules(self, table_name, cols, attrs): 201 | tuple_to_check = [] 202 | for rule_attr, rule_conds in self.raw_dump_rules.get('column', {}).items(): 203 | for rule_cond in rule_conds: 204 | tuple_to_check += [(col_key, attrs[col_name], rule_cond['method']) \ 205 | for col_key, col_name in enumerate(cols) \ 206 | if attrs[col_name].get(rule_attr, None) == rule_cond['value'] 207 | or (rule_cond['value'] == "notNone" and attrs[col_name].get(rule_attr, None) is not None) 208 | ] 209 | return tuple_to_check 210 | 211 | def _apply_raw_dump_rules(self, row_data, tuple_to_check): 212 | for col_key, col_attrs, rule_method in tuple_to_check: 213 | params = [row_data[col_key], col_attrs] 214 | row_data[col_key] = getattr(dumperAuxFuncs, rule_method)(*params) 215 | 216 | 217 | @staticmethod 218 | def _convert_mysql_insert_to_psql(schema_changes, line): 219 | """ 220 | Convert mysql insert sql statement into psql one 221 | :param schema_changes: 222 | :param line: 223 | :return: 224 | """ 225 | insert_regex = re.compile('^INSERT INTO "([\w\d]+)"([\w\W]+)VALUES([\w\W]+);$') 226 | try: 227 | line = line.decode("utf8").strip().replace(r"\\", "WUBWUBREALSLASHWUB").\ 228 | replace(r"\'", "''").replace("WUBWUBREALSLASHWUB", r"\\").\ 229 | replace("0000-00-00 00:00:00", "2000-01-01 00:00:00").\ 230 | replace("0000-00-00", "2000-01-01") 231 | except: 232 | print("Can't decode value") 233 | print(line) 234 | return None 235 | 236 | # Grag table name from insert query and check if there is a new name for it 237 | table_name = insert_regex.match(line).group(1) 238 | orig_table_name = insert_regex.match(line).group(1) 239 | if RuleHandler.STR_SKIP == schema_changes.get('tables', {}).get(table_name, {}): 240 | return None 241 | if 'name' in schema_changes.get('tables', {}).get(table_name, {}): 242 
| table_name = schema_changes['tables'][table_name]['name'] 243 | 244 | # Grag columns names from insert query and check if there is a new name for them 245 | columns = re.findall('"([^"]*)"', insert_regex.match(line).group(2)) 246 | for key, col in enumerate(columns): 247 | if 'name' in schema_changes.get('tables', {}).get(orig_table_name, {}).get('columns', {}).get(col, {}): 248 | columns[key] = schema_changes['tables'][orig_table_name]['columns'][col]['name'] 249 | 250 | column_str = '("' + '", "'.join(columns) + '")' 251 | 252 | # Values to be inserted 253 | values = insert_regex.match(line).group(3) 254 | 255 | # Re-build insert query with new names 256 | insert_sql = "INSERT INTO \"%s\" %s VALUES %s;" % (table_name, column_str, values) 257 | return insert_sql.encode('utf8') 258 | 259 | 260 | @staticmethod 261 | def _create_rules(rules, node_rules, node_atrrs): 262 | """ 263 | Generates list of rules from class general rules 264 | :param rules: Dict to allocate new rules 265 | :param node_rules: Global class rules defined on the node level 266 | :param node_atrrs: List of available attrs on that level 267 | :return: 268 | """ 269 | for node_attr, node_value in node_atrrs.iteritems(): 270 | if node_attr not in node_rules: 271 | continue 272 | for rule in node_rules[node_attr]: 273 | # if isinstance(rule['from'], REGEX_TYPE) and node_value.startswith('mediumtext'): 274 | if rule['from'] == node_value: 275 | rules[node_attr] = rule['to'] 276 | 277 | @staticmethod 278 | def _get_sql_drop_table(table_attr): 279 | """ 280 | Generate drop database statement 281 | :param table_attr: table attrs 282 | :return: SQL statement for dropping 283 | """ 284 | template = 'DROP TABLE IF EXISTS "%s" CASCADE;' % (table_attr['name']) 285 | return template 286 | 287 | 288 | def _get_sql_user(self, opcode, mysql_parser): 289 | db_name = 'client_'+opcode 290 | b7_user, b7_pass = mysql_parser.get_user_user_pass(opcode) 291 | 292 | if b7_user is None or b7_pass is None: return "" 293 | if len(b7_user) == 0 or b7_user == 'root': return "" 294 | b7_pass = b7_pass.replace('$', '\\0024') 295 | return """-- Adding PG User 296 | DO $$DECLARE r record; 297 | BEGIN 298 | IF NOT EXISTS ( 299 | SELECT * 300 | FROM pg_catalog.pg_user 301 | WHERE usename = '%s') THEN 302 | 303 | CREATE USER %s WITH PASSWORD U&'%s'; 304 | ELSE 305 | ALTER USER %s WITH PASSWORD U&'%s'; 306 | END IF; 307 | END$$; 308 | ALTER DATABASE %s OWNER TO %s; 309 | GRANT CONNECT ON DATABASE %s TO %s; 310 | 311 | GRANT %s TO %s; 312 | 313 | GRANT USAGE ON SCHEMA %s TO %s; 314 | GRANT ALL ON ALL SEQUENCES IN SCHEMA %s TO %s; 315 | GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA %s TO %s; 316 | GRANT USAGE ON SCHEMA %s TO %s; 317 | GRANT ALL ON ALL SEQUENCES IN SCHEMA %s TO %s; 318 | GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA %s TO %s; 319 | 320 | ALTER DATABASE %s SET search_path TO %s; 321 | ALTER USER %s SET search_path TO %s; 322 | ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO %s; 323 | ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO %s; 324 | ALTER DEFAULT PRIVILEGES IN SCHEMA v1 GRANT ALL ON TABLES TO %s; 325 | ALTER DEFAULT PRIVILEGES IN SCHEMA v1 GRANT ALL ON SEQUENCES TO %s; 326 | """ % (b7_user, b7_user, b7_pass, b7_user, b7_pass, 327 | db_name, b7_user, 328 | db_name, b7_user, 329 | 330 | 'b7group_user', b7_user, 331 | 332 | 'v1', b7_user, 333 | 'v1', b7_user, 334 | 'v1', b7_user, 335 | 'public', b7_user, 336 | 'public', b7_user, 337 | 'public', b7_user, 338 | 339 | db_name, ', '.join(('v1', 'public')), 
340 | b7_user, ', '.join(('v1', 'public')), 341 | b7_user, 342 | b7_user, 343 | b7_user, 344 | b7_user, 345 | ) 346 | 347 | 348 | def _get_sql_sequence(self, table_attr): 349 | """ 350 | Return psql statement to set SEQUENCE value for specific table 351 | """ 352 | template = 'SELECT setval(\'%s_%s_seq\', %d, false);\n' 353 | return "\n".join([template % ( 354 | table_attr['name'], col_attrs['name'], table_attr['autoIncrement'] 355 | ) for col_name, col_attrs in table_attr['columns'].items() if col_attrs['isPk'] and table_attr['autoIncrement'] ]) 356 | 357 | @staticmethod 358 | def _get_dump_initial_statements(): 359 | return """-- Converted by db_converter 360 | SET standard_conforming_strings=on; 361 | SET escape_string_warning=on; 362 | SET client_min_messages TO ERROR; 363 | SET client_encoding = 'UTF8'; 364 | SET NAMES 'UTF8'; 365 | SET CONSTRAINTS ALL DEFERRED; 366 | """ 367 | 368 | @staticmethod 369 | def _get_dump_final_statements(): 370 | return """ 371 | SET CONSTRAINTS ALL IMMEDIATE; 372 | """ 373 | 374 | @staticmethod 375 | def _supaFilta( v): 376 | if v is None: return 'NULL' 377 | if v is True: return 'true' 378 | if v is False: return 'false' 379 | 380 | # if isinstance(v, unicode): v = v.encode('utf8') 381 | if isinstance(v, str): return "'" + v.replace("'", "''") + "'" 382 | 383 | return v 384 | 385 | def _get_sql_fkeys(self, table_attr): 386 | """ 387 | Generate create database statement 388 | :param table_attr: table attrs 389 | :return: SQL statement for creating 390 | """ 391 | default_on_def = 'RESTRICT DEFERRABLE INITIALLY IMMEDIATE' 392 | fkey_template = 'ALTER TABLE "%s" ADD CONSTRAINT %s_%s_fkey FOREIGN KEY (%s) REFERENCES %s ON DELETE %s;' 393 | # index_template = 'CREATE INDEX %s_%s_idx ON %s (%s);' 394 | fkeys = '' 395 | 396 | for col_name, col_attrs in table_attr['columns'].items(): 397 | if col_attrs['reference']: 398 | fkeys += '\n' + fkey_template % \ 399 | (table_attr['name'], table_attr['name'], col_attrs['name'], '"' + col_attrs['name'] + '"', 400 | col_attrs['reference'], col_attrs['on_delete'] if 'on_delete' in col_attrs else default_on_def) 401 | # fkeys += '\n' + index_template % (table_attr['name'], col_attrs['name'], table_attr['name'], col_attrs['name']) 402 | 403 | return fkeys 404 | 405 | def _get_sql_indexes(self, table_attr): 406 | """ 407 | Generate indexes database statement 408 | :param table_attr: table attrs 409 | :return: SQL statement for creating 410 | """ 411 | index_template = 'CREATE INDEX %s_%s_x ON %s ("%s");\n' 412 | indexes = '\n'; 413 | 414 | for index_name, index_attrs in table_attr['indexes'].items(): 415 | columns = list() 416 | for index_column_name in index_attrs['columns']: 417 | columns.append(table_attr['columns'][index_column_name]['name']) 418 | indexes += index_template % (table_attr['name'], index_attrs['name'], 'public."' + table_attr['name'] + '"', '" ,"'.join(columns)) 419 | 420 | return indexes 421 | 422 | def _get_sql_create_table(self, table_attr): 423 | """ 424 | Generate create database statement 425 | :param table_attr: table attrs 426 | :return: SQL statement for creating 427 | """ 428 | template = 'CREATE TABLE IF NOT EXISTS "%s" (\n %s );' 429 | columns_pri, columns_ref, columns, columns_ignore = \ 430 | PsqlParser._get_categorized_columns(table_attr['columns']) 431 | v2_columns = [] 432 | for columnName, columnAttr in merge_dicts(columns_pri, columns_ref, columns).items(): 433 | v2_columns.append(PsqlParser._get_sql_column(columnAttr)) 434 | return template % (table_attr['name'], ", \n 
".join(v2_columns)) 435 | 436 | @staticmethod 437 | def _get_categorized_columns(tableColumns): 438 | """ 439 | Generate return table columns by category 440 | :return: PK, ref_cols, remaining, skipped 441 | """ 442 | columns = {} 443 | columns_ref = {} 444 | columns_pri = {} 445 | columns_ignore = {} 446 | first_pk_col = None 447 | 448 | for col_name, col_attrs in tableColumns.items(): 449 | if RuleHandler.STR_SKIP in col_attrs: 450 | columns_ignore[col_name] = col_attrs 451 | elif col_attrs['isPk']: 452 | if first_pk_col is None: 453 | first_pk_col = col_name 454 | columns_pri[first_pk_col] = col_attrs 455 | else: 456 | col_attrs['isPk'] = False 457 | columns[col_name] = col_attrs 458 | 459 | if first_pk_col in columns_pri: 460 | columns_pri[first_pk_col]['isPk'] = False 461 | columns[first_pk_col] = columns_pri[first_pk_col] 462 | del columns_pri[first_pk_col] 463 | 464 | if 'pkC' not in columns_pri: 465 | columns_pri['pkC'] = copy.copy(col_attrs) 466 | columns_pri['pkC']['isPkC'] = list([first_pk_col]) 467 | 468 | columns_pri['pkC']['isPkC'].append(col_name) 469 | elif col_attrs['reference']: 470 | columns_ref[col_name] = col_attrs 471 | else: 472 | columns[col_name] = col_attrs 473 | 474 | return columns_pri, columns_ref, columns, columns_ignore 475 | 476 | @staticmethod 477 | def _get_sql_column(column_attr): 478 | """ 479 | Generate table columns statements 480 | :param column_attr: col attrs 481 | :return: SQL statement adding columns 482 | """ 483 | col_def_sql = ' "%s"' % column_attr['name'] 484 | if column_attr['extra'] == 'auto_increment': 485 | col_def_sql += ' SERIAL' 486 | else: 487 | col_def_sql += ' %s' % column_attr['type'].upper() 488 | 489 | if column_attr['size'] and column_attr['type'] not in ['text', 'bytea', 'smallint', 'decimal', 'set']: 490 | col_def_sql += '(' + str(column_attr['size']) + ')' 491 | if not column_attr['nullable']: 492 | col_def_sql += ' NOT NULL' 493 | if column_attr['isPk']: 494 | col_def_sql += ' PRIMARY KEY' 495 | if column_attr['default'] is not None: 496 | if column_attr['default'].replace(".", "", 1).isdigit(): 497 | if column_attr['type'] == 'boolean': 498 | col_def_sql += ' DEFAULT ' + ('true' if column_attr['default'] != '0' else 'false') 499 | else: 500 | col_def_sql += ' DEFAULT ' + column_attr['default'] 501 | elif column_attr['default'] == 'current_timestamp': 502 | col_def_sql += ' DEFAULT ' + column_attr['default'] 503 | elif column_attr['default'].lower() == "true" or column_attr['default'].lower() == "false": 504 | col_def_sql += ' DEFAULT ' + column_attr['default'].upper() 505 | else: 506 | col_def_sql += " DEFAULT U&'%s'" % column_attr['default'] 507 | 508 | if 'isPkC' in column_attr: 509 | col_def_sql = 'PRIMARY KEY (' + ','.join(column_attr['isPkC']) + ')' 510 | 511 | return col_def_sql 512 | 513 | @staticmethod 514 | # NOT IN USE 515 | def _psql_escape(value, value_type): 516 | if value is None: 517 | return 'null' 518 | if value_type in ['int', 'decimal']: 519 | return value if not isinstance(value, Decimal) else float(value) 520 | if value_type in ['boolean']: 521 | return 'false' if value == '0' else 'true' 522 | if value_type.startswith('timestamp') or value_type == 'date': 523 | return str(value) 524 | # return str(value.encode('utf8').replace('\'', '\'\'').replace('\\', '\\\\')) 525 | # return psycopg2._param_escape(value.encode('utf8')) 526 | return '$$'+str(value.encode('utf8'))+'$$' 527 | 528 | @staticmethod 529 | def sql_copy_format(row_data): 530 | row_data = re.sub(r"::(\w*)", "", row_data) 531 | row_data = 
row_data.replace("', E'", "', '") 532 | return row_data 533 | --------------------------------------------------------------------------------