├── libs
│   ├── __init__.py
│   ├── RuleHandler.py
│   ├── MysqlParser.py
│   └── PsqlParser.py
├── logs
│   └── .gitignore
├── output
│   └── .gitignore
├── .gitignore
├── requirements.txt
├── bin
│   ├── install_requirements.sh
│   └── migrate.sh
├── config
│   └── parameters.json.sample
├── rules
│   ├── mysql_raw_dump.json
│   ├── schema_changes.json
│   └── mysql_to_psql.json
├── LICENSE
├── dumperAuxFuncs.py
├── README.md
└── main.py

/libs/__init__.py:
--------------------------------------------------------------------------------
1 | 
--------------------------------------------------------------------------------
/logs/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/output/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | tools/cleaner/output/*
2 | output/*
3 | vendor/*
4 | config/parameters.json
5 | logs/*
6 | *.pyc
7 | .vscode/*
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | beautifulsoup4==4.0.1
2 | PyMySQL==1.1.0
3 | httplib2==0.18.0
4 | netifaces==0.11.0
5 | psycopg2cffi==2.9.0
6 | html5lib==0.9999999
7 | chardet==2.3.0
8 | 
--------------------------------------------------------------------------------
/bin/install_requirements.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | echo "Installing project library dependencies"
3 | #pip install --use-pep517 -r requirements.txt --upgrade
4 | pip install -r requirements.txt --upgrade
--------------------------------------------------------------------------------
/config/parameters.json.sample:
--------------------------------------------------------------------------------
 1 | {
 2 |   "mysql": {
 3 |     "host": "127.0.0.1",
 4 |     "port": 3306,
 5 |     "user": "root",
 6 |     "password": "root",
 7 |     "charset": "utf8",
 8 |     "use_unicode": false
 9 |   },
10 |   "psql": {
11 |     "host": "127.0.0.1",
12 |     "port": 5432,
13 |     "user": "root",
14 |     "password": "root"
15 |   },
16 |   "threads": 0,
17 |   "prefix": false,
18 |   "v1_schema_name" : false
19 | }
--------------------------------------------------------------------------------
/rules/mysql_raw_dump.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "column": {
 3 |     "type": [
 4 |       {"value": "boolean", "method": "convertStrBoolean"},
 5 |       {"value": "bytea", "method": "makeItEmpty"},
 6 |       {"value": "TIME", "method": "makeItTime"}
 7 |     ],
 8 |     "fullType": [
 9 |       {"value": "datetime", "method": "notNullableDatetime"},
10 |       {"value": "date", "method": "notNullableDate"}
11 |     ],
12 |     "reference": [
13 |       {"value": "notNone", "method": "refToNullable"}
14 |     ]
15 |   }
16 | }
--------------------------------------------------------------------------------
/rules/schema_changes.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "tables": {
 3 |     "reservation_reminder": {
 4 |       "_PRE_SQL_": [
 5 |         "DELETE IGNORE FROM reservation_reminder WHERE resa_id NOT IN (SELECT id FROM reservation)"
 6 |       ],
 7 |       "name": "reminder",
 8 |       "columns": {
 9 |         "resa_id": {
10 |           "name": "reservation_id",
11 |           "reference": "reservation (id)"
"reservation (id)" 12 | }, 13 | "user_id": { 14 | "nullable": true 15 | }, 16 | "client_id": { 17 | "reference": "client (id)" 18 | } 19 | } 20 | }, 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Gabriel Garrido 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /rules/mysql_to_psql.json: -------------------------------------------------------------------------------- 1 | { 2 | "column": { 3 | "type": [ 4 | {"from": "datetime", "to": "timestamp"}, 5 | {"from": "tinyint", "to": "smallint", 6 | "_IF_": [ 7 | {"attr": "fullType", "val": "tinyint(1)", "to": "boolean"} 8 | ] 9 | }, 10 | {"from": "smallint", "to": "smallint", 11 | "_IF_": [ 12 | {"attr": "fullType", "val": "smallint(1)", "to": "boolean"} 13 | ] 14 | }, 15 | {"from": "int", "to": "int", 16 | "_IF_": [ 17 | {"attr": "fullType", "val": "int(1)", "to": "boolean"} 18 | ] 19 | }, 20 | {"from": "mediumint", "to": "int"}, 21 | {"from": "mediumtext", "to": "text"}, 22 | {"from": "longtext", "to": "text"}, 23 | {"from": "longblob", "to": "bytea"}, 24 | {"from": "blob", "to": "bytea"}, 25 | {"from": "double", "to": "decimal"}, 26 | {"from": "enum", "to": "set"} 27 | ], 28 | "default": [ 29 | {"from": "CURRENT_TIMESTAMP", "to": "current_timestamp"} 30 | ] 31 | }, 32 | "table": { 33 | "engine": [ 34 | {"from": "MyISAM", "to": "INNODB"} 35 | ] 36 | } 37 | } -------------------------------------------------------------------------------- /dumperAuxFuncs.py: -------------------------------------------------------------------------------- 1 | __author__ = 'ggarrido' 2 | 3 | import datetime 4 | import re 5 | 6 | validTimeRE = re.compile(u'\d\d:\d\d') 7 | 8 | def convertStrBoolean(value, col_attrs=None): 9 | if not value or not (isinstance(value, str) or isinstance(value, int)): 10 | return False 11 | return False if value == 0 or int(value) == 0 else True 12 | 13 | def defaultDate(value, format, defaultValue, nullable): 14 | if value is None or value[:4] == '0000': 15 | return None if nullable else '1900-01-01' 16 | return value 17 | 18 | def notNullableDate(value, col_attrs=None): 19 | nullable = col_attrs['nullable'] if col_attrs else False 20 | format, defaultValue = "%d%m%Y", "01011900" 21 | return defaultDate(value, format, defaultValue, 
nullable) 22 | 23 | 24 | def notNullableDatetime(value, col_attrs=None): 25 | nullable = col_attrs['nullable'] if col_attrs else False 26 | format, defaultValue = "%d%m%Y %H:%M:%S", "01011900 00:00:00" 27 | return defaultDate(value, format, defaultValue, nullable) 28 | 29 | def refToNullable(value, col_attrs=None): 30 | nullable = col_attrs['nullable'] if col_attrs else False 31 | if (value == 0 or value == '0') and nullable: return None 32 | return value 33 | 34 | def makeItEmpty(value, col_attrs=None): 35 | nullable = col_attrs['nullable'] if col_attrs else False 36 | return None if nullable else '' 37 | 38 | def makeItTime(value, col_attrs=None): 39 | nullable = col_attrs['nullable'] if col_attrs else False 40 | if value is not None and re.match(validTimeRE, value): return value 41 | return None if nullable else '00:00' 42 | -------------------------------------------------------------------------------- /bin/migrate.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | readonly YELLOW="\033[0;33m" 3 | readonly RED="\033[0;31m" 4 | readonly GREEN="\033[0;32m" 5 | readonly NC="\033[0;0m" 6 | readonly BASH_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )" 7 | 8 | function error { 9 | MSG="$1" 10 | echo -e "${RED}ERROR: ${MSG}${NC}\nExitting" 11 | exit 1 12 | } 13 | 14 | PORT=5432 15 | for ((i=1;i<=$#;i++)); 16 | do 17 | if [ ${!i} = "-p" ] 18 | then ((i++)) 19 | PORT=${!i}; 20 | 21 | elif [ ${!i} = "-d" ]; 22 | then ((i++)) 23 | DB_NAME=${!i}; 24 | 25 | elif [ ${!i} = "-U" ]; 26 | then ((i++)) 27 | USER=${!i}; 28 | 29 | elif [ ${!i} = "-Wf" ]; 30 | then ((i++)) 31 | export PGPASSWORD=${!i}; 32 | 33 | elif [ ${!i} = "-W" ]; 34 | then ((i++)) 35 | echo -en "${YELLOW}Root password${NC}\n" 36 | read -s password 37 | export PGPASSWORD=$password 38 | fi 39 | done; 40 | 41 | readonly ERRLOG="/tmp/pg_migration_$(date +%s).err" 42 | touch "${ERRLOG}" 43 | readonly HOST="127.0.0.1" 44 | 45 | echo -e "${YELLOW}************************${NC}" 46 | echo -e "${YELLOW} MIGRATION MYSQL > PG ${NC}" 47 | echo -e "${YELLOW}************************${NC}" 48 | 49 | # touch ${DUMPLOG} 50 | # echo -e "${YELLOW}Logs are being redirect to:${NC} \n\tQueries:\t${DUMPLOG}\n\tErrors:\t\t${ERRLOG}${NC}" 51 | 52 | echo -en "${YELLOW}Creating db${NC}..." 53 | psql --set ON_ERROR_STOP=on -U ${USER} -p ${PORT} -o ${ERRLOG} -c "DROP DATABASE IF EXISTS \"${DB_NAME}\"" || error "Dropping old db" 54 | psql --set ON_ERROR_STOP=on -U ${USER} -p ${PORT} -o ${ERRLOG} -c "CREATE DATABASE \"${DB_NAME}\"" || error "Creating db" 55 | echo "Done ($SECONDS)" 56 | 57 | SECONDS=0 58 | SQL="./output/${DB_NAME}/psql_tables.sql" 59 | echo -en "${YELLOW}Creating v2 tables${NC}..." 60 | psql --set ON_ERROR_STOP=on -U ${USER} -p ${PORT} -f ${SQL} -o ${ERRLOG} -d ${DB_NAME} || error "Creating tables" 61 | echo "Done ($SECONDS)" 62 | 63 | SECONDS=0 64 | SQL="./output/${DB_NAME}/psql_data.sql" 65 | echo -en "${YELLOW}Inserting data${NC}..." 66 | psql --set ON_ERROR_STOP=on -U ${USER} -p ${PORT} -f ${SQL} -o ${ERRLOG} -d ${DB_NAME} || error "Inserting data" 67 | echo "Done ($SECONDS)" 68 | 69 | SECONDS=0 70 | SQL="./output/${DB_NAME}/psql_views.sql" 71 | echo -en "${YELLOW}Creating views${NC}..." 72 | psql --set ON_ERROR_STOP=on -U ${USER} -p ${PORT} -f ${SQL} -o ${ERRLOG} -d ${DB_NAME} || error "Creating views" 73 | echo "Done ($SECONDS)" 74 | 75 | SECONDS=0 76 | SQL="./output/${DB_NAME}/psql_index_fk.sql" 77 | echo -en "${YELLOW}Creating indexes and fk${NC}..." 
78 | psql --set ON_ERROR_STOP=on -U ${USER} -p ${PORT} -f ${SQL} -o ${ERRLOG} -d ${DB_NAME} || error "Creating indexes and constraints"
79 | echo "Done ($SECONDS)"
80 | 
81 | echo -e "${GREEN}Migration to PG was completed SUCCESSFULLY${NC}"
82 | exit 0
83 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Migrate MySQL db to PostgreSQL (by rules)
 2 | 
 3 | Migrate your current MySQL databases into Postgres in a single command.
 4 | 
 5 | This tool lets you migrate your local MySQL databases to Postgres and even modify your current database structure and names, achieving better
 6 | consistency by defining new foreign keys and cleaning up the data using `_PRE_SQL_` queries.
 7 | 
 8 | # How to use it
 9 | 
10 | ### Mode1: Migrate a single database
11 |     $ python main.py {db_name}
12 | 
13 | ### Mode2: Migrate a few tables from a single database
14 |     $ python main.py {db_name} [{table_name1} {table_name2} ..]
15 | 
16 | ### Mode3: Migrate your whole MySQL schema
17 |     $ python main.py all-databases
18 | (it is recommended to use `prefix` from parameters.json, read more below)
19 | 
20 | # Documentation
21 | ## OS Dependencies
22 | ### Install pip
23 |     sudo apt-get install python-pip python-dev build-essential python-psycopg2 python-mysqldb libpq-dev libmysqlclient-dev
24 | 
25 | ## Environment dependencies
26 | ### Install Python libraries and vendors
27 |     sudo bash ./bin/install_requirements.sh
28 | 
29 | ## Set it up for yourself
30 | #### Step1: Set up db config
31 | Set up your database configuration in `./config/parameters.json`
32 | * `mysql`: MySQL connection values
33 | * `psql`: Postgres connection values
34 | * `threads`: In case of 'all-databases', the number of threads to run in parallel (max. number of CPUs). Use 0 for non-parallel execution
35 | * `prefix`: In case of 'all-databases', only databases whose name starts with this prefix are migrated. Otherwise use false
36 | * `v1_schema_name`: If you want to keep the old schema available as a separate Postgres schema (views), define its name here. Otherwise use false
37 | 
38 | #### Step2: Version schema names
39 | Set up your schema names for version1 and version2 in `./config/parameters.json`
40 | 
41 | #### Step3: Define the model rules you want to apply
42 | * Open `./rules/schema_changes.json`
43 | * Define your own schema rules in it. These rules are used to redefine the new db structure; any table or column without rules will be migrated as-is from MySQL
44 | 
45 | #### Step4: Define PostgreSQL conversion rules from MySQL ones
46 | * Open `./rules/mysql_to_psql.json`
47 | * Define MySQL-to-Postgres conversions; most of the rules are already defined by default, but some might still be missing
48 | 
49 | #### Step5: Define data conversion
50 | * Open `./rules/mysql_raw_dump.json`
51 | * Define data conversions according to column type; you might prefer different conversions depending on your own model. Functions for conversion are defined in `dumperAuxFuncs.py`, feel free to add your own customized ones (see the sketch below).
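
For instance, a minimal sketch of a custom converter, following the same `(value, col_attrs)` signature as the existing functions (the `emptyStrToNull` name and the rule entry below are hypothetical, not part of the shipped rules):

    # added to dumperAuxFuncs.py
    def emptyStrToNull(value, col_attrs=None):
        nullable = col_attrs['nullable'] if col_attrs else False
        # Turn empty or whitespace-only strings into NULL when the column allows it
        if isinstance(value, str) and value.strip() == '':
            return None if nullable else ''
        return value

It is then wired up by adding an entry to the matching attribute list (here `type`) in `./rules/mysql_raw_dump.json`:

    {"value": "varchar", "method": "emptyStrToNull"}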
52 | 
53 | ========================
54 | 
55 | ## Outputs
56 | These are the files generated during the migration process:
57 | 
58 | * `mysql_schema.json`: Original MySQL schema exported in JSON format
59 | * `mysql_schema_v2.json`: MySQL schema after the model rules were applied
60 | * `mysql_data.sql`: INSERT INTO statements in MySQL syntax
61 | 
62 | * `psql_schema.json`: Postgres schema
63 | * `psql_tables.sql`: CREATE TABLE statements, generated from psql_schema.
64 | * `psql_data.sql`: INSERT INTO statements, generated from psql_schema. Raw data will be placed under the `tables` folder
65 | 
66 | ## Manual migration
67 | ### Mode1: Manually
68 | #### Create tables
69 |     psql -h server -d database_name -U username < ./output/{database}/psql_tables.sql
70 | #### Insert data
71 |     psql -h server -d database_name -U username < ./output/{database}/psql_data.sql
72 | #### Insert indexes and fks
73 |     psql -h server -d database_name -U username < ./output/{database}/psql_index_fk.sql
74 | #### Create views (just in case you want to keep views over the previous schema)
75 |     psql -h server -d database_name -U username < ./output/{database}/psql_views.sql
76 | 
77 | ### Mode2: Single command
78 |     $ bash ./bin/migrate.sh [-p {port}] -U {username} -d {database} -Wf {password}
79 | 
--------------------------------------------------------------------------------
/libs/RuleHandler.py:
--------------------------------------------------------------------------------
 1 | __author__ = 'ggarrido'
 2 | 
 3 | 
 4 | class RuleHandler:
 5 |     """
 6 |     Apply a list of rules to the given json schema. A value is overwritten whenever a rule targets the same location
 7 |     """
 8 | 
 9 |     STR_SKIP = '_SKIP_'
10 | 
11 |     def __init__(self, rules=None, node_rules=None):
12 |         """
13 |         :param rules: Schema rules (same nesting level replacement)
14 |         :param node_rules: Node level rules (column, table)
15 |         :return:
16 |         """
17 |         self.rules = rules if rules is not None else {}
18 |         self.node_rules = node_rules if node_rules is not None else {}
19 | 
20 |     def obtain_modified_schema(self, schema):
21 |         """
22 |         Iterate over every table in schema, replacing its attrs in case there is a rule for it
23 |         :param schema: Db schema
24 |         :return: schema after rules were applied
25 |         """
26 |         res_schema = schema.copy()
27 |         self._apply_rules(res_schema)
28 |         self._apply_node_rules(res_schema)
29 |         return res_schema
30 | 
31 |     def _apply_rules(self, schema):
32 |         if self.rules is None or 'tables' not in self.rules:
33 |             return
34 | 
35 |         for table_name, table_attrs in self.rules['tables'].items():
36 |             # If the table doesn't exist on the schema, skip iter
37 |             if 'tables' not in schema or table_name not in schema['tables']:
38 |                 continue
39 | 
40 |             # If table_attr is the SKIP string, it means the table is removed from the schema
41 |             elif table_attrs == self.STR_SKIP:
42 |                 del schema['tables'][table_name]
43 |                 continue
44 | 
45 |             # Otherwise, apply schema changes
46 |             schema_part = schema['tables'][table_name]
47 |             self._apply_rule_table(schema_part, table_attrs)
48 | 
49 |     def _apply_rule_table(self, schema, table_attrs):
50 |         for table_attr_key, table_name_val in table_attrs.iteritems():
51 |             if table_attr_key != 'columns':
52 |                 schema[table_attr_key] = table_name_val
53 |                 continue
54 |             # If the table doesn't have any column declared, skip iter
55 |             elif 'columns' not in schema:
56 |                 continue
57 |             # In case of columns, iterate over all of them, replacing values
58 |             schema_part = schema['columns']
59 |             self._apply_rule_col(schema_part, table_name_val)
60 | 
61 |     def _apply_rule_col(self, 
schema, col_attrs): 62 | for col_name, col_attrs in col_attrs.iteritems(): 63 | # If schema doesn't have col_name defined, skip iter 64 | if col_name not in schema: 65 | continue 66 | elif col_attrs == self.STR_SKIP: 67 | schema[col_name][self.STR_SKIP] = True 68 | # del schema[col_name] 69 | continue 70 | for col_attr_key, col_attr_value in col_attrs.iteritems(): 71 | schema[col_name][col_attr_key] = col_attr_value 72 | if col_attr_key == 'type' and 'size' not in col_attrs: 73 | schema[col_name]['size'] = None 74 | 75 | def _apply_node_rules(self, schema): 76 | for table_name, table_attrs in schema['tables'].items(): 77 | self._apply_table_node_rule(schema['tables'][table_name], table_attrs) 78 | 79 | def _apply_table_node_rule(self, schema, table_attrs): 80 | for table_attr_key, table_attr_value in table_attrs.items(): 81 | # In case it is a table attr and there is rules for them 82 | if table_attr_key != 'columns' and table_attr_key in self.node_rules.get('table', {}): 83 | for node_attr_fromto in self.node_rules['table'][table_attr_key]: 84 | if table_attr_value == node_attr_fromto['from']: 85 | schema[table_attr_key] = node_attr_fromto['to'] 86 | # In case of columns 87 | elif table_attr_key == 'columns': 88 | # Replace in case from value matches current column attr value 89 | for col_name, col_attrs in table_attr_value.items(): 90 | self._apply_col_node_rule(schema['columns'][col_name], col_attrs) 91 | 92 | def _apply_col_node_rule(self, schema, col_attrs): 93 | for col_attr_key, col_attr_value in col_attrs.items(): 94 | if col_attr_key not in self.node_rules.get('column', {}): 95 | continue 96 | for node_attr_fromto in self.node_rules['column'][col_attr_key]: 97 | if col_attr_value == node_attr_fromto['from']: 98 | # Check if there are cases depending of other attr values 99 | schema[col_attr_key] = node_attr_fromto['to'] 100 | if '_IF_' in node_attr_fromto: 101 | for if_cond in node_attr_fromto['_IF_']: 102 | if schema[if_cond['attr']] == if_cond['val']: 103 | schema[col_attr_key] = if_cond['to'] 104 | 105 | @staticmethod 106 | def get_skip_colums(schema_changes): 107 | skipped_cols = [] 108 | for table_name, table_attrs in schema_changes['tables'].tems(): 109 | if 'columns' not in table_attrs: 110 | continue 111 | for col_name, col_attrs in table_attrs['columns'].items(): 112 | if RuleHandler.STR_SKIP == col_attrs: 113 | skipped_cols.append((table_attrs.get('name', table_name), col_name)) 114 | return skipped_cols -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | __author__ = 'ggarrido' 2 | 3 | import json 4 | import sys 5 | import os 6 | import time 7 | 8 | from libs.MysqlParser import MysqlParser 9 | from libs.RuleHandler import RuleHandler 10 | from libs.PsqlParser import PsqlParser 11 | from multiprocessing import Pool 12 | import multiprocessing 13 | import subprocess 14 | import traceback 15 | 16 | try: import psycopg2 17 | except ImportError: import psycopg2cffi as psycopg2 18 | 19 | 20 | path = os.path.dirname(os.path.realpath(__file__)) 21 | MAX_THREADS, skip_pre_sql, pending_dbs, terminate = multiprocessing.cpu_count(), False, None, False 22 | information_schema = 'information_schema' 23 | 24 | def migrate(config, psql_conn_params, database, tables, skip_pre_sql, log_file=None): 25 | """ 26 | Main executor, generate psql no-data dump file base on indicated db/tables from mysql connection 27 | :param config: Mysql db connection params and others 28 
| :param database: Database to migrate 29 | :param tables: Table to migrate 30 | :return: 31 | """ 32 | db_name = database 33 | output_path = os.path.join(path, 'output', db_name) 34 | tables_path = os.path.join(output_path, 'tables') 35 | 36 | # can trigger a race condition 37 | if not os.path.exists(output_path): os.mkdir(output_path, 755) 38 | if not os.path.exists(tables_path): os.mkdir(tables_path, 755) 39 | 40 | pg_conn = psycopg2.connect(**psql_conn_params) 41 | pg_cursor = pg_conn.cursor() 42 | 43 | mysql_conn_params = config['mysql'] 44 | mysql_conn_params['db'] = db_name 45 | mysql_parser = MysqlParser(mysql_conn_params, information_schema) 46 | mysql_parser.set_skip_pre_sql(skip_pre_sql) 47 | mysql_schema = mysql_parser.get_schema(db_name, tables) 48 | 49 | # Generate psql schema, parsing psql rules(types, defaults...) 50 | # Write result into output/psql_schema.json 51 | psql_parser = PsqlParser(pg_cursor, pg_conn) 52 | 53 | try: 54 | with open(os.path.join(output_path, 'mysql_schema.json'), 'w') as outfile: 55 | json.dump(mysql_schema, outfile, indent=4, sort_keys=True) 56 | 57 | # Applying model rules (renaming, new defaults, ....) 58 | # Write result into output/mysql_schema_v2.json 59 | schema_changes = json.loads(open('./rules/schema_changes.json').read()) 60 | rule_handler = RuleHandler(schema_changes) 61 | mysql_schema_v2 = rule_handler.obtain_modified_schema(mysql_schema) 62 | with open(os.path.join(output_path, 'mysql_schema_v2.json'), 'w') as outfile: 63 | json.dump(mysql_schema_v2, outfile, indent=4, sort_keys=True) 64 | outfile.close() 65 | 66 | psql_schema = psql_parser.get_schema_from_mysql(mysql_schema_v2) 67 | with open(os.path.join(output_path, 'psql_schema.json'), 'w') as outfile: 68 | json.dump(psql_schema, outfile, indent=4, sort_keys=True) 69 | outfile.close() 70 | 71 | # Generate psql create table queries from psql schema generated on previous step 72 | # Write result into output/psql_tables.sql 73 | timeS, msg = time.time(), "Generating Schema... " 74 | print(msg) 75 | if log_file: log_file.write(msg) 76 | psql_parser.generate_sql_schema(psql_schema, 'public', os.path.join(output_path, 'psql_tables.sql')) 77 | print(time.time() - timeS) 78 | if log_file: log_file.write(str(time.time() - timeS)+'\n') 79 | 80 | 81 | # Generate mysql dump file 82 | timeS, msg = time.time(), "Generating raw data...(might take few minutes)" 83 | print(msg) 84 | if log_file: log_file.write(msg) 85 | psql_parser.generate_dump_from_raw(mysql_parser, db_name, psql_schema, 'public', 86 | os.path.join(output_path, 'psql_data.sql'), tables_path, schema_changes) 87 | 88 | print(time.time() - timeS) 89 | if log_file: log_file.write(str(time.time() - timeS)+'\n') 90 | 91 | 92 | timeS, msg = time.time(), "Generating indexes and fk... " 93 | print(msg) 94 | if log_file: log_file.write(msg) 95 | psql_parser.generate_psql_index_fk(mysql_schema_v2, os.path.join(output_path, 'psql_index_fk.sql')) 96 | print(time.time() - timeS) 97 | if log_file: log_file.write(str(time.time() - timeS)+'\n') 98 | 99 | 100 | # Generate vies in case it is a client db 101 | if 'v1_schema_name' in config and config['v1_schema_name'] and len(config['v1_schema_name']) > 0: 102 | timeS, msg = time.time(), "Generating views... 
" 103 | print(msg) 104 | if log_file: log_file.write(msg) 105 | psql_parser.generate_psql_views(mysql_schema_v2, config['v1_schema_name'], 'public', 106 | os.path.join(output_path, 'psql_views.sql')) 107 | else: 108 | open(os.path.join(output_path, 'psql_views.sql'), 'w').close() 109 | print(time.time() - timeS) 110 | if log_file: log_file.write(str(time.time() - timeS)+'\n') 111 | # except: 112 | # e = sys.exc_info()[0] 113 | # print "ERROR: %s" % str(e) 114 | finally: 115 | mysql_parser.close() 116 | pg_cursor.close() 117 | 118 | def get_all_databases(config): 119 | mysql_conn_params = config['mysql'] 120 | mysql_parser = MysqlParser(mysql_conn_params, information_schema) 121 | dbs = mysql_parser.get_all_databases(config['prefix']) 122 | mysql_parser.close() 123 | return dbs 124 | 125 | def migrate_db(params, psql_conn_params, database, tables=[], skip_pre_sql=False): 126 | print('-------------------------------------') 127 | print('\t %s ' % (database)) 128 | print('-------------------------------------') 129 | 130 | log_file_path = os.path.join(path, 'logs', database+'.log') 131 | log_file = open(log_file_path, 'w') 132 | try: 133 | migrate(config, psql_conn_params, database, tables, skip_pre_sql, log_file) 134 | timeS = time.time() 135 | print("Running ./bin/migrate.sh .....logs in " + log_file_path) 136 | subprocess.check_call(['bash', path+'/bin/migrate.sh', '-d', database, '-Wf', config['psql']['password'], '-p', 137 | str(config['psql']['port']), '-U', config['psql']['user']] 138 | , stderr=log_file, stdout=log_file) 139 | print(time.time() - timeS) 140 | except Exception: 141 | e = sys.exc_info()[0] 142 | print("ERROR: %s" % str(e)) 143 | log_file.write("Python exception during generating\n") 144 | log_file.write("ERROR: %s" % e) 145 | print(traceback.format_exc()) 146 | finally: 147 | log_file.close() 148 | return database 149 | 150 | def migration_completed(database): 151 | global pending_dbs; pending_dbs -= 1 152 | log_file_path = os.path.join(path, 'logs', database+'.log') 153 | log_file = open(log_file_path, 'r') 154 | print(log_file.read()) 155 | log_file.close() 156 | return database 157 | 158 | def test_f(params, pg_cursor, database, tables=[]): 159 | print(params, pg_cursor, database, tables) 160 | return database 161 | 162 | if __name__ == '__main__': 163 | database = sys.argv[1] 164 | tables = sys.argv[2:] 165 | 166 | config = json.loads(open('./config/parameters.json').read()) 167 | databases = [database] if database != "all-databases" else get_all_databases(config) 168 | pending_dbs = len(databases) 169 | isThreading, n_threads = False, 0 170 | if pending_dbs > 1 and 'threads' in config and int(config['threads']) > 0: 171 | isThreading = True 172 | if int(config['threads']) > MAX_THREADS: print("WARNING: Max number of threads are %d" % config['threads']) 173 | n_threads = MAX_THREADS if (int(config['threads']) > MAX_THREADS) else config['threads'] 174 | 175 | psql_conn_params = config['psql'] 176 | psql_conn_params['dbname'] = 'postgres' 177 | if isThreading: pool = Pool(processes=n_threads) 178 | 179 | for database in databases: 180 | if isThreading: 181 | pool.apply_async(migrate_db, [config, psql_conn_params, database, tables, skip_pre_sql], callback=migration_completed) 182 | else: 183 | migrate_db(config, psql_conn_params, database, tables, skip_pre_sql) 184 | migration_completed(database) 185 | try: 186 | while isThreading and pending_dbs>0: print("Pending dbs %s...") % pending_dbs; sys.stdout.flush(); time.sleep(5) 187 | except KeyboardInterrupt: 
terminate=True; print("Interrupt!!!") 188 | 189 | if isThreading: 190 | if terminate: pool.terminate() 191 | else: 192 | print("Waiting threads to complete"); pool.close() 193 | print("Waiting threads to wrap-up"); pool.join() -------------------------------------------------------------------------------- /libs/MysqlParser.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | __author__ = 'ggarrido' 4 | 5 | import subprocess 6 | from collections import OrderedDict 7 | import pymysql.cursors 8 | from pymysql.converters import decoders, through 9 | 10 | 11 | RED, GREEN, YELLOW, NC = '\033[0;31m', '\033[0;32m', '\033[0;33m', '\033[0m' 12 | 13 | class MysqlParser(): 14 | """ 15 | Generate MySql schema on JSON format given a Mysql Connectior, defined DbName and Tables 16 | """ 17 | 18 | information_schema = 'information_schema' 19 | 20 | def __init__(self, conn_params, information_schema=None): 21 | # replace date, datetime, timstamp decoders so they're not decoded into date objects 22 | custom_decoders = decoders.copy() 23 | custom_decoders[7] = through 24 | custom_decoders[10] = through 25 | custom_decoders[11] = through 26 | custom_decoders[12] = through 27 | 28 | conn_params['conv'] = custom_decoders 29 | self.connection = pymysql.connect(**conn_params) 30 | self.cursor = self.connection.cursor() 31 | self.skip_pre_sql = False 32 | if information_schema is not None: self.information_schema = information_schema 33 | 34 | def close(self): 35 | self.cursor.close() 36 | self.connection.close() 37 | 38 | def set_skip_pre_sql(self, skip=False): 39 | self.skip_pre_sql = skip 40 | 41 | def get_schema(self, db_name, tables=[]): 42 | """ 43 | Iterate over every table from give DbName, filter by passed tables array 44 | :param db_name: DbName to generate schema from 45 | :param tables: Filtered tables 46 | :return: Json schema 47 | """ 48 | mysql_schema = { 49 | 'tables': self._get_db_tables_schema(db_name, tables) 50 | } 51 | 52 | return mysql_schema 53 | 54 | @staticmethod 55 | def mysqldump_data(config, db_name, tables, output_path): 56 | cmd = "mysqldump -h%s -u%s %s %s --compatible=postgresql --no-create-info --compact " \ 57 | "--extended-insert=FALSE --default-character-set=utf8 --complete-insert %s %s > %s "\ 58 | % (config['host'], config['user'], ('-p'+config['password'] if 'password' in config else ''), 59 | ('-P'+str(config['port']) if 'port' in config else ''), db_name, ' '.join(tables), output_path) 60 | subprocess.call(cmd, shell=True) 61 | 62 | @staticmethod 63 | def mysqldump_tables(config, db_name, tables, output_path): 64 | cmd = "mysqldump -h%s -u%s %s %s --compatible=postgresql --no-data --compact %s %s > %s "\ 65 | % (config['host'], config['user'], ('-p'+config['password'] if 'password' in config else ''), 66 | ('-P'+str(config['port']) if 'port' in config else ''), db_name, ' '.join(tables), output_path) 67 | subprocess.call(cmd, shell=True) 68 | 69 | 70 | def run_pre_sql(self, db_name, table, table_attrs, schema_changes): 71 | # def sql_fix_violates_fk(col_name, col_reference): 72 | # reg = re.compile(ur'^[\\]?["]?(\w*)[\\]?["]?\s*\((\w*)\)$') 73 | # reg_match = re.search(reg, col_reference) 74 | # ref_table, ref_col = reg_match.group(1), reg_match.group(2) 75 | # # Getting original table name from new schema 76 | # ref_table_attr = [ cg_table_name for cg_table_name, cg_table_attrs in schema_changes['tables'].iteritems() \ 77 | # if cg_table_name == ref_table or ('name' in cg_table_attrs and cg_table_attrs['name'] == ref_table) 78 | 
# ] 79 | # 80 | # if len(ref_table_attr) != 0: ref_table = ref_table_attr[0] 81 | # 82 | # return """UPDATE IGNORE `%s` AS x 83 | # LEFT JOIN `%s` y ON (y.%s = x.%s) 84 | # SET x.%s = 0 WHERE y.%s IS NULL""" % \ 85 | # (table, ref_table, ref_col, col_name, col_name, ref_col) 86 | if self.skip_pre_sql: return 0 87 | 88 | def get_utc_pre_sql(tableName, columns): 89 | if tableName == 'channel_log_resa': return [] 90 | return ["UPDATE IGNORE `%s` SET `%s` = `%s` - INTERVAL 2 HOUR WHERE `%s` IS NOT NULL" % (tableName, col_name, col_name, col_name) \ 91 | for col_name, col_attr in columns.iteritems() \ 92 | if col_attr['type'] == 'datetime' or col_attr['type'] == 'timestamp'] 93 | 94 | if not '_PRE_SQL_' in table_attrs: table_attrs['_PRE_SQL_'] = [] 95 | 96 | table_attrs['_PRE_SQL_'] += get_utc_pre_sql(table, table_attrs['columns']) 97 | for pre_sql in table_attrs['_PRE_SQL_']: 98 | try: 99 | res = self.cursor.execute(pre_sql) 100 | if res is not None and res != 0: print("%s (Affected rows %d)" % (pre_sql, res)) 101 | except Exception as e: 102 | print(RED + ("ERROR: %s\n MSG: %s" % (pre_sql, str(e))) + NC) 103 | 104 | 105 | def get_table_raw_data(self, db_name, table, cols, table_attrs, schema_changes): 106 | """ 107 | Return raw data from passed table cols, applying conversion rules 108 | :param table: 109 | :param cols: 110 | :param export_rules: 111 | :return: 112 | """ 113 | 114 | def append_join(idx, ref_alias, join_attrs): 115 | alias = 'j'+str(idx) 116 | return ' INNER JOIN %s.%s AS %s ON (%s.%s = %s.%s) ' % ( 117 | db_name, 118 | join_attrs['table'], 119 | alias, 120 | ref_alias, 121 | join_attrs['col'], 122 | alias, 123 | join_attrs['col_ref'] 124 | ) 125 | 126 | # Generate SELECT SQL to export raw data 127 | sql, res = '', None 128 | alias = 't'; sql = "SELECT t.`%s` FROM %s.%s as %s" % ('`, t.`'.join(cols), db_name, table, alias) 129 | if '_JOIN_' in table_attrs: 130 | if not isinstance(table_attrs, list): table_attrs['_JOIN_'] = [table_attrs['_JOIN_']] 131 | for idx, join_attrs in enumerate(table_attrs['_JOIN_']): 132 | sql += append_join(idx, alias, join_attrs) 133 | 134 | if '_WHERE_' in table_attrs: 135 | sql += ' WHERE ' + table_attrs['_WHERE_'] 136 | 137 | if len(sql) > 0: self.cursor.execute(sql); res = self.cursor.fetchall() 138 | return res 139 | 140 | def _get_db_tables_schema(self, db_name, tables=[]): 141 | """ 142 | Iterate over every table(filtered) and obtain information from information_schema 143 | :param db_name: DbName where table belongs 144 | :param tables: Tables to filter 145 | :return: Json with every table from given dbname 146 | """ 147 | output = OrderedDict() 148 | sql = """ 149 | SELECT 150 | T.table_name, T.engine, T.table_collation, T.auto_increment 151 | FROM 152 | %s.tables as T 153 | WHERE 154 | T.table_schema = '%s' 155 | """ % (self.information_schema, db_name) 156 | 157 | self.cursor.execute(sql) 158 | res = self.cursor.fetchall() 159 | for table_info in res: 160 | # Due to an update in the PyMysql lib it return list instead of dict 161 | if isinstance(table_info, tuple): 162 | table_info = dict(zip(('table_name', 'engine', 'table_collation', 'auto_increment') 163 | , list(table_info))) 164 | if len(tables) > 0 and table_info['table_name'] not in tables: 165 | continue 166 | output[table_info['table_name']] = { 167 | 'name': table_info['table_name'], 168 | 'collation': table_info['table_collation'], 169 | 'engine': table_info['engine'], 170 | 'autoIncrement': table_info['auto_increment'], 171 | 'columns': self._get_table_columns_schema(db_name, 
table_info['table_name']), 172 | 'indexes': self._get_table_indexes_schema(db_name, table_info['table_name']) 173 | } 174 | 175 | return output 176 | 177 | def _get_table_columns_schema(self, db_name, table_name): 178 | """ 179 | Iterate over every column from given DbName and Table 180 | :param db_name: DbName where columns belongs to 181 | :param table_name: Table where columns belongs to 182 | :return: Json with every column from given table 183 | """ 184 | columns = OrderedDict() 185 | sql = """ 186 | SELECT 187 | C.column_name, C.is_nullable, C.data_type, C.column_default, C.column_type, 188 | C.character_maximum_length as size, C.column_key as isPk, C.extra, 189 | CONCAT('"', K.referenced_table_name, '"("', referenced_column_name, '")') as reference, 190 | CONCAT_WS(',', C.numeric_precision, C.numeric_scale) as dsize 191 | FROM 192 | %s.columns C 193 | LEFT JOIN %s.KEY_COLUMN_USAGE K ON ( 194 | K.column_name = C.column_name and K.constraint_schema = C.table_schema and C.table_name = K.table_name 195 | ) 196 | WHERE 197 | C.table_name = '%s' 198 | AND C.table_schema = '%s' 199 | """ % (self.information_schema, self.information_schema, table_name, db_name) 200 | 201 | self.cursor.execute(sql) 202 | res = self.cursor.fetchall() 203 | for column_info in res: 204 | # Due to an update in the PyMysql lib it return list instead of dict 205 | if isinstance(column_info, tuple): 206 | column_info = dict( 207 | zip(('column_name', 'is_nullable', 'data_type', 'column_default', 'column_type','size', 208 | 'isPk', 'extra', 'reference', 'dsize'), 209 | list(column_info)) 210 | ) 211 | 212 | columns[column_info['column_name']] = { 213 | 'name': column_info['column_name'], 214 | 'type': column_info['data_type'], 215 | 'nullable': column_info['is_nullable'] == 'YES', 216 | 'size': column_info['size'] if column_info['data_type'] not in ['double', 'decimal'] else column_info['dsize'], 217 | 'default': column_info['column_default'], 218 | 'extra': column_info['extra'], 219 | 'isPk': column_info['isPk'] == 'PRI', 220 | 'fullType': column_info['column_type'], 221 | 'reference': column_info['reference'], 222 | } 223 | 224 | return columns 225 | 226 | def _get_table_indexes_schema(self, db_name, table_name): 227 | """ 228 | Iterate over every column from given DbName and Table 229 | :param db_name: DbName where columns belongs to 230 | :param table_name: Table where columns belongs to 231 | :return: Json with every column from given table 232 | """ 233 | indexes = OrderedDict() 234 | sql = """ 235 | SELECT table_name AS `table_name`, 236 | index_name AS `index_name`, 237 | GROUP_CONCAT(column_name ORDER BY seq_in_index) AS `columns` 238 | FROM %s.statistics S 239 | WHERE S.table_schema = '%s' 240 | AND S.table_name = '%s' 241 | AND S.index_name <> 'PRIMARY' 242 | GROUP BY 1,2; 243 | """ % (self.information_schema, db_name, table_name) 244 | 245 | self.cursor.execute(sql) 246 | res = self.cursor.fetchall() 247 | for index_info in res: 248 | # Due to an update in the PyMysql lib it return list instead of dict 249 | if isinstance(index_info, tuple): 250 | index_info = dict( 251 | zip(('table_name', 'index_name', 'columns'), 252 | list(index_info)) 253 | ) 254 | 255 | indexes[index_info['index_name']] = { 256 | 'name': index_info['index_name'], 257 | 'columns': index_info['columns'].split(',') 258 | } 259 | 260 | return indexes 261 | 262 | 263 | def get_user_user_pass(self, opcode): 264 | sql = """ 265 | SELECT cl.db_login as login, 266 | cl.db_pass as pass 267 | FROM base7_config.client_db as cl 268 | WHERE 
cl.identifier = '%s' 269 | """ % (opcode) 270 | 271 | self.cursor.execute(sql) 272 | res = self.cursor.fetchone() 273 | 274 | if res is None: return None, None 275 | return res[0], res[1] 276 | 277 | 278 | def get_all_databases(self, prefix): 279 | sql = """ 280 | SELECT TABLE_SCHEMA as db_name 281 | FROM """+self.information_schema+""".tables 282 | WHERE TABLE_SCHEMA <> 'mysql' 283 | AND TABLE_SCHEMA <> 'information_schema'""" 284 | 285 | if prefix and len(prefix)>0: 286 | sql += ' AND TABLE_SCHEMA LIKE \''+prefix+'%\'' 287 | 288 | sql += ' GROUP BY TABLE_SCHEMA' 289 | 290 | self.cursor.execute(sql) 291 | res = self.cursor.fetchall() 292 | return [str(db[0]) for db in res] 293 | -------------------------------------------------------------------------------- /libs/PsqlParser.py: -------------------------------------------------------------------------------- 1 | __author__ = 'ggarrido' 2 | 3 | import os 4 | import re 5 | import json 6 | import time 7 | import copy 8 | from libs.RuleHandler import RuleHandler 9 | from libs.MysqlParser import MysqlParser 10 | import dumperAuxFuncs 11 | from decimal import Decimal 12 | 13 | REGEX_TYPE = type(re.compile('')) 14 | PGSQL_BLOCK = 1000 15 | 16 | def merge_dicts(*dict_args): 17 | ''' 18 | Given any number of dicts, shallow copy and merge into a new dict, 19 | precedence goes to key value pairs in latter dicts. 20 | ''' 21 | result = {} 22 | for dictionary in dict_args: 23 | result.update(dictionary) 24 | return result 25 | 26 | class PsqlParser(): 27 | 28 | def __init__(self, cursor, conn): 29 | self.cur = cursor 30 | self.conn = conn 31 | self.raw_dump_rules = json.loads(open('./rules/mysql_raw_dump.json').read()) 32 | self.rules = json.loads(open('./rules/mysql_to_psql.json').read()) 33 | 34 | def close(self): 35 | self.cur.close() 36 | self.conn.close() 37 | 38 | def get_schema_from_mysql(self, mysql_schema): 39 | pq_schema = mysql_schema.copy() 40 | rule_handler = RuleHandler(None, {'table': self.rules['table'], 'column': self.rules['column']}) 41 | return rule_handler.obtain_modified_schema(pq_schema) 42 | 43 | def generate_sql_user(self, mysql_parser, opcode, psql_users_path): 44 | output = open(psql_users_path, 'w') 45 | if opcode is not None and len(opcode) > 0 and opcode != 'empty': 46 | users_sql = self._get_sql_user(opcode, mysql_parser) 47 | output.write(users_sql) 48 | output.close() 49 | 50 | 51 | def generate_sql_schema(self, schema, schema_name, psql_tables_path): 52 | """ 53 | Generate sql queries from given schema 54 | :param schema: Psql schema 55 | :return: Psql queries to generate tables 56 | """ 57 | psql_tables = open(psql_tables_path, 'w') 58 | psql_tables.write("SET client_min_messages TO WARNING;\n") 59 | psql_tables.write("DROP SCHEMA IF EXISTS %s CASCADE;\n" % schema_name) 60 | psql_tables.write("CREATE SCHEMA IF NOT EXISTS %s;\n" % schema_name) 61 | psql_tables.write("SET SCHEMA '%s';\n" % schema_name) 62 | psql_tables.write("CREATE EXTENSION \"unaccent\";\n\n") 63 | 64 | for table_name, table_attr in schema['tables'].items(): 65 | psql_tables.write("\n-- CREATE TABLE %s \n %s \n %s \n" % ( 66 | table_attr['name'], self._get_sql_drop_table(table_attr), 67 | self._get_sql_create_table(table_attr) 68 | )) 69 | 70 | psql_tables.close() 71 | 72 | # @deprecated 73 | def generate_dump_from_mysql_dump(self, schema_changes, schema_name, mysql_dump_path, psql_dump_path): 74 | """ 75 | Read an mysql dump file and convert it into psql syntax 76 | :param schema_changes: changes applied into mysql schema 77 | :param schema_name: 
psql schema name 78 | :param mysql_dump_path: mysql source file 79 | :param psql_dump_path: desc file for dump 80 | """ 81 | mysql_dump = open(mysql_dump_path, 'r') 82 | psql_dump = open(psql_dump_path, 'w') 83 | psql_dump.write(self._get_dump_initial_statements()) 84 | psql_dump.write("\n\n") 85 | 86 | for i, line in enumerate(mysql_dump): 87 | insert_sql = self._convert_mysql_insert_to_psql(schema_changes, line) 88 | if insert_sql is not None: 89 | psql_dump.write(insert_sql) 90 | psql_dump.write("\n") 91 | 92 | psql_dump.write("\n\n") 93 | psql_dump.write(self._get_dump_final_statements()) 94 | for skip in RuleHandler.get_skip_colums(schema_changes): 95 | psql_dump.write("ALTER TABLE \"%s\" DROP COLUMN IF EXISTS \"%s\";\n" % skip) 96 | 97 | mysql_dump.close() 98 | psql_dump.close() 99 | 100 | 101 | def generate_dump_from_raw(self, mysql_parser, db_name, pg_schema, schema_name, psql_dump_path, tables_path, schema_changes): 102 | """ 103 | Obtain raw data from mysql connection and convert into INSERT INTOs 104 | :param mysql_parser 105 | :param schema: 106 | :param schema_name: 107 | :param psql_dump_path: 108 | :type mysql_parser: MysqlParser 109 | """ 110 | psql_dump = open(psql_dump_path, 'w') 111 | 112 | pre_sql_tables = { table_name: pg_schema['tables'][table_name] \ 113 | for table_name in schema_changes['tables'].keys() \ 114 | if '_PRE_SQL_' in schema_changes['tables'][table_name] and table_name in pg_schema['tables']} 115 | 116 | for table_name, table_attrs in pre_sql_tables.items(): 117 | mysql_parser.run_pre_sql(db_name, table_name, table_attrs, schema_changes) 118 | 119 | for table_name, table_attrs in pg_schema['tables'].items(): 120 | print("Parsing table '%s' data...." % table_name) 121 | table_name_to = table_attrs if not table_attrs.get('name', {}) else table_attrs['name'] 122 | table_filename = os.path.join(tables_path, "%s.sql" % (table_name_to)) 123 | table_dump = open(table_filename, 'w+') 124 | 125 | cols_from = [col_name for col_name, col_attr in table_attrs['columns'].items() 126 | if not col_attr.get('_SKIP_', False)] 127 | cols_to = [col_name if not col_attr.get('name', {}) else col_attr['name'] 128 | for col_name, col_attr in table_attrs['columns'].items() 129 | if not col_attr.get('_SKIP_', False)] 130 | 131 | start_time = time.time() 132 | rows = mysql_parser.get_table_raw_data(db_name, table_name, cols_from, table_attrs, schema_changes) 133 | table_raw_rules = self._get_table_raw_dump_rules(table_name, cols_from, table_attrs['columns']) 134 | sql_copy_data_template = ','.join(['%s' for x in range(0, len(cols_to))]) + '\n' 135 | columns = '", "'.join(cols_to) 136 | psql_dump.write("\copy \"%s\" (\"%s\") FROM '%s' WITH (FORMAT CSV, QUOTE '''', DELIMITER ',', NULL 'NULL');\n" 137 | % (table_name_to, columns, table_filename)) 138 | 139 | for row_data in rows: 140 | row_data = list(row_data) 141 | self._apply_raw_dump_rules(row_data, table_raw_rules) 142 | csv_row_data = sql_copy_data_template % tuple(map(self._supaFilta, row_data)) 143 | table_dump.write(csv_row_data) 144 | 145 | table_dump.close() 146 | psql_dump.close() 147 | 148 | def generate_psql_index_fk(self, schema, output_file): 149 | output = open(output_file, 'w') 150 | output.write("SET client_min_messages TO ERROR;\n") 151 | output.write("SET SCHEMA 'public';\n") 152 | 153 | output.write("\n\n") 154 | for table_name, table_attr in schema['tables'].items(): 155 | output.write(self._get_sql_sequence(table_attr)) 156 | output.write(self._get_sql_fkeys(table_attr)) 157 | 
output.write(self._get_sql_indexes(table_attr)) 158 | 159 | output.close() 160 | 161 | 162 | def generate_psql_views(self, schema, schema_name_v1, schema_name_v2, psql_views_path): 163 | """ 164 | Generate view to be able to query on old db schema trough new v2 db schema 165 | :param schema: 166 | :param schema_name_v1: 167 | :param schema_name_v2: 168 | :param psql_views_path: 169 | :return: 170 | """ 171 | psql_views = open(psql_views_path, 'w') 172 | psql_views.write("SET client_min_messages TO ERROR;\n") 173 | psql_views.write("DROP SCHEMA IF EXISTS %s CASCADE;\n\n" % schema_name_v1) 174 | psql_views.write("CREATE SCHEMA IF NOT EXISTS %s;\n\n" % schema_name_v1) 175 | 176 | for table_name_v1, table_attr in schema['tables'].iteritems(): 177 | table_name_v2 = table_attr['name'] 178 | columns_pri, columns_ref, columns, columns_ignore = \ 179 | PsqlParser._get_categorized_columns(table_attr['columns']) 180 | 181 | columns = merge_dicts(columns_pri, columns_ref, columns) 182 | 183 | columns_v2 = [ '"'+col_attr['name']+'"' for col_name_v1, col_attr in columns.iteritems() ] 184 | columns_v2 += [ 'NULL' for col_name_v1, col_attr in columns_ignore.iteritems() ] 185 | 186 | columns_v1 = [ '"'+col_name_v1+'"' for col_name_v1, col_attr in columns.iteritems()] 187 | columns_v1 += [ '"'+col_name_v1+'"' for col_name_v1, col_attr in columns_ignore.iteritems() ] 188 | 189 | view_sql = ('CREATE VIEW %s (%s) AS \n SELECT %s FROM %s WITH CASCADED CHECK OPTION;\n\n' % ( 190 | "%s.%s" % (schema_name_v1, table_name_v1), 191 | ', '.join(columns_v1), 192 | ', '.join(columns_v2), 193 | "%s.%s" % (schema_name_v2, table_name_v2) 194 | )) 195 | 196 | psql_views.write(view_sql + "\n") 197 | psql_views.close() 198 | 199 | 200 | def _get_table_raw_dump_rules(self, table_name, cols, attrs): 201 | tuple_to_check = [] 202 | for rule_attr, rule_conds in self.raw_dump_rules.get('column', {}).items(): 203 | for rule_cond in rule_conds: 204 | tuple_to_check += [(col_key, attrs[col_name], rule_cond['method']) \ 205 | for col_key, col_name in enumerate(cols) \ 206 | if attrs[col_name].get(rule_attr, None) == rule_cond['value'] 207 | or (rule_cond['value'] == "notNone" and attrs[col_name].get(rule_attr, None) is not None) 208 | ] 209 | return tuple_to_check 210 | 211 | def _apply_raw_dump_rules(self, row_data, tuple_to_check): 212 | for col_key, col_attrs, rule_method in tuple_to_check: 213 | params = [row_data[col_key], col_attrs] 214 | row_data[col_key] = getattr(dumperAuxFuncs, rule_method)(*params) 215 | 216 | 217 | @staticmethod 218 | def _convert_mysql_insert_to_psql(schema_changes, line): 219 | """ 220 | Convert mysql insert sql statement into psql one 221 | :param schema_changes: 222 | :param line: 223 | :return: 224 | """ 225 | insert_regex = re.compile('^INSERT INTO "([\w\d]+)"([\w\W]+)VALUES([\w\W]+);$') 226 | try: 227 | line = line.decode("utf8").strip().replace(r"\\", "WUBWUBREALSLASHWUB").\ 228 | replace(r"\'", "''").replace("WUBWUBREALSLASHWUB", r"\\").\ 229 | replace("0000-00-00 00:00:00", "2000-01-01 00:00:00").\ 230 | replace("0000-00-00", "2000-01-01") 231 | except: 232 | print("Can't decode value") 233 | print(line) 234 | return None 235 | 236 | # Grag table name from insert query and check if there is a new name for it 237 | table_name = insert_regex.match(line).group(1) 238 | orig_table_name = insert_regex.match(line).group(1) 239 | if RuleHandler.STR_SKIP == schema_changes.get('tables', {}).get(table_name, {}): 240 | return None 241 | if 'name' in schema_changes.get('tables', {}).get(table_name, {}): 242 
| table_name = schema_changes['tables'][table_name]['name'] 243 | 244 | # Grag columns names from insert query and check if there is a new name for them 245 | columns = re.findall('"([^"]*)"', insert_regex.match(line).group(2)) 246 | for key, col in enumerate(columns): 247 | if 'name' in schema_changes.get('tables', {}).get(orig_table_name, {}).get('columns', {}).get(col, {}): 248 | columns[key] = schema_changes['tables'][orig_table_name]['columns'][col]['name'] 249 | 250 | column_str = '("' + '", "'.join(columns) + '")' 251 | 252 | # Values to be inserted 253 | values = insert_regex.match(line).group(3) 254 | 255 | # Re-build insert query with new names 256 | insert_sql = "INSERT INTO \"%s\" %s VALUES %s;" % (table_name, column_str, values) 257 | return insert_sql.encode('utf8') 258 | 259 | 260 | @staticmethod 261 | def _create_rules(rules, node_rules, node_atrrs): 262 | """ 263 | Generates list of rules from class general rules 264 | :param rules: Dict to allocate new rules 265 | :param node_rules: Global class rules defined on the node level 266 | :param node_atrrs: List of available attrs on that level 267 | :return: 268 | """ 269 | for node_attr, node_value in node_atrrs.iteritems(): 270 | if node_attr not in node_rules: 271 | continue 272 | for rule in node_rules[node_attr]: 273 | # if isinstance(rule['from'], REGEX_TYPE) and node_value.startswith('mediumtext'): 274 | if rule['from'] == node_value: 275 | rules[node_attr] = rule['to'] 276 | 277 | @staticmethod 278 | def _get_sql_drop_table(table_attr): 279 | """ 280 | Generate drop database statement 281 | :param table_attr: table attrs 282 | :return: SQL statement for dropping 283 | """ 284 | template = 'DROP TABLE IF EXISTS "%s" CASCADE;' % (table_attr['name']) 285 | return template 286 | 287 | 288 | def _get_sql_user(self, opcode, mysql_parser): 289 | db_name = 'client_'+opcode 290 | b7_user, b7_pass = mysql_parser.get_user_user_pass(opcode) 291 | 292 | if b7_user is None or b7_pass is None: return "" 293 | if len(b7_user) == 0 or b7_user == 'root': return "" 294 | b7_pass = b7_pass.replace('$', '\\0024') 295 | return """-- Adding PG User 296 | DO $$DECLARE r record; 297 | BEGIN 298 | IF NOT EXISTS ( 299 | SELECT * 300 | FROM pg_catalog.pg_user 301 | WHERE usename = '%s') THEN 302 | 303 | CREATE USER %s WITH PASSWORD U&'%s'; 304 | ELSE 305 | ALTER USER %s WITH PASSWORD U&'%s'; 306 | END IF; 307 | END$$; 308 | ALTER DATABASE %s OWNER TO %s; 309 | GRANT CONNECT ON DATABASE %s TO %s; 310 | 311 | GRANT %s TO %s; 312 | 313 | GRANT USAGE ON SCHEMA %s TO %s; 314 | GRANT ALL ON ALL SEQUENCES IN SCHEMA %s TO %s; 315 | GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA %s TO %s; 316 | GRANT USAGE ON SCHEMA %s TO %s; 317 | GRANT ALL ON ALL SEQUENCES IN SCHEMA %s TO %s; 318 | GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA %s TO %s; 319 | 320 | ALTER DATABASE %s SET search_path TO %s; 321 | ALTER USER %s SET search_path TO %s; 322 | ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON TABLES TO %s; 323 | ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT ALL ON SEQUENCES TO %s; 324 | ALTER DEFAULT PRIVILEGES IN SCHEMA v1 GRANT ALL ON TABLES TO %s; 325 | ALTER DEFAULT PRIVILEGES IN SCHEMA v1 GRANT ALL ON SEQUENCES TO %s; 326 | """ % (b7_user, b7_user, b7_pass, b7_user, b7_pass, 327 | db_name, b7_user, 328 | db_name, b7_user, 329 | 330 | 'b7group_user', b7_user, 331 | 332 | 'v1', b7_user, 333 | 'v1', b7_user, 334 | 'v1', b7_user, 335 | 'public', b7_user, 336 | 'public', b7_user, 337 | 'public', b7_user, 338 | 339 | db_name, ', '.join(('v1', 'public')), 
340 | b7_user, ', '.join(('v1', 'public')), 341 | b7_user, 342 | b7_user, 343 | b7_user, 344 | b7_user, 345 | ) 346 | 347 | 348 | def _get_sql_sequence(self, table_attr): 349 | """ 350 | Return psql statement to set SEQUENCE value for specific table 351 | """ 352 | template = 'SELECT setval(\'%s_%s_seq\', %d, false);\n' 353 | return "\n".join([template % ( 354 | table_attr['name'], col_attrs['name'], table_attr['autoIncrement'] 355 | ) for col_name, col_attrs in table_attr['columns'].items() if col_attrs['isPk'] and table_attr['autoIncrement'] ]) 356 | 357 | @staticmethod 358 | def _get_dump_initial_statements(): 359 | return """-- Converted by db_converter 360 | SET standard_conforming_strings=on; 361 | SET escape_string_warning=on; 362 | SET client_min_messages TO ERROR; 363 | SET client_encoding = 'UTF8'; 364 | SET NAMES 'UTF8'; 365 | SET CONSTRAINTS ALL DEFERRED; 366 | """ 367 | 368 | @staticmethod 369 | def _get_dump_final_statements(): 370 | return """ 371 | SET CONSTRAINTS ALL IMMEDIATE; 372 | """ 373 | 374 | @staticmethod 375 | def _supaFilta( v): 376 | if v is None: return 'NULL' 377 | if v is True: return 'true' 378 | if v is False: return 'false' 379 | 380 | # if isinstance(v, unicode): v = v.encode('utf8') 381 | if isinstance(v, str): return "'" + v.replace("'", "''") + "'" 382 | 383 | return v 384 | 385 | def _get_sql_fkeys(self, table_attr): 386 | """ 387 | Generate create database statement 388 | :param table_attr: table attrs 389 | :return: SQL statement for creating 390 | """ 391 | default_on_def = 'RESTRICT DEFERRABLE INITIALLY IMMEDIATE' 392 | fkey_template = 'ALTER TABLE "%s" ADD CONSTRAINT %s_%s_fkey FOREIGN KEY (%s) REFERENCES %s ON DELETE %s;' 393 | # index_template = 'CREATE INDEX %s_%s_idx ON %s (%s);' 394 | fkeys = '' 395 | 396 | for col_name, col_attrs in table_attr['columns'].items(): 397 | if col_attrs['reference']: 398 | fkeys += '\n' + fkey_template % \ 399 | (table_attr['name'], table_attr['name'], col_attrs['name'], '"' + col_attrs['name'] + '"', 400 | col_attrs['reference'], col_attrs['on_delete'] if 'on_delete' in col_attrs else default_on_def) 401 | # fkeys += '\n' + index_template % (table_attr['name'], col_attrs['name'], table_attr['name'], col_attrs['name']) 402 | 403 | return fkeys 404 | 405 | def _get_sql_indexes(self, table_attr): 406 | """ 407 | Generate indexes database statement 408 | :param table_attr: table attrs 409 | :return: SQL statement for creating 410 | """ 411 | index_template = 'CREATE INDEX %s_%s_x ON %s ("%s");\n' 412 | indexes = '\n'; 413 | 414 | for index_name, index_attrs in table_attr['indexes'].items(): 415 | columns = list() 416 | for index_column_name in index_attrs['columns']: 417 | columns.append(table_attr['columns'][index_column_name]['name']) 418 | indexes += index_template % (table_attr['name'], index_attrs['name'], 'public."' + table_attr['name'] + '"', '" ,"'.join(columns)) 419 | 420 | return indexes 421 | 422 | def _get_sql_create_table(self, table_attr): 423 | """ 424 | Generate create database statement 425 | :param table_attr: table attrs 426 | :return: SQL statement for creating 427 | """ 428 | template = 'CREATE TABLE IF NOT EXISTS "%s" (\n %s );' 429 | columns_pri, columns_ref, columns, columns_ignore = \ 430 | PsqlParser._get_categorized_columns(table_attr['columns']) 431 | v2_columns = [] 432 | for columnName, columnAttr in merge_dicts(columns_pri, columns_ref, columns).items(): 433 | v2_columns.append(PsqlParser._get_sql_column(columnAttr)) 434 | return template % (table_attr['name'], ", \n 
".join(v2_columns)) 435 | 436 | @staticmethod 437 | def _get_categorized_columns(tableColumns): 438 | """ 439 | Generate return table columns by category 440 | :return: PK, ref_cols, remaining, skipped 441 | """ 442 | columns = {} 443 | columns_ref = {} 444 | columns_pri = {} 445 | columns_ignore = {} 446 | first_pk_col = None 447 | 448 | for col_name, col_attrs in tableColumns.items(): 449 | if RuleHandler.STR_SKIP in col_attrs: 450 | columns_ignore[col_name] = col_attrs 451 | elif col_attrs['isPk']: 452 | if first_pk_col is None: 453 | first_pk_col = col_name 454 | columns_pri[first_pk_col] = col_attrs 455 | else: 456 | col_attrs['isPk'] = False 457 | columns[col_name] = col_attrs 458 | 459 | if first_pk_col in columns_pri: 460 | columns_pri[first_pk_col]['isPk'] = False 461 | columns[first_pk_col] = columns_pri[first_pk_col] 462 | del columns_pri[first_pk_col] 463 | 464 | if 'pkC' not in columns_pri: 465 | columns_pri['pkC'] = copy.copy(col_attrs) 466 | columns_pri['pkC']['isPkC'] = list([first_pk_col]) 467 | 468 | columns_pri['pkC']['isPkC'].append(col_name) 469 | elif col_attrs['reference']: 470 | columns_ref[col_name] = col_attrs 471 | else: 472 | columns[col_name] = col_attrs 473 | 474 | return columns_pri, columns_ref, columns, columns_ignore 475 | 476 | @staticmethod 477 | def _get_sql_column(column_attr): 478 | """ 479 | Generate table columns statements 480 | :param column_attr: col attrs 481 | :return: SQL statement adding columns 482 | """ 483 | col_def_sql = ' "%s"' % column_attr['name'] 484 | if column_attr['extra'] == 'auto_increment': 485 | col_def_sql += ' SERIAL' 486 | else: 487 | col_def_sql += ' %s' % column_attr['type'].upper() 488 | 489 | if column_attr['size'] and column_attr['type'] not in ['text', 'bytea', 'smallint', 'decimal', 'set']: 490 | col_def_sql += '(' + str(column_attr['size']) + ')' 491 | if not column_attr['nullable']: 492 | col_def_sql += ' NOT NULL' 493 | if column_attr['isPk']: 494 | col_def_sql += ' PRIMARY KEY' 495 | if column_attr['default'] is not None: 496 | if column_attr['default'].replace(".", "", 1).isdigit(): 497 | if column_attr['type'] == 'boolean': 498 | col_def_sql += ' DEFAULT ' + ('true' if column_attr['default'] != '0' else 'false') 499 | else: 500 | col_def_sql += ' DEFAULT ' + column_attr['default'] 501 | elif column_attr['default'] == 'current_timestamp': 502 | col_def_sql += ' DEFAULT ' + column_attr['default'] 503 | elif column_attr['default'].lower() == "true" or column_attr['default'].lower() == "false": 504 | col_def_sql += ' DEFAULT ' + column_attr['default'].upper() 505 | else: 506 | col_def_sql += " DEFAULT U&'%s'" % column_attr['default'] 507 | 508 | if 'isPkC' in column_attr: 509 | col_def_sql = 'PRIMARY KEY (' + ','.join(column_attr['isPkC']) + ')' 510 | 511 | return col_def_sql 512 | 513 | @staticmethod 514 | # NOT IN USE 515 | def _psql_escape(value, value_type): 516 | if value is None: 517 | return 'null' 518 | if value_type in ['int', 'decimal']: 519 | return value if not isinstance(value, Decimal) else float(value) 520 | if value_type in ['boolean']: 521 | return 'false' if value == '0' else 'true' 522 | if value_type.startswith('timestamp') or value_type == 'date': 523 | return str(value) 524 | # return str(value.encode('utf8').replace('\'', '\'\'').replace('\\', '\\\\')) 525 | # return psycopg2._param_escape(value.encode('utf8')) 526 | return '$$'+str(value.encode('utf8'))+'$$' 527 | 528 | @staticmethod 529 | def sql_copy_format(row_data): 530 | row_data = re.sub(r"::(\w*)", "", row_data) 531 | row_data = 
row_data.replace("', E'", "', '") 532 | return row_data 533 | --------------------------------------------------------------------------------