├── README.md └── parse_sqlite_sql.py /README.md: -------------------------------------------------------------------------------- 1 | # sqlite\_sql\_parser 2 | 3 | 4 | ## Introduction 5 | This script parses the SQL file exported from `sqlite3 .dump` and makes it compatible with MySQL import. 6 | 7 | 8 | ## Basic Usage 9 | 10 | sqlite3 <database_file> .dump > dump.sql 11 | python parse_sqlite_sql.py dump.sql 12 | 13 | Two files are generated: `dump.sql.schema.sql` and `dump.sql.data.sql` 14 | 15 | One holds the DB schema and the other holds the DB data; both are rewritten for MySQL import. 16 | 17 | After final manual modification, one can use the following commands to import the database: 18 | 19 | mysql -u <user> -p --default-character-set=utf8 <database> < dump.sql.schema.sql 20 | mysql -u <user> -p --default-character-set=utf8 <database> < dump.sql.data.sql 21 | 22 | 23 | ## Further Notes 24 | 25 | It's strongly advised that one should further modify the DB schema for one's own purposes, especially: 26 | 27 | 1. Replace some `text` fields with `varchar(255)`, for better performance 28 | 2. Replace some `integer` fields with `bigint` 29 | 3. Add quotes around tables named after reserved keywords 30 | 31 | One should also note that this script replaces _all_ values of `t` with `1`, and _all_ values of `f` with `0`, in order to adapt to the boolean field change. If you really need a `t` there, you have to change it back manually. 32 | 33 | 34 | 35 | ## Advantages 36 | Unlike most other line-based parsers, this parser treats literal strings and non-literal strings _separately_. So even if your table data contains some special statements like `CREATE TABLE`, `INSERT VALUE` or `AUTOINCREMENT`, they would _not_ be updated. 37 | 38 | ## Disadvantages 39 | It's very slow. It took about 2 seconds to parse a SQL file of 100,000 lines. 40 | 41 | With other Perl or Python based scripts, it could be done in less than 0.1 second. 
42 | 43 | ## Hacking Points 44 | The following methods are the ones most likely to need modification for further customization: 45 | 46 | process_literal 47 | process_schema 48 | 49 | 50 | -------------------------------------------------------------------------------- /parse_sqlite_sql.py: -------------------------------------------------------------------------------- 1 | #The MIT License (MIT) 2 | # 3 | #Copyright (c) 2015 Motherapp Limited 4 | # 5 | #Permission is hereby granted, free of charge, to any person obtaining a copy 6 | #of this software and associated documentation files (the "Software"), to deal 7 | #in the Software without restriction, including without limitation the rights 8 | #to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | #copies of the Software, and to permit persons to whom the Software is 10 | #furnished to do so, subject to the following conditions: 11 | # 12 | #The above copyright notice and this permission notice shall be included in 13 | #all copies or substantial portions of the Software. 14 | # 15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | #THE SOFTWARE. 
#author: walty@motherapp.com
#description: parse the sql generated from sqlite3, so it could be imported to mysql
#document: https://github.com/motherapp/sqlite_sql_parser/blob/master/README.md

import sys
import re
import datetime


class SQLParser(object):
    """Split a ``sqlite3 .dump`` file into MySQL-friendly schema and data files.

    The input is read one character at a time so that quoted literals and
    plain SQL text can be told apart.  ``INSERT INTO`` statements are written
    to ``<input>.data.sql``; every other statement is passed through
    ``process_schema`` and written to ``<input>.schema.sql``.

    The code is written to run on both Python 2 and Python 3.
    """

    # Statements starting with one of these prefixes are dropped entirely.
    # The sqlite_sequence entries refer to SQLite's internal bookkeeping
    # table, which does not exist on the MySQL side.
    SKIP_PREFIXES = (
        "BEGIN TRANSACTION",
        "COMMIT",
        "sqlite_sequence",
        "CREATE UNIQUE INDEX",
        "PRAGMA",
        "DELETE FROM sqlite_sequence",
        "INSERT INTO sqlite_sequence",
        'INSERT INTO "sqlite_sequence"',
    )

    # CREATE TABLE header: optional IF NOT EXISTS, then either a
    # double-quoted name (group 1) or a bare word (group 2); group 3 is the
    # remainder of the line.
    _CREATE_TABLE_RE = re.compile(
        r'CREATE TABLE (?:IF NOT EXISTS )?(?:"([^"]+)"|(\w+))(.*)')

    # SQLite spells the keyword AUTOINCREMENT, MySQL spells it AUTO_INCREMENT.
    _AUTOINCREMENT_RE = re.compile(r"\bAUTOINCREMENT\b", re.IGNORECASE)

    # An integer primary-key column definition followed by a comma.
    _INTEGER_PK_RE = re.compile(
        r"(\binteger\b(?:\s+\w+)*\s*PRIMARY\s+KEY)((?:\s+\w+)*\s*,)",
        re.IGNORECASE)

    def __init__(self, input_file):
        """Open *input_file* for reading and create the two output files.

        The outputs are named ``input_file + ".schema.sql"`` and
        ``input_file + ".data.sql"``.
        """
        self.fin = open(input_file)
        self.schema_file = input_file + ".schema.sql"
        self.data_file = input_file + ".data.sql"

        # Default buffering is used for both outputs (unbuffered text I/O is
        # rejected by Python 3); close() flushes them when parsing is done.
        self.fw_schema = open(self.schema_file, "w")
        self.fw_data = open(self.data_file, "w")

        # Text read since the last flush (code or the current literal).
        self.buffer_string = ""

        # One character of look-behind / look-ahead around current_char.
        self.current_char = ""
        self.prev_char = ""
        self.next_char = ""

        # Quote character of the literal being read, "" when outside one.
        self.current_quote = ""
        # Statement assembled so far (literals already post-processed).
        self.current_line = ""
        # Same statement with the quoted literals left out; used to balance
        # brackets without being fooled by parentheses inside literals.
        self._statement_code = ""

        # Not used by the parser; kept so existing attribute access works.
        self.previous_string_quote = ""
        self.literal_string = ""
        self.curent_create_table_statement_bracket_count = 0

        #better set the encoding in the database first
        #self.fw_data.write("SET NAMES 'utf8' COLLATE 'utf8_general_ci';\n")

    def close(self):
        """Close the input file and both output files (safe to call twice)."""
        for handle in (self.fin, self.fw_schema, self.fw_data):
            handle.close()

    def flush_buffer(self, skip_last_char=False, write_to_file=False):
        """Move the pending buffer into the current statement.

        skip_last_char -- keep the last buffered character (the quote that
                          opens a literal) in the buffer instead of flushing it.
        write_to_file  -- the statement is complete: write it to the data or
                          schema file and start a new one.
        """
        if skip_last_char:
            final_buffer = self.buffer_string[:-1]
            self.buffer_string = self.buffer_string[-1:]  # keep last char only
        else:
            final_buffer = self.buffer_string
            self.buffer_string = ""

        if self.is_in_quote():
            # The buffer holds a complete literal, quotes included.
            final_buffer = self.process_literal(final_buffer)
        else:
            self._statement_code += final_buffer

        self.current_line += final_buffer

        if not write_to_file:
            return

        statement = self.current_line
        if self.is_skip_line(statement):
            pass  # nothing to emit for SQLite-only statements
        elif statement.startswith("INSERT INTO"):
            self.fw_data.write(statement)
            if not statement.strip().endswith(";"):
                self.fw_data.write(";\n")
        else:
            self.fw_schema.write(self.process_schema(statement))

        self.current_line = ""
        self._statement_code = ""

    def add_buffer(self, c):
        """Append the character *c* to the pending buffer."""
        self.buffer_string += c

    def read_next_char(self):
        """Advance one character, buffer it and return it ("" at end of file)."""
        self.prev_char = self.current_char
        self.current_char = self.next_char
        self.next_char = self.fin.read(1)

        if self.current_char:
            self.add_buffer(self.current_char)
        elif self.next_char:
            # Very first call: the look-ahead was empty, so advance once more.
            self.read_next_char()

        return self.current_char

    def set_current_quote(self, c):
        """Record *c* as the quote character of the literal being read."""
        self.current_quote = c

    def clean_current_quote(self):
        """Mark that the parser is no longer inside a quoted literal."""
        self.current_quote = ""

    def is_in_quote(self):
        """Return True while the parser is inside a quoted literal."""
        return self.current_quote != ""

    def is_skip_line(self, value):
        """Return True when statement *value* must not be copied to the output."""
        return value.startswith(self.SKIP_PREFIXES)

    def is_in_create_table(self):
        """Return True while a CREATE TABLE statement has unclosed brackets.

        Brackets are counted over the non-literal text of the whole pending
        statement, not only over the current buffer, because the buffer is
        emptied at every quoted identifier.
        """
        code = self._statement_code
        if not self.is_in_quote():
            code += self.buffer_string  # outside a literal the buffer is code

        if not code.strip().startswith("CREATE TABLE"):
            return False

        return code.count("(") > code.count(")")

    def start(self):
        """Parse the whole input, write both output files, then close them."""
        line_number = 1
        start_time = datetime.datetime.now()

        try:
            while True:
                c = self.read_next_char()

                if not c:
                    print("End of file")
                    break

                if c == "'" or c == '"':
                    if not self.is_in_quote():
                        # Opening quote: flush the code that precedes it.
                        self.flush_buffer(skip_last_char=True)
                        self.set_current_quote(c)
                    elif self.current_quote == c:
                        if self.next_char == c:
                            # Doubled quote is an escaped quote inside the
                            # literal: consume its partner and carry on.
                            self.read_next_char()
                            continue
                        # Closing quote: flush while still "in quote" so the
                        # literal goes through process_literal.
                        self.flush_buffer()
                        self.clean_current_quote()

                if c == "\n" or c == "\r":
                    line_number += 1

                    if line_number % 10000 == 0:
                        elapsed = datetime.datetime.now() - start_time
                        print("Processing line: %d, elapsed: %s"
                              % (line_number, elapsed))

                    if not self.is_in_quote() and not self.is_in_create_table():
                        self.flush_buffer(write_to_file=True)

            # Flush whatever follows the last newline.
            self.flush_buffer(write_to_file=True)
        finally:
            self.close()

    #HACKING POINT, process literal strings
    def process_literal(self, value):
        """Return the MySQL form of one quoted literal.

        *value* is the literal as read from the dump, including its
        surrounding quotes.  The single-quoted values 't' and 'f' become the
        unquoted numbers 1 and 0, the table name that follows INSERT INTO
        loses its double quotes, and backslashes are doubled in everything
        else.  A string is always returned.
        """
        if value in ("'t'", "t"):
            return "1"

        if value in ("'f'", "f"):
            return "0"

        if self.current_line.endswith("INSERT INTO "):
            return value.strip('"')  # mysql has no quote for insert into table name

        return value.replace("\\", "\\\\")

    #HACKING POINT, process schema
    def process_schema(self, value):
        """Return schema statement *value* rewritten for MySQL.

        Skipped statements yield "".  A CREATE TABLE statement gets a
        preceding DROP TABLE IF EXISTS, backtick-quoted identifiers and
        AUTO_INCREMENT on its integer primary key; CREATE INDEX gets
        backtick-quoted identifiers.  Anything else is returned unchanged.

        Based on
        http://stackoverflow.com/questions/18671/quick-easy-way-to-migrate-sqlite3-to-mysql
        """
        if self.is_skip_line(value):
            return ""

        converted = []
        # Stays True from the CREATE TABLE line until the closing ");" so
        # that every line of a multi-line definition is converted.
        in_create_table = False

        for line in value.split("\n"):
            # Temporarily turn a trailing ''); into ``); so the quote
            # replacement below leaves that empty string alone.
            protected = re.match(r".*, ''\);", line) is not None
            if protected:
                line = re.sub(r"''\);", "``);", line)

            if line.startswith("CREATE TABLE"):
                in_create_table = True

                m = self._CREATE_TABLE_RE.search(line)
                if m:
                    template = ("DROP TABLE IF EXISTS `%(name)s` ;\n"
                                "CREATE TABLE IF NOT EXISTS `%(name)s`%(sub)s\n")
                    line = template % dict(name=m.group(1) or m.group(2),
                                           sub=m.group(3))

            if in_create_table:
                line = self._AUTOINCREMENT_RE.sub("AUTO_INCREMENT", line)

                # sqlite auto-increments ALL integer primary keys, so add the
                # attribute unless the column already carries it.
                if "AUTO_INCREMENT" not in line.upper():
                    line = self._INTEGER_PK_RE.sub(r"\1 AUTO_INCREMENT\2", line)

                # mysql doesn't like " or ' around identifiers; whatever
                # follows the first DEFAULT is kept as it is.
                head, keyword, tail = line.partition("DEFAULT")
                line = head.replace('"', "`").replace("'", "`") + keyword + tail

            if protected:
                line = re.sub(r"``\);", "'');", line)

            if in_create_table and ");" in line:
                in_create_table = False

            if line.startswith("CREATE INDEX"):
                line = line.replace('"', "`")

            converted.append(line)

        return "\n".join(converted)


def main():
    """Command-line entry point; returns -1 when the arguments are wrong."""
    if len(sys.argv) != 2:
        print("Usage: python " + sys.argv[0] + " input_file\n")
        return -1

    input_file = sys.argv[1]

    parser = SQLParser(input_file)

    parser.start()

    print("Done.")
    print("Schema and data files are generated.")


if __name__ == "__main__":
    sys.exit(main())