├── README.md
└── parse_sqlite_sql.py


/README.md:
--------------------------------------------------------------------------------
 1 | # sqlite\_sql\_parser
 2 | 
 3 | 
 4 | ## Introduction
 5 | This script parses the SQL file exported by `sqlite3 .dump` and makes it compatible with MySQL import.
 6 | 
 7 | 
 8 | ## Basic Usage
 9 | 
10 |     sqlite3 <database_file> .dump   >   dump.sql
11 |     python parse_sqlite_sql.py dump.sql
12 | 
13 | Two files will be generated: `dump.sql.schema.sql` and `dump.sql.data.sql`
14 | 
15 | One is for DB schema, and the other is for DB data, both are updated for MySQL import purpose.
16 | 
17 | After final manual modification, one could use the following commands to import the database:
18 | 
19 |     mysql -u <user_name> -p <database_name>  --default-character-set=utf8 < dump.sql.schema.sql
20 |     mysql -u <user_name> -p <database_name>  --default-character-set=utf8 < dump.sql.data.sql
21 | 
22 | 
23 | ## Further Notes
24 | 
25 | It is strongly advised to further adjust the DB schema to suit your own needs, especially:
26 | 
27 | 1. Replace some `text` field with `varchar(255)`, for better performance 
28 | 2. Replace some `integer` with  `bigint`
29 | 3. Add backquotes around table names that are reserved keywords
30 | 
31 | One should also note that this script would replace _all_ values of `t` with `1`, and _all_ values of `f` with `0`, in order to adapt to boolean field change. If you really need a `t` there, you might change back manually.
32 | 
33 | 
34 | 
35 | ## Advantages
36 | Unlike most other line-based parsers, this parser treats literal strings and non-literal strings _separately_. So even if your table data contains special statements like `CREATE TABLE`, `INSERT VALUE` or `AUTOINCREMENT`, they will _not_ be modified. 
37 | 
38 | ## Disadvantages
39 | It's very slow. It takes about 2 seconds to parse a SQL file of 100,000 lines.
40 | 
41 | For other Perl or Python based scripts, it could be done in less than 0.1 second.
42 | 
43 | ## Hacking Points
44 | The following methods are likely to be modified for further customization:
45 | 
46 |     process_literal
47 |     process_schema
48 | 
49 | 
50 | 


--------------------------------------------------------------------------------
/parse_sqlite_sql.py:
--------------------------------------------------------------------------------
  1 | #The MIT License (MIT)
  2 | #
  3 | #Copyright (c) 2015 Motherapp Limited
  4 | #
  5 | #Permission is hereby granted, free of charge, to any person obtaining a copy
  6 | #of this software and associated documentation files (the "Software"), to deal
  7 | #in the Software without restriction, including without limitation the rights
  8 | #to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  9 | #copies of the Software, and to permit persons to whom the Software is
 10 | #furnished to do so, subject to the following conditions:
 11 | #
 12 | #The above copyright notice and this permission notice shall be included in
 13 | #all copies or substantial portions of the Software.
 14 | #
 15 | #THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 16 | #IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 17 | #FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 18 | #AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 19 | #LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 20 | #OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 21 | #THE SOFTWARE.
 22 | 
 23 | 
 24 | #author: walty@motherapp.com
 25 | #description: parse the sql generated from sqlite3, so it could be imported to mysql
 26 | #document: https://github.com/motherapp/sqlite_sql_parser/blob/master/README.md
 27 | 
 28 | import sys
 29 | import re
 30 | import datetime
 31 | 
 32 | class SQLParser():
 33 |     def __init__(self, input_file):
 34 |         self.buffer_string = ""
 35 |         self.fin = open(input_file)
 36 |         self.schema_file = input_file + ".schema.sql"
 37 |         self.data_file = input_file + ".data.sql"
 38 | 
 39 |         self.fw_schema = open(self.schema_file, "w", buffering=0)
 40 |         self.fw_data = open(self.data_file, "w")
 41 | 
 42 |         self.previous_string_quote = ""
 43 |         self.buffer_string = ""
 44 | 
 45 |         self.literal_string = ""
 46 | 
 47 |         self.current_char = ""
 48 |         self.prev_char = ""
 49 |         self.next_char = ""
 50 | 
 51 |         self.current_quote = ""
 52 |         self.current_line = ""
 53 | 
 54 |         self.curent_create_table_statement_bracket_count = 0
 55 | 
 56 |         #better set the encoding in the database first
 57 |         #self.fw_data.write("SET NAMES 'utf8' COLLATE 'utf8_general_ci';\n")
 58 | 
 59 |         return
 60 | 
 61 | 
 62 |     def flush_buffer(self, skip_last_char=False, write_to_file=False):
 63 |         if skip_last_char:
 64 |             final_buffer = self.buffer_string[:-1]
 65 |             self.buffer_string = self.buffer_string[-1] #clean all except last char
 66 |         else:
 67 |             final_buffer = self.buffer_string
 68 |             self.buffer_string = "" #clean all
 69 |         
 70 | 
 71 |         if self.is_in_quote():
 72 |             final_buffer = self.process_literal(final_buffer)  #do misc final processing
 73 | 
 74 |         self.current_line += final_buffer
 75 | 
 76 |         if write_to_file:
 77 |             if self.current_line.startswith("INSERT INTO"):
 78 |                 self.fw_data.write(self.current_line)
 79 |                 if not self.current_line.strip().endswith(";"):
 80 |                     self.fw_data.write(";\n")
 81 |             else:
 82 |                 self.current_line = self.process_schema(self.current_line)
 83 |                 self.fw_schema.write(self.current_line)
 84 | 
 85 |             self.current_line = ""
 86 | 
 87 |         return
 88 | 
 89 |     def add_buffer(self, c):
 90 |         self.buffer_string += c
 91 | 
 92 | 
 93 |     def read_next_char(self):
 94 |         self.prev_char = self.current_char
 95 |         self.current_char = self.next_char
 96 |         self.next_char = self.fin.read(1)
 97 | 
 98 |         if self.current_char:
 99 |             self.add_buffer(self.current_char)
100 |         elif self.next_char:#for the first char of the file
101 |             self.read_next_char()
102 | 
103 |         return self.current_char
104 | 
105 |     def set_current_quote(self, c):
106 |         self.current_quote = c
107 | 
108 |     def clean_current_quote(self):
109 |         self.current_quote = ""
110 | 
111 | 
112 |     def is_in_quote(self):
113 |         return self.current_quote != ""
114 | 
115 | 
116 | 
117 |     def is_skip_line(self, value):  #no need to copy this line
118 |         return value.startswith("BEGIN TRANSACTION") or value.startswith("COMMIT") or \
119 |                 value.startswith("sqlite_sequence") or value.startswith("CREATE UNIQUE INDEX") or \
120 |                 value.startswith("PRAGMA")
121 | 
122 | 
123 | 
124 |     def is_in_create_table(self):
125 |         bracket_count = 0
126 |         if self.buffer_string.strip().startswith("CREATE TABLE"):
127 |             bracket_count += self.buffer_string.count("(")
128 |             bracket_count -= self.buffer_string.count(")")
129 | 
130 |         #print "@130", self.buffer_string, bracket_count
131 | 
132 | 
133 |         return bracket_count > 0
134 | 
135 |     def start(self):
136 |         line_number = 1;
137 |         start_time = datetime.datetime.now()
138 | 
139 | 
140 | 
141 |         while True:
142 |             c = self.read_next_char()
143 | 
144 |             if not c:
145 |                 print "End of file"
146 |                 break
147 | 
148 | 
149 |             if (c == "'" or c == "\""):
150 |                 #if self.prev_char == "\\" and self.next_char != c: #it's just an escaped single quote
151 |                 #    continue
152 | 
153 |                 if not self.is_in_quote():
154 |                     self.flush_buffer(skip_last_char = True)
155 |                     self.set_current_quote(c)
156 | 
157 |                 elif self.current_quote == c:    #end of string
158 |                     if self.next_char == c: #double single quote, or double double quote
159 |                         self.read_next_char()   #discard the paired one
160 |                         continue
161 |                     else:
162 |                         self.flush_buffer()
163 |                         self.clean_current_quote()
164 | 
165 | 
166 |             if (c == "\n" or c == "\r"):
167 |                 #flush teh buffer
168 |                 line_number += 1
169 | 
170 |                 if line_number % 10000 == 0:
171 |                     print "Processing line: ", line_number, "elpased: ", datetime.datetime.now() - start_time, "seconds"
172 | 
173 | 
174 |                 if not self.is_in_quote() and not self.is_in_create_table():
175 |                     self.flush_buffer(write_to_file = True)
176 | 
177 |                     #print "@119, current line", self.current_line
178 | 
179 |         #flush the last buffer
180 |         self.flush_buffer(write_to_file = True)
181 | 
182 | 
183 |         return
184 | 
185 | 
186 |     #HAKCING POINT, process literal strings
187 |     def process_literal(self, value):   
188 |         #print "@75: processing literal", value
189 | 
190 |         if value == 't':
191 |             return 1
192 | 
193 |         if value == 'f':
194 |             return 0
195 | 
196 |         if self.current_line.endswith("INSERT INTO "):
197 |             return value.strip("\"")    #mysql has no quote for insert into table name
198 | 
199 | 
200 |         value = value.replace("\\", "\\\\")
201 | 
202 |         return value
203 | 
204 | 
205 |     #HACKING POINT, process schema
206 |     def process_schema(self, value):
207 |       
208 | 
209 |         if self.is_skip_line(value):
210 |             return ""
211 | 
212 |         new_value = value
213 | 
214 | 
215 |         #http://stackoverflow.com/questions/18671/quick-easy-way-to-migrate-sqlite3-to-mysql
216 |         new_lines = []
217 |         for line in new_value.split("\n"):
218 |             searching_for_end = False
219 | 
220 |             # this line was necessary because ''); was getting
221 |             # converted (inappropriately) to \');
222 |             if re.match(r".*, ''\);", line):
223 |                 line = re.sub(r"''\);", r'``);', line)
224 | 
225 |             if re.match(r'^CREATE TABLE.*', line):
226 |                 searching_for_end = True
227 | 
228 |             m = re.search('CREATE TABLE "?([a-z_]*)"?(.*)', line)
229 |             if m:
230 |                 name, sub = m.groups()
231 |                 line = "DROP TABLE IF EXISTS `%(name)s` ;\nCREATE TABLE IF NOT EXISTS `%(name)s`%(sub)s\n"
232 |                 line = line % dict(name=name, sub=sub)
233 | 
234 | 
235 |             # Add auto_increment if it's not there since sqlite auto_increments ALL
236 |             # primary keys
237 |             if searching_for_end:
238 |                 if re.search(r"integer(?:\s+\w+)*\s*PRIMARY KEY(?:\s+\w+)*\s*,", line):
239 |                     line = line.replace("PRIMARY KEY", "PRIMARY KEY AUTO_INCREMENT")
240 |                 # replace " and ' with ` because mysql doesn't like quotes in CREATE commands
241 |                 if line.find('DEFAULT') == -1:
242 |                     line = line.replace('"', '`').replace("'", '`')
243 |                 else:
244 |                     parts = line.split('DEFAULT')
245 |                     parts[0].replace('"', '`').replace("'", '`')
246 |                     line = 'DEFAULT'.join(parts)
247 | 
248 |             # And now we convert it back (see above)
249 |             if re.match(r".*, ``\);", line):
250 |                 line = re.sub(r'``\);', r"'');", line)
251 | 
252 |             if searching_for_end and re.match(r'.*\);', line):
253 |                 searching_for_end = False
254 | 
255 |             if re.match(r"CREATE INDEX", line):
256 |                 line = re.sub('"', '`', line)
257 | 
258 | 
259 |             new_lines.append(line)
260 | 
261 | 
262 |         new_value = "\n".join(new_lines)
263 | 
264 | #        print "@239, after processing: ", value
265 | 
266 |         return new_value
267 | 
268 | 
269 | 
270 | 
271 | 
272 | 
273 | 
def main(argv=None):
    """Command-line entry point: convert one sqlite dump file.

    argv -- argument list including the program name; defaults to
            ``sys.argv``.

    Returns 0 on success, or -1 after printing a usage message when the
    argument count is not exactly one input file.
    """
    if argv is None:
        argv = sys.argv

    if len(argv) != 2:
        # Fall back to the script name if even the program name is missing.
        prog = argv[0] if argv else "parse_sqlite_sql.py"
        print("Usage: python " + prog + " input_file\n")
        return -1

    parser = SQLParser(argv[1])
    parser.start()

    print("Done.")
    print("Schema and data files are generated.")
    return 0


# Run only when executed as a script, and hand the result to the shell so
# a usage error no longer exits with status 0.
if __name__ == "__main__":
    sys.exit(main())
290 | 


--------------------------------------------------------------------------------