├── README.md ├── .gitignore ├── LICENSE └── mysql_dump_to_csv.py /README.md: -------------------------------------------------------------------------------- 1 | # mysql-dump-to-csv 2 | A quickly put-together script that parses a MySQL dump and generates a CSV file for each table in the dump. 3 | 4 | Example: 5 | 6 | `python mysql_dump_to_csv.py dump.sql output` 7 | 8 | This writes one CSV file into the `output` directory for each table that has data in the dump file. 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Michael Hahn 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
"""Convert a MySQL dump file into one CSV file per table.

Usage::

    python mysql_dump_to_csv.py dump.sql output

Column names are collected from ``CREATE TABLE`` statements and the rows
from ``INSERT INTO`` statements; each table that has at least one INSERT
statement gets ``<output>/<table>.csv``.
"""
import ast
import csv
import os
import sys
import re

# Column names collected from CREATE TABLE statements, keyed by table name.
SCHEMAS = {}

# Matches either a complete quoted SQL string (backslash escapes allowed) or
# a bare NULL keyword.  The string alternatives come first so that the
# letters "NULL" inside a string value are consumed as part of that string
# and never mistaken for the keyword.
_STRING_OR_NULL = re.compile(
    r"""'(?:[^'\\]|\\.)*'|"(?:[^"\\]|\\.)*"|\bNULL\b"""
)


def is_create_statement(line):
    """Return True if *line* begins a CREATE TABLE statement."""
    return line.startswith('CREATE TABLE')


def is_field_definition(line):
    """Return True if *line*, ignoring leading whitespace, starts with a
    backtick-quoted name (how the script recognises a column definition)."""
    return line.strip().startswith('`')


def is_insert_statement(line):
    """Return True if *line* begins an INSERT INTO statement."""
    return line.startswith('INSERT INTO')


def get_mysql_name_value(line):
    """Return the first backtick-quoted name in *line*, or None if the line
    contains no backtick-quoted name."""
    result = re.search(r'`([^`]*)`', line)
    if result is None:
        return None
    return result.group(1)


def _null_to_empty_string(match):
    """Regex callback: turn a bare NULL into ``''``; keep strings as-is."""
    token = match.group(0)
    return "''" if token == 'NULL' else token


def get_value_tuples(line):
    """Return the rows of an INSERT statement as a tuple of tuples.

    Everything after the first ``' VALUES '`` in *line* is evaluated as
    Python literals with ``ast.literal_eval``.  SQL ``NULL`` values become
    empty strings.  The result is always a tuple of row tuples, including
    for single-row statements and single-column rows.  An INSERT without a
    VALUES payload yields an empty tuple.

    Raises:
        ValueError or SyntaxError: propagated from ``ast.literal_eval``
            when the payload is not made of valid Python literals.
    """
    values = line.partition(' VALUES ')[-1].strip()
    if values.endswith(';'):
        values = values[:-1]
    if not values:
        return ()

    # Only replace NULL keywords that sit outside quoted strings; a blanket
    # str.replace would corrupt values such as 'NULLABLE'.
    values = _STRING_OR_NULL.sub(_null_to_empty_string, values)

    # The trailing comma forces a tuple of rows even when the statement
    # holds a single row: "(1,'a')," evaluates to ((1, 'a'),).
    rows = ast.literal_eval(values + ',')

    # A single-column row such as "(1)" evaluates to a bare scalar, so wrap
    # anything that is not already a tuple.
    return tuple(row if isinstance(row, tuple) else (row,) for row in rows)


def write_file(output_directory, table_name, schema, values, append=False):
    """Write *values* to ``<output_directory>/<table_name>.csv``.

    Args:
        output_directory: Directory that receives the CSV file.
        table_name: Table name; used as the CSV file's base name.
        schema: Column names, used as the CSV header and field order.
        values: Iterable of rows; each row is paired positionally with
            *schema*.
        append: When False (the default) the file is created or truncated
            and a header row is written first.  When True the rows are
            appended to the existing file and no header is written.
    """
    file_name = os.path.join(output_directory, '%s.csv' % (table_name,))
    mode = 'a' if append else 'w'
    # newline='' is what the csv module requires of files it writes to;
    # without it, rows end in "\r\r\n" on Windows.
    with open(file_name, mode, newline='') as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=schema)
        if not append:
            writer.writeheader()
        for value in values:
            writer.writerow(dict(zip(schema, value)))


def parse_file(file_name, output_directory):
    """Parse the dump at *file_name* and write one CSV per table.

    The dump is read line by line.  CREATE TABLE lines start a new entry in
    ``SCHEMAS``, backtick-led lines add column names to the current table,
    and every INSERT INTO line is written to that table's CSV file.
    *output_directory* is created if it does not exist.

    Raises:
        KeyError: if an INSERT statement names a table for which no
            CREATE TABLE statement has been seen.
    """
    current_table_name = None
    # Tables whose CSV file has already been started during this call.  A
    # dump usually spreads a large table over several INSERT statements, so
    # later statements must append instead of overwriting the file.
    written_tables = set()

    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)

    with open(file_name, 'r') as read_file:
        for line in read_file:
            if is_create_statement(line):
                current_table_name = get_mysql_name_value(line)
                SCHEMAS[current_table_name] = []
            elif current_table_name and is_field_definition(line):
                field_name = get_mysql_name_value(line)
                SCHEMAS[current_table_name].append(field_name)
            elif is_insert_statement(line):
                current_table_name = get_mysql_name_value(line)
                current_schema = SCHEMAS[current_table_name]
                values = get_value_tuples(line)
                write_file(output_directory, current_table_name,
                           current_schema, values,
                           append=current_table_name in written_tables)
                written_tables.add(current_table_name)


if __name__ == '__main__':
    if len(sys.argv) != 3:
        sys.exit('usage: python mysql_dump_to_csv.py <dump.sql> '
                 '<output_directory>')
    parse_file(sys.argv[1], sys.argv[2])