├── mtgsqlive ├── __init__.py ├── enums │ ├── __init__.py │ └── data_type.py ├── converters │ ├── parents │ │ ├── __init__.py │ │ ├── sqlite_based_converter.py │ │ ├── abstract.py │ │ └── sql_like.py │ ├── __init__.py │ ├── csv.py │ ├── parquet.py │ ├── postgresql.py │ ├── mysql.py │ └── sqlite.py └── __main__.py ├── requirements_test.txt ├── .isort.cfg ├── requirements.txt ├── mypy.ini ├── tox.ini ├── LICENSE ├── README.md ├── .gitignore ├── setup.py └── .pylintrc /mtgsqlive/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /mtgsqlive/enums/__init__.py: -------------------------------------------------------------------------------- 1 | from .data_type import MtgjsonDataType 2 | -------------------------------------------------------------------------------- /requirements_test.txt: -------------------------------------------------------------------------------- 1 | black==23.11.0 2 | isort==5.12.0 3 | mypy==1.7.1 4 | pylint==3.0.2 5 | tox==4.11.4 6 | 7 | types-PyMySQL==1.1.0.1 -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | multi_line_output = 3 3 | include_trailing_comma = True 4 | force_grid_wrap = 0 5 | use_parentheses = True 6 | line_length = 88 -------------------------------------------------------------------------------- /mtgsqlive/enums/data_type.py: -------------------------------------------------------------------------------- 1 | import enum 2 | 3 | 4 | class MtgjsonDataType(enum.Enum): 5 | MTGJSON_CARDS = "AllPrintings" 6 | MTGJSON_CARD_PRICES = "AllPricesToday" 7 | -------------------------------------------------------------------------------- /mtgsqlive/converters/parents/__init__.py: -------------------------------------------------------------------------------- 1 | from 
.abstract import AbstractConverter 2 | from .sql_like import SqlLikeConverter 3 | from .sqlite_based_converter import SqliteBasedConverter 4 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | PyMySQL==1.1.0 2 | argparse==1.4.0 3 | mysql-connector-python==8.2.0 4 | pandas==2.1.3 5 | pyarrow==14.0.1 6 | requests==2.31.0 7 | setuptools==69.0.2 8 | SQLAlchemy==2.0.23 9 | -------------------------------------------------------------------------------- /mtgsqlive/converters/__init__.py: -------------------------------------------------------------------------------- 1 | from .csv import CsvConverter 2 | from .mysql import MysqlConverter 3 | from .parquet import ParquetConverter 4 | from .postgresql import PostgresqlConverter 5 | from .sqlite import SqliteConverter 6 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.11 3 | 4 | check_untyped_defs = True 5 | disallow_untyped_calls = True 6 | disallow_untyped_defs = True 7 | disallow_subclassing_any = True 8 | follow_imports = normal 9 | incremental = True 10 | ignore_missing_imports = True 11 | strict_optional = True 12 | warn_no_return = True 13 | warn_redundant_casts = True 14 | warn_return_any = True 15 | warn_unused_ignores = True 16 | disable_error_code = union-attr 17 | 18 | [mypy-pkg/generated_code/*] 19 | ignore_errors = True 20 | -------------------------------------------------------------------------------- /mtgsqlive/converters/csv.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | from ..enums import MtgjsonDataType 4 | from .parents import SqliteBasedConverter 5 | 6 | 7 | class CsvConverter(SqliteBasedConverter): 8 | def __init__( 9 | self, 
mtgjson_data: Dict[str, Any], output_dir: str, data_type: MtgjsonDataType 10 | ) -> None: 11 | super().__init__(mtgjson_data, output_dir, data_type) 12 | self.output_obj.root_dir.joinpath("csv").mkdir(parents=True, exist_ok=True) 13 | 14 | def convert(self) -> None: 15 | for table_name in self.get_table_names(): 16 | pd_table = self.get_table_dataframe(table_name) 17 | 18 | pd_table.to_csv( 19 | str( 20 | self.output_obj.root_dir.joinpath("csv").joinpath( 21 | f"{table_name}.csv" 22 | ) 23 | ), 24 | encoding="utf-8", 25 | index=False, 26 | ) 27 | -------------------------------------------------------------------------------- /tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = isort-inplace, black-inplace, mypy, lint 3 | 4 | [testenv] 5 | setenv = PYTHONPATH = {toxinidir} 6 | deps = 7 | -r {toxinidir}/requirements.txt 8 | -r {toxinidir}/requirements_test.txt 9 | 10 | [testenv:isort-inplace] 11 | description = Sort imports 12 | commands = isort --profile black mtgsqlive/ 13 | 14 | [testenv:isort-check] 15 | description = dry-run isort to see if imports need resorting 16 | commands = isort --profile black --check-only mtgsqlive/ 17 | 18 | [testenv:black-inplace] 19 | description = Run black and edit all files in place 20 | commands = black mtgsqlive/ 21 | 22 | [testenv:black-check] 23 | description = Dry-run black to check formatting without editing files 24 | commands = black --check mtgsqlive/ 25 | 26 | [testenv:mypy] 27 | description = mypy static type checking only 28 | commands = mypy {posargs:mtgsqlive/} 29 | 30 | [testenv:lint] 31 | description = Run linting tools 32 | commands = pylint mtgsqlive/ --rcfile=.pylintrc 33 | 34 | ;[testenv:unit] 35 | ;description = Run unit tests with coverage and mypy type checking 36 | ;commands = pytest tests/ 37 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 
1 | MIT License 2 | 3 | Copyright © 2018-Present Zachary Halpern 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /mtgsqlive/converters/parquet.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Dict 2 | 3 | import pyarrow 4 | import pyarrow.parquet 5 | 6 | from ..enums import MtgjsonDataType 7 | from .parents import SqliteBasedConverter 8 | 9 | 10 | class ParquetConverter(SqliteBasedConverter): 11 | def __init__( 12 | self, mtgjson_data: Dict[str, Any], output_dir: str, data_type: MtgjsonDataType 13 | ) -> None: 14 | super().__init__(mtgjson_data, output_dir, data_type) 15 | self.output_obj.root_dir.joinpath("parquet").mkdir(parents=True, exist_ok=True) 16 | 17 | def convert(self) -> None: 18 | for table_name in self.get_table_names(): 19 | pd_table = self.get_table_dataframe(table_name) 20 | 21 | parquet_table = pyarrow.Table.from_pandas( 22 | pd_table, preserve_index=False 23 | ).replace_schema_metadata(self.get_metadata()) 24 | 25 | pyarrow.parquet.write_table( 26 | parquet_table, 27 | str( 28 | self.output_obj.root_dir.joinpath("parquet").joinpath( 29 | f"{table_name}.parquet" 30 | ) 31 | ), 32 | ) 33 | -------------------------------------------------------------------------------- /mtgsqlive/converters/parents/sqlite_based_converter.py: -------------------------------------------------------------------------------- 1 | import abc 2 | from typing import Any, Dict, List 3 | 4 | import pandas as pd 5 | import sqlalchemy 6 | 7 | from ...enums import MtgjsonDataType 8 | from .abstract import AbstractConverter 9 | 10 | 11 | class SqliteBasedConverter(AbstractConverter, abc.ABC): 12 | sqlite_engine: sqlalchemy.Engine 13 | 14 | def __init__( 15 | self, mtgjson_data: Dict[str, Any], output_dir: str, data_type: MtgjsonDataType 16 | ) -> None: 17 | super().__init__(mtgjson_data, output_dir, data_type) 18 | 19 | db_path = self.output_obj.root_dir.joinpath(f"{data_type.value}.sqlite") 20 | if not db_path.exists(): 21 | raise 
FileNotFoundError() 22 | 23 | self.sqlite_engine = sqlalchemy.create_engine(f"sqlite:///{db_path}") 24 | 25 | def get_table_names(self) -> List[str]: 26 | with self.sqlite_engine.connect() as connection: 27 | result = connection.execute(sqlalchemy.text("""SELECT name FROM sqlite_master WHERE type = 'table';""")) 28 | table_names = [r.name for r in result] 29 | return table_names 30 | 31 | def get_table_dataframe(self, table_name: str) -> pd.DataFrame: 32 | return pd.read_sql_table(table_name, self.sqlite_engine) 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MTGSQLive 2 | A project to ETL MTGJSON data into multiple other consumer formats 3 | 4 | # Connect With Us 5 | Discord via [![Discord](https://img.shields.io/discord/224178957103136779.svg)](https://discord.gg/74GUQDE) 6 | 7 | # About Us 8 | 9 | MTGJSON and MTGSQLive are open-source database creation and distribution tools for [*Magic: The Gathering*](https://magic.wizards.com/) cards. 10 | 11 | You can find our documentation with all properties [here](https://mtgjson.com/data-models/). 12 | 13 | To provide feedback or to report a bug, please [open a ticket](https://github.com/mtgjson/mtgsqlite/issues/new/). 14 | 15 | If you would like to join or assist the development of the project, you can [join us on Discord](https://mtgjson.com/discord) to discuss things further. 
16 | 17 | # Usage 18 | ```bash 19 | $ pip install -r requirements.txt 20 | 21 | $ python3 -m mtgsqlive [--args] 22 | 23 | options: 24 | -h, --help show this help message and exit 25 | -i INPUT_FILE, --input-file INPUT_FILE 26 | Path to MTGJSON AllPrintings.json 27 | -o OUTPUT_DIR, --output-dir OUTPUT_DIR 28 | Where to place translated files 29 | 30 | Converters: 31 | --all Run all ETL operations 32 | --csv Compile CSV AllPrinting files 33 | --mysql Compile AllPrintings.sql 34 | --parquet Compile Parquet AllPrinting files 35 | --postgresql Compile AllPrintings.psql 36 | --sqlite Compile AllPrintings.sqlite 37 | ``` -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | *.ipynb 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | 89 | # Rope project settings 90 | .ropeproject 91 | .idea/ 92 | 93 | # Mac Files 94 | .DS_Store 95 | 96 | # No VIM saves 97 | *~ 98 | 99 | # Json file 100 | *.json 101 | 102 | # data base 103 | *.db* 104 | *.sqlite* 105 | *.csv* 106 | *.sql* -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | import setuptools 3 | 4 | project_root: pathlib.Path = pathlib.Path(__file__).resolve().parent 5 | 6 | setuptools.setup( 7 | name="MTGSQLite", 8 | version="0.3", 9 | author="Zach Halpern", 10 | author_email="zach@mtgjson.com", 11 | url="https://github.com/mtgjson/mtgsqlive/", 12 | description="Convert MTGJSON files into alternative formats", 13 | long_description=project_root.joinpath("README.md").open(encoding="utf-8").read(), 14 | long_description_content_type="text/markdown", 15 | license="MIT", 16 | classifiers=[ 17 | "Intended Audience :: Developers", 18 | "Intended Audience :: Education", 19 | "Intended Audience :: Science/Research", 20 | "License :: OSI Approved :: 
MIT License", 21 | "Natural Language :: English", 22 | "Operating System :: MacOS :: MacOS X", 23 | "Operating System :: Microsoft :: Windows :: Windows 10", 24 | "Operating System :: Microsoft :: Windows :: Windows 11", 25 | "Operating System :: Unix", 26 | "Programming Language :: Python :: 3 :: Only", 27 | "Programming Language :: Python :: 3.7", 28 | "Programming Language :: Python :: 3.8", 29 | "Programming Language :: Python :: 3.9", 30 | "Programming Language :: Python :: 3.10", 31 | "Programming Language :: Python :: 3.11", 32 | "Programming Language :: Python :: 3.12", 33 | "Programming Language :: Python", 34 | "Topic :: Database", 35 | "Topic :: Software Development :: Version Control :: Git", 36 | ], 37 | keywords="Magic: The Gathering, MTG, JSON, Card Games, Collectible, Trading Cards", 38 | include_package_data=True, 39 | packages=setuptools.find_packages(), 40 | install_requires=project_root.joinpath("requirements.txt") 41 | .open(encoding="utf-8") 42 | .readlines() 43 | if project_root.joinpath("requirements.txt").is_file() 44 | else [], # Use the requirements file, if able 45 | ) 46 | -------------------------------------------------------------------------------- /.pylintrc: -------------------------------------------------------------------------------- 1 | [MASTER] 2 | # Pickle collected data for later comparisons. 3 | persistent=yes 4 | 5 | [MESSAGES CONTROL] 6 | # Disable the message, report, category or checker with the given id(s). You 7 | # can either give multiple identifiers separated by comma (,) or put this 8 | # option multiple times (only on the command line, not in the configuration 9 | # file where it should appear only once).You can also use "--disable=all" to 10 | # disable everything first and then reenable specific checks. For example, if 11 | # you want to run only the similarities checker, you can use "--disable=all 12 | # --enable=similarities". 
If you want to run only the classes checker, but have 13 | # no Warning level messages displayed, use"--disable=all --enable=classes 14 | # --disable=W" 15 | disable= 16 | broad-except, 17 | duplicate-code, 18 | import-outside-toplevel, 19 | line-too-long, 20 | logging-format-interpolation, 21 | logging-fstring-interpolation, 22 | too-few-public-methods, 23 | too-many-arguments, 24 | too-many-branches, 25 | too-many-instance-attributes, 26 | too-many-lines, 27 | too-many-locals, 28 | too-many-nested-blocks, 29 | too-many-return-statements, 30 | too-many-statements, 31 | unsubscriptable-object, 32 | wrong-import-order, 33 | wrong-import-position, 34 | missing-function-docstring, 35 | missing-class-docstring, 36 | missing-module-docstring 37 | 38 | [REPORTS] 39 | # Set the output format. Available formats are text, parseable, colorized, json 40 | # and msvs (visual studio).You can also give a reporter class, eg 41 | # mypackage.mymodule.MyReporterClass. 42 | output-format=colorized 43 | 44 | [BASIC] 45 | # Good variable names which should always be accepted, separated by a comma. 46 | good-names= 47 | f, 48 | i, 49 | j, 50 | k, 51 | _, 52 | q, 53 | fp 54 | 55 | # Regular expression which should only match function or class names that do 56 | # not require a docstring. 57 | no-docstring-rgx=__.*__|test_.* 58 | 59 | [MISCELLANEOUS] 60 | # List of note tags to take in consideration, separated by a comma. 61 | notes= 62 | FIXME, 63 | TODO 64 | 65 | 66 | [VARIABLES] 67 | # A regular expression matching the name of dummy variables (i.e. expectedly 68 | # not used). 
69 | dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$) 70 | -------------------------------------------------------------------------------- /mtgsqlive/converters/postgresql.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Any, Dict, Iterator 3 | 4 | import pymysql 5 | 6 | from ..enums import MtgjsonDataType 7 | from .parents import SqlLikeConverter 8 | 9 | 10 | class PostgresqlConverter(SqlLikeConverter): 11 | def __init__( 12 | self, mtgjson_data: Dict[str, Any], output_dir: str, data_type: MtgjsonDataType 13 | ) -> None: 14 | super().__init__(mtgjson_data, output_dir, data_type) 15 | self.output_obj.fp = self.output_obj.root_dir.joinpath( 16 | f"{data_type.value}.psql" 17 | ).open("w", encoding="utf-8") 18 | 19 | def convert(self) -> None: 20 | sql_schema_as_dict = self._generate_sql_schema_dict() 21 | schema_query = self._convert_schema_dict_to_query( 22 | sql_schema_as_dict, 23 | engine="", 24 | primary_key_op="SERIAL PRIMARY KEY", 25 | ) 26 | 27 | header = "\n".join( 28 | ( 29 | "-- MTGSQLive Output File", 30 | f"-- {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n" 31 | f"-- MTGJSON Version: {self.get_version()}", 32 | "", 33 | "START TRANSACTION;", 34 | "", 35 | schema_query, 36 | "", 37 | "COMMIT;", 38 | "", 39 | "", 40 | ) 41 | ) 42 | self.output_obj.fp.write(header) 43 | 44 | insert_data_generator = self.generate_database_insert_statements() 45 | self.write_statements_to_file(insert_data_generator) 46 | 47 | def create_insert_statement_body(self, data: Dict[str, Any]) -> str: 48 | pre_processed_values = [] 49 | for value in data.values(): 50 | if not value: 51 | pre_processed_values.append("NULL") 52 | continue 53 | 54 | if isinstance(value, list): 55 | statement = ( 56 | pymysql.converters.escape_string(", ".join(map(str, value))) 57 | .replace("\\'", "''") 58 | .replace('\\"', '"') 59 | ) 60 | pre_processed_values.append(f"'{statement}'") 61 | else: 62 | 
statement = ( 63 | pymysql.converters.escape_string(str(value)) 64 | .replace("\\'", "''") 65 | .replace('\\"', '"') 66 | ) 67 | pre_processed_values.append(f"'{statement}'") 68 | 69 | return ", ".join(pre_processed_values) 70 | 71 | def write_statements_to_file(self, data_generator: Iterator[str]) -> None: 72 | statements = [] 73 | for statement in data_generator: 74 | statements.append(statement) 75 | if len(statements) >= 1_000: 76 | self.output_obj.fp.writelines(statements) 77 | statements = [] 78 | self.output_obj.fp.writelines(statements) 79 | -------------------------------------------------------------------------------- /mtgsqlive/converters/mysql.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | from typing import Any, Dict, Iterator 3 | 4 | import pymysql.converters 5 | 6 | from .parents import SqlLikeConverter 7 | from .parents.sql_like import MtgjsonDataType 8 | 9 | 10 | class MysqlConverter(SqlLikeConverter): 11 | def __init__( 12 | self, mtgjson_data: Dict[str, Any], output_dir: str, data_type: MtgjsonDataType 13 | ): 14 | super().__init__(mtgjson_data, output_dir, data_type) 15 | self.output_obj.fp = self.output_obj.root_dir.joinpath( 16 | f"{data_type.value}.sql" 17 | ).open("w", encoding="utf-8") 18 | 19 | def convert(self) -> None: 20 | sql_schema_as_dict = self._generate_sql_schema_dict() 21 | schema_query = self._convert_schema_dict_to_query( 22 | sql_schema_as_dict, 23 | engine="ENGINE=InnoDB DEFAULT CHARSET=utf8mb4", 24 | primary_key_op="INTEGER PRIMARY KEY AUTO_INCREMENT", 25 | ) 26 | 27 | header = "\n".join( 28 | ( 29 | "-- MTGSQLive Output File", 30 | f"-- {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n" 31 | f"-- MTGJSON Version: {self.get_version()}", 32 | "", 33 | "START TRANSACTION;", 34 | "SET names 'utf8mb4';", 35 | "", 36 | schema_query, 37 | "", 38 | "COMMIT;", 39 | "", 40 | "", 41 | ) 42 | ) 43 | self.output_obj.fp.write(header) 44 | 45 | 
insert_data_generator = self.generate_database_insert_statements() 46 | self.write_statements_to_file(insert_data_generator) 47 | self.output_obj.fp.write("\nCOMMIT;") 48 | 49 | def create_insert_statement_body(self, data: Dict[str, Any]) -> str: 50 | pre_processed_values = [] 51 | for value in data.values(): 52 | if value is None: 53 | pre_processed_values.append("NULL") 54 | continue 55 | 56 | if isinstance(value, list): 57 | pre_processed_values.append( 58 | '"' 59 | + pymysql.converters.escape_string(", ".join(map(str, value))) 60 | + '"' 61 | ) 62 | elif isinstance(value, bool): 63 | pre_processed_values.append("1" if value else "0") 64 | else: 65 | pre_processed_values.append( 66 | '"' + pymysql.converters.escape_string(str(value)) + '"' 67 | ) 68 | 69 | return ", ".join(pre_processed_values) 70 | 71 | def write_statements_to_file(self, data_generator: Iterator[str]) -> None: 72 | statements = [] 73 | for statement in data_generator: 74 | statements.append(statement) 75 | if len(statements) >= 1_000: 76 | self.output_obj.fp.writelines(statements) 77 | statements = [] 78 | self.output_obj.fp.writelines(statements) 79 | -------------------------------------------------------------------------------- /mtgsqlive/converters/sqlite.py: -------------------------------------------------------------------------------- 1 | import json 2 | import sqlite3 3 | from collections import defaultdict 4 | from typing import Any, Dict, Iterator 5 | 6 | import pymysql 7 | 8 | from ..enums import MtgjsonDataType 9 | from .parents import SqlLikeConverter 10 | 11 | nested_dict: Any = lambda: defaultdict(nested_dict) 12 | 13 | 14 | class SqliteConverter(SqlLikeConverter): 15 | def __init__( 16 | self, mtgjson_data: Dict[str, Any], output_dir: str, data_type: MtgjsonDataType 17 | ) -> None: 18 | super().__init__(mtgjson_data, output_dir, data_type) 19 | 20 | self.output_obj.fp = sqlite3.connect( 21 | self.output_obj.root_dir.joinpath(f"{data_type.value}.sqlite") 22 | ) 23 | 
self.output_obj.fp.execute("pragma journal_mode=wal;") 24 | 25 | def convert(self) -> None: 26 | sql_schema_as_dict = self._generate_sql_schema_dict() 27 | schema_query = self._convert_schema_dict_to_query( 28 | sql_schema_as_dict, engine="", primary_key_op=None 29 | ) 30 | 31 | self.output_obj.fp.executescript(schema_query) 32 | 33 | insert_data_generator = self.generate_database_insert_statements() 34 | self.write_statements_to_file(insert_data_generator) 35 | 36 | def create_insert_statement_body(self, data: Dict[str, Any]) -> str: 37 | pre_processed_values = [] 38 | for value in data.values(): 39 | if value is None: 40 | pre_processed_values.append("NULL") 41 | 42 | elif isinstance(value, list): 43 | statement = ( 44 | pymysql.converters.escape_string(", ".join(map(str, value))) 45 | .replace("\\'", "'") 46 | .replace('\\"', '""') 47 | ) 48 | pre_processed_values.append(f'"{statement}"') 49 | 50 | elif isinstance(value, dict): 51 | statement = ( 52 | pymysql.converters.escape_string(json.dumps(value)) 53 | .replace("\\'", "'") 54 | .replace('\\"', '""') 55 | ) 56 | pre_processed_values.append(f'"{statement}"') 57 | 58 | elif isinstance(value, bool): 59 | pre_processed_values.append("1" if value else "0") 60 | 61 | else: 62 | statement = ( 63 | pymysql.converters.escape_string(str(value)) 64 | .replace("\\'", "'") 65 | .replace('\\"', '""') 66 | ) 67 | pre_processed_values.append(f'"{statement}"') 68 | 69 | return ", ".join(pre_processed_values) 70 | 71 | def write_statements_to_file(self, data_generator: Iterator[str]) -> None: 72 | cursor = self.output_obj.fp.cursor() 73 | statements = [] 74 | 75 | for statement in data_generator: 76 | statements.append(statement) 77 | if len(statements) >= 1_000: 78 | cursor.executescript("\n".join(statements)) 79 | self.output_obj.fp.commit() 80 | statements = [] 81 | cursor.executescript("\n".join(statements)) 82 | self.output_obj.fp.commit() 83 | 
-------------------------------------------------------------------------------- /mtgsqlive/__main__.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import json 3 | import logging 4 | import pathlib 5 | from collections import OrderedDict 6 | from datetime import datetime 7 | from typing import Any, Dict 8 | 9 | from mtgsqlive.converters import ( 10 | CsvConverter, 11 | MysqlConverter, 12 | ParquetConverter, 13 | PostgresqlConverter, 14 | SqliteConverter, 15 | ) 16 | from mtgsqlive.enums.data_type import MtgjsonDataType 17 | 18 | TOP_LEVEL_DIR: pathlib.Path = pathlib.Path(__file__).resolve().parent.parent 19 | LOG_DIR: pathlib.Path = TOP_LEVEL_DIR.joinpath("logs") 20 | LOGGER = logging.getLogger(__name__) 21 | 22 | 23 | def init_logger() -> None: 24 | LOG_DIR.mkdir(exist_ok=True) 25 | logging.basicConfig( 26 | level=logging.INFO, 27 | format="[%(levelname)s] %(asctime)s: %(message)s", 28 | handlers=[ 29 | logging.StreamHandler(), 30 | logging.FileHandler( 31 | str( 32 | LOG_DIR.joinpath( 33 | "mtgsqlive_" 34 | + str(datetime.now().strftime("%Y_%m_%d_%H_%M_%S")) 35 | + ".log" 36 | ) 37 | ) 38 | ), 39 | ], 40 | ) 41 | 42 | 43 | def get_converters() -> Dict[str, Any]: 44 | return OrderedDict( 45 | { 46 | "mysql": MysqlConverter, 47 | "postgresql": PostgresqlConverter, 48 | "sqlite": SqliteConverter, 49 | "csv": CsvConverter, 50 | "parquet": ParquetConverter, 51 | } 52 | ) 53 | 54 | 55 | def parse_args() -> argparse.Namespace: 56 | parser = argparse.ArgumentParser() 57 | 58 | parser.add_argument( 59 | "-i", 60 | "--input-dir", 61 | type=str, 62 | required=True, 63 | help="Path to directory that has MTGJSON compiled files, like AllPrintings.json and AllPricesToday.json", 64 | ) 65 | parser.add_argument( 66 | "-o", 67 | "--output-dir", 68 | type=str, 69 | default="/tmp/mtgsqlive", 70 | help="Where to place translated files", 71 | ) 72 | parser.add_argument( 73 | "-s", 74 | "--sets", 75 | type=str.upper, 76 | 
nargs="*", 77 | help="Transpose specific sets instead of all sets", 78 | ) 79 | 80 | converter_group = parser.add_argument_group(title="Converters") 81 | converter_group.add_argument( 82 | "--all", action="store_true", help="Run all ETL operations" 83 | ) 84 | converter_group.add_argument( 85 | "--csv", action="store_true", help="Compile CSV AllPrinting files" 86 | ) 87 | converter_group.add_argument( 88 | "--mysql", action="store_true", help="Compile AllPrintings.sql" 89 | ) 90 | converter_group.add_argument( 91 | "--parquet", action="store_true", help="Compile Parquet AllPrinting files" 92 | ) 93 | converter_group.add_argument( 94 | "--postgresql", action="store_true", help="Compile AllPrintings.psql" 95 | ) 96 | converter_group.add_argument( 97 | "--sqlite", action="store_true", help="Compile AllPrintings.sqlite" 98 | ) 99 | 100 | return parser.parse_args() 101 | 102 | 103 | def main() -> None: 104 | init_logger() 105 | 106 | args = parse_args() 107 | 108 | converters_map = get_converters() 109 | if not args.all: 110 | for converter_input_param in converters_map.copy().keys(): 111 | if not getattr(args, converter_input_param): 112 | del converters_map[converter_input_param] 113 | 114 | mtgjson_input_dir = pathlib.Path(args.input_dir).expanduser() 115 | for data_type in MtgjsonDataType: 116 | mtgjson_input_file = mtgjson_input_dir.joinpath(f"{data_type.value}.json") 117 | if not mtgjson_input_file.exists(): 118 | LOGGER.error(f"Cannot locate {mtgjson_input_file}, skipping.") 119 | continue 120 | 121 | with mtgjson_input_file.open(encoding="utf-8") as fp: 122 | mtgjson_input_data = json.load(fp) 123 | 124 | if args.sets: 125 | for set_key in list(mtgjson_input_data["data"].keys()): 126 | if set_key not in args.sets: 127 | del mtgjson_input_data["data"][set_key] 128 | 129 | for converter in converters_map.values(): 130 | LOGGER.info(f"Converting {data_type.value} via {converter.__name__}") 131 | converter(mtgjson_input_data, args.output_dir, data_type).convert() 
132 | LOGGER.info(f"Converted {data_type.value} via {converter.__name__}") 133 | 134 | 135 | if __name__ == "__main__": 136 | main() 137 | -------------------------------------------------------------------------------- /mtgsqlive/converters/parents/abstract.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import datetime 3 | import pathlib 4 | from sqlite3 import Connection 5 | from typing import Any, Dict, Iterator, Optional, TextIO 6 | 7 | from ...enums import MtgjsonDataType 8 | 9 | 10 | class OutputObject: 11 | fp: TextIO | Connection 12 | root_dir: pathlib.Path 13 | 14 | def __init__(self, root_dir: pathlib.Path): 15 | self.root_dir = root_dir 16 | 17 | 18 | class AbstractConverter(abc.ABC): 19 | mtgjson_data: Dict[str, Any] 20 | output_obj: OutputObject 21 | data_type: MtgjsonDataType 22 | 23 | set_keys_to_skip = { 24 | "booster", # Broken out into BoosterContents, BoosterContentWeights, BoosterSheets, BoosterSheetCards 25 | "cards", # Broken out into cards 26 | "decks", # WIP for own tables 27 | "sealedProduct", # WIP for own table 28 | "tokens", # Broken out into tokens 29 | "translations", # Broken out into setTranslations 30 | } 31 | card_keys_to_skip = { 32 | "convertedManaCost", # Redundant with manaValue 33 | "foreignData", # Broken out into cardForeignData 34 | "identifiers", # Broken out into cardIdentifiers & tokenIdentifiers 35 | "legalities", # Broken out into cardLegalities 36 | "purchaseUrls", # Broken out into cardPurchaseUrls 37 | "rulings", # Broken out into cardRulings 38 | } 39 | 40 | def __init__( 41 | self, mtgjson_data: Dict[str, Any], output_dir: str, data_type: MtgjsonDataType 42 | ) -> None: 43 | self.mtgjson_data = mtgjson_data 44 | self.output_obj = OutputObject(pathlib.Path(output_dir).expanduser()) 45 | self.data_type = data_type 46 | 47 | @abc.abstractmethod 48 | def convert(self) -> None: 49 | raise NotImplementedError() 50 | 51 | def get_metadata(self) -> 
Iterator[Dict[str, Any]]: 52 | yield self.mtgjson_data.get("meta", {}) 53 | 54 | def get_version(self) -> Optional[str]: 55 | return str(self.mtgjson_data["meta"]["version"]) 56 | 57 | def get_next_set(self) -> Iterator[Dict[str, Any]]: 58 | for set_data in self.mtgjson_data["data"].values(): 59 | local_set_data = {} 60 | for key, value in set_data.items(): 61 | if key not in self.set_keys_to_skip: 62 | local_set_data[key] = value 63 | yield local_set_data 64 | 65 | def get_next_set_field_with_normalization( 66 | self, set_attribute: str 67 | ) -> Iterator[Dict[str, Any]]: 68 | for set_code, set_data in self.mtgjson_data["data"].items(): 69 | if not set_data.get(set_attribute): 70 | continue 71 | 72 | for language, translation in set_data[set_attribute].items(): 73 | yield { 74 | "language": language, 75 | "setCode": set_code, 76 | "translation": translation 77 | } 78 | 79 | def get_next_card_like(self, set_attribute: str) -> Iterator[Dict[str, Any]]: 80 | for set_data in self.mtgjson_data["data"].values(): 81 | for card in set_data.get(set_attribute): 82 | local_card = {} 83 | for key, value in card.items(): 84 | if key not in self.card_keys_to_skip: 85 | local_card[key] = value 86 | yield local_card 87 | 88 | def get_next_card_identifier(self, set_attribute: str) -> Iterator[Dict[str, Any]]: 89 | return self.get_next_card_field_with_normalization(set_attribute, "identifiers") 90 | 91 | def get_next_card_legalities(self, set_attribute: str) -> Iterator[Dict[str, Any]]: 92 | return self.get_next_card_field_with_normalization(set_attribute, "legalities") 93 | 94 | def get_next_card_ruling_entry( 95 | self, set_attribute: str 96 | ) -> Iterator[Dict[str, Any]]: 97 | return self.get_next_card_field_with_normalization(set_attribute, "rulings") 98 | 99 | def get_next_card_foreign_data_entry( 100 | self, set_attribute: str 101 | ) -> Iterator[Dict[str, Any]]: 102 | return self.get_next_card_field_with_normalization(set_attribute, "foreignData") 103 | 104 | def 
get_next_card_purchase_url_entry( 105 | self, set_attribute: str 106 | ) -> Iterator[Dict[str, Any]]: 107 | return self.get_next_card_field_with_normalization( 108 | set_attribute, "purchaseUrls" 109 | ) 110 | 111 | def get_next_card_field_with_normalization( 112 | self, set_attribute: str, secondary_attribute: str 113 | ) -> Iterator[Dict[str, Any]]: 114 | for set_data in self.mtgjson_data["data"].values(): 115 | for card in set_data.get(set_attribute): 116 | if secondary_attribute not in card: 117 | continue 118 | 119 | if isinstance(card[secondary_attribute], list): 120 | for sub_entity in card[secondary_attribute]: 121 | yield self.__camelize_and_normalize_card(sub_entity, card) 122 | else: 123 | yield self.__camelize_and_normalize_card( 124 | card[secondary_attribute], card 125 | ) 126 | 127 | @staticmethod 128 | def __camelize_and_normalize_card( 129 | entity: Dict[str, Any], card: Dict[str, Any] 130 | ) -> Dict[str, Any]: 131 | entity["uuid"] = card.get("uuid") 132 | return entity 133 | 134 | def get_next_card_price( 135 | self, 136 | oldest_date: datetime.date, 137 | ) -> Iterator[Dict[str, str]]: 138 | oldest_date_str = str(oldest_date) 139 | 140 | for card_uuid, card_uuid_data in self.mtgjson_data["data"].items(): 141 | for game_availability, game_availability_data in card_uuid_data.items(): 142 | for ( 143 | price_provider, 144 | price_provider_data, 145 | ) in game_availability_data.items(): 146 | currency = price_provider_data["currency"] 147 | for ( 148 | provider_listing, 149 | provider_listing_data, 150 | ) in price_provider_data.items(): 151 | if provider_listing == "currency": 152 | continue 153 | 154 | for ( 155 | card_finish, 156 | card_finish_data, 157 | ) in provider_listing_data.items(): 158 | for price_date, price_amount in card_finish_data.items(): 159 | if price_date < oldest_date_str: 160 | continue 161 | yield { 162 | "uuid": card_uuid, 163 | "gameAvailability": game_availability, 164 | "priceProvider": price_provider, 165 | 
"providerListing": provider_listing, 166 | "cardFinish": card_finish, 167 | "date": price_date, 168 | "price": price_amount, 169 | "currency": currency, 170 | } 171 | 172 | def get_next_booster_contents_entry(self) -> Iterator[Dict[str, str | int]]: 173 | for set_code, set_data in self.mtgjson_data["data"].items(): 174 | for booster_name, booster_object in set_data.get("booster", {}).items(): 175 | for index, booster_contents in enumerate(booster_object["boosters"]): 176 | for sheet_name, sheet_picks in booster_contents["contents"].items(): 177 | yield { 178 | "setCode": set_code, 179 | "boosterName": booster_name, 180 | "boosterIndex": index, 181 | "sheetName": sheet_name, 182 | "sheetPicks": sheet_picks, 183 | } 184 | 185 | def get_next_booster_weights_entry(self) -> Iterator[Dict[str, str | int]]: 186 | for set_code, set_data in self.mtgjson_data["data"].items(): 187 | for booster_name, booster_object in set_data.get("booster", {}).items(): 188 | for index, booster_contents in enumerate(booster_object["boosters"]): 189 | yield { 190 | "setCode": set_code, 191 | "boosterName": booster_name, 192 | "boosterIndex": index, 193 | "boosterWeight": booster_contents["weight"], 194 | } 195 | 196 | def get_next_booster_sheets_entry(self) -> Iterator[Dict[str, str | bool]]: 197 | for set_code, set_data in self.mtgjson_data["data"].items(): 198 | for booster_name, booster_object in set_data.get("booster", {}).items(): 199 | for sheet_name, sheet_contents in booster_object["sheets"].items(): 200 | yield { 201 | "setCode": set_code, 202 | "sheetName": sheet_name, 203 | "boosterName": booster_name, 204 | "sheetIsFoil": sheet_contents.get("foil", False), 205 | "sheetHasBalanceColors": sheet_contents.get( 206 | "balanceColors", False 207 | ), 208 | } 209 | 210 | def get_next_booster_sheet_cards_entry(self) -> Iterator[Dict[str, str | int]]: 211 | for set_code, set_data in self.mtgjson_data["data"].items(): 212 | for booster_name, booster_object in set_data.get("booster", 
{}).items(): 213 | for sheet_name, sheet_contents in booster_object["sheets"].items(): 214 | for card_uuid, card_weight in sheet_contents["cards"].items(): 215 | yield { 216 | "setCode": set_code, 217 | "sheetName": sheet_name, 218 | "boosterName": booster_name, 219 | "cardUuid": card_uuid, 220 | "cardWeight": card_weight, 221 | } 222 | -------------------------------------------------------------------------------- /mtgsqlive/converters/parents/sql_like.py: -------------------------------------------------------------------------------- 1 | import abc 2 | import datetime 3 | from collections import defaultdict 4 | from typing import Any, Dict, Iterator, List, Optional 5 | 6 | from ...enums import MtgjsonDataType 7 | from .abstract import AbstractConverter 8 | 9 | nested_dict: Any = lambda: defaultdict(nested_dict) 10 | 11 | 12 | class SqlLikeConverter(AbstractConverter, abc.ABC): 13 | @abc.abstractmethod 14 | def create_insert_statement_body(self, data: Dict[str, Any]) -> str: 15 | raise NotImplementedError() 16 | 17 | @abc.abstractmethod 18 | def write_statements_to_file(self, data_generator: Iterator[str]) -> None: 19 | raise NotImplementedError() 20 | 21 | def generate_database_insert_statements(self) -> Iterator[str]: 22 | if self.data_type == MtgjsonDataType.MTGJSON_CARDS: 23 | generators = self.__get_mtgjson_card_generators() 24 | elif self.data_type == MtgjsonDataType.MTGJSON_CARD_PRICES: 25 | generators = self.__get_mtgjson_card_prices_generators() 26 | else: 27 | raise ValueError() 28 | 29 | for generator in generators: 30 | for statement in generator: 31 | yield statement 32 | 33 | def __get_mtgjson_card_generators(self) -> List[Iterator[str]]: 34 | return [ 35 | self.__generate_insert_statement("meta", self.get_metadata()), 36 | self.__generate_insert_statement("sets", self.get_next_set()), 37 | self.__generate_insert_statement("cards", self.get_next_card_like("cards")), 38 | self.__generate_insert_statement( 39 | "tokens", 
self.get_next_card_like("tokens") 40 | ), 41 | self.__generate_insert_statement( 42 | "cardIdentifiers", self.get_next_card_identifier("cards") 43 | ), 44 | self.__generate_insert_statement( 45 | "cardLegalities", self.get_next_card_legalities("cards") 46 | ), 47 | self.__generate_insert_statement( 48 | "cardRulings", self.get_next_card_ruling_entry("cards") 49 | ), 50 | self.__generate_insert_statement( 51 | "cardForeignData", self.get_next_card_foreign_data_entry("cards") 52 | ), 53 | self.__generate_insert_statement( 54 | "cardPurchaseUrls", self.get_next_card_purchase_url_entry("cards") 55 | ), 56 | self.__generate_insert_statement( 57 | "tokenIdentifiers", self.get_next_card_identifier("tokens") 58 | ), 59 | self.__generate_insert_statement( 60 | "setTranslations", 61 | self.get_next_set_field_with_normalization("translations"), 62 | ), 63 | self.__generate_insert_statement( 64 | "setBoosterContents", self.get_next_booster_contents_entry() 65 | ), 66 | self.__generate_insert_statement( 67 | "setBoosterContentWeights", self.get_next_booster_weights_entry() 68 | ), 69 | self.__generate_insert_statement( 70 | "setBoosterSheets", self.get_next_booster_sheets_entry() 71 | ), 72 | self.__generate_insert_statement( 73 | "setBoosterSheetCards", self.get_next_booster_sheet_cards_entry() 74 | ), 75 | ] 76 | 77 | def __get_mtgjson_card_prices_generators(self) -> List[Iterator[str]]: 78 | return [ 79 | self.__generate_batch_insert_statement( 80 | "cardPrices", 81 | self.get_next_card_price( 82 | datetime.date.today() - datetime.timedelta(days=14) 83 | ), 84 | ) 85 | ] 86 | 87 | def __generate_insert_statement( 88 | self, table_name: str, data_generator: Iterator[Dict[str, Any]] 89 | ) -> Iterator[str]: 90 | for obj in data_generator: 91 | data_keys = ", ".join(obj.keys()) 92 | safe_values = self.create_insert_statement_body(obj) 93 | yield f"INSERT INTO {table_name} ({data_keys}) VALUES ({safe_values});\n" 94 | 95 | def __generate_batch_insert_statement( 96 | self, 
table_name: str, data_generator: Iterator[Dict[str, Any]] 97 | ) -> Iterator[str]: 98 | insert_values = [] 99 | data_keys = "" 100 | for obj in data_generator: 101 | data_keys = ", ".join(obj.keys()) 102 | safe_values = f"({self.create_insert_statement_body(obj)})" 103 | insert_values.append(safe_values) 104 | 105 | if len(insert_values) >= 2_000: 106 | yield_values = ",\n".join(insert_values) 107 | insert_values = [] 108 | yield f"INSERT INTO {table_name} ({data_keys}) VALUES\n{yield_values};\n" 109 | 110 | yield_values = ",\n".join(insert_values) 111 | yield f"INSERT INTO {table_name} ({data_keys}) VALUES\n{yield_values};\n" 112 | 113 | def _generate_sql_schema_dict(self) -> Dict[str, Any]: 114 | schema = nested_dict() 115 | 116 | if self.data_type == MtgjsonDataType.MTGJSON_CARDS: 117 | self._add_meta_table_schema(schema) 118 | self._add_set_table_schema(schema) 119 | self._add_card_table_schema(schema) 120 | self._add_token_table_schema(schema) 121 | self._add_card_identifiers_table_schema(schema) 122 | self._add_card_legalities_table_schema(schema) 123 | self._add_card_rulings_table_schema(schema) 124 | self._add_card_foreign_data_table_schema(schema) 125 | self._add_card_purchase_urls_table_schema(schema) 126 | self._add_token_identifiers_table_schema(schema) 127 | self._add_set_translation_table_schema(schema) 128 | self._add_set_booster_contents_schema(schema) 129 | self._add_set_booster_content_weights_schema(schema) 130 | self._add_set_booster_sheets_schema(schema) 131 | self._add_set_booster_sheet_cards_schema(schema) 132 | elif self.data_type == MtgjsonDataType.MTGJSON_CARD_PRICES: 133 | self._add_all_prices_schema(schema) 134 | 135 | return dict(schema) 136 | 137 | @staticmethod 138 | def _add_meta_table_schema(schema: Dict[str, Any]) -> None: 139 | schema["meta"]["date"]["type"] = "DATE" 140 | schema["meta"]["version"]["type"] = "TEXT" 141 | 142 | def _add_set_table_schema(self, schema: Dict[str, Any]) -> None: 143 | for set_data in 
self.mtgjson_data["data"].values(): 144 | for set_attribute, set_attribute_data in set_data.items(): 145 | if set_attribute in self.set_keys_to_skip: 146 | continue 147 | 148 | schema["sets"][set_attribute]["type"] = self._get_sql_type( 149 | set_attribute_data 150 | ) 151 | 152 | schema["sets"]["code"]["type"] = "VARCHAR(8) UNIQUE NOT NULL" 153 | 154 | def _get_card_like_schema(self, schema: Dict[str, Any], key_name: str) -> None: 155 | for set_data in self.mtgjson_data["data"].values(): 156 | for mtgjson_card in set_data.get(key_name): 157 | for card_attribute, card_attribute_data in mtgjson_card.items(): 158 | if card_attribute in self.card_keys_to_skip: 159 | continue 160 | 161 | schema[key_name][card_attribute]["type"] = self._get_sql_type( 162 | card_attribute_data 163 | ) 164 | schema[key_name]["uuid"]["type"] = "VARCHAR(36) NOT NULL" 165 | 166 | def _add_card_table_schema(self, schema: Dict[str, Any]) -> None: 167 | self._get_card_like_schema(schema, "cards") 168 | 169 | def _add_token_table_schema(self, schema: Dict[str, Any]) -> None: 170 | self._get_card_like_schema(schema, "tokens") 171 | 172 | @staticmethod 173 | def _add_card_rulings_table_schema(schema: Dict[str, Any]) -> None: 174 | schema["cardRulings"]["text"]["type"] = "TEXT" 175 | schema["cardRulings"]["date"]["type"] = "DATE" 176 | schema["cardRulings"]["uuid"]["type"] = "VARCHAR(36) NOT NULL" 177 | 178 | def __add_card_field_with_normalization( 179 | self, 180 | table_name: str, 181 | schema: Dict[str, Any], 182 | card_field: str, 183 | iterate_subfield: bool = False, 184 | ) -> None: 185 | schema[table_name]["uuid"]["type"] = "VARCHAR(36) NOT NULL" 186 | for set_data in self.mtgjson_data["data"].values(): 187 | for mtgjson_card in set_data.get("cards"): 188 | for card_field_sub_entry in mtgjson_card.get(card_field, []): 189 | if iterate_subfield: 190 | for key in card_field_sub_entry: 191 | schema[table_name][key]["type"] = "TEXT" 192 | else: 193 | 
schema[table_name][card_field_sub_entry]["type"] = "TEXT" 194 | 195 | def _add_card_identifiers_table_schema(self, schema: Dict[str, Any]) -> None: 196 | return self.__add_card_field_with_normalization( 197 | "cardIdentifiers", schema, "identifiers" 198 | ) 199 | 200 | def _add_card_legalities_table_schema(self, schema: Dict[str, Any]) -> None: 201 | return self.__add_card_field_with_normalization( 202 | "cardLegalities", schema, "legalities" 203 | ) 204 | 205 | def _add_card_foreign_data_table_schema(self, schema: Dict[str, Any]) -> None: 206 | self.__add_card_field_with_normalization( 207 | "cardForeignData", schema, "foreignData", True 208 | ) 209 | schema["cardForeignData"]["multiverseId"]["type"] = "INTEGER" 210 | 211 | def _add_card_purchase_urls_table_schema(self, schema: Dict[str, Any]) -> None: 212 | return self.__add_card_field_with_normalization( 213 | "cardPurchaseUrls", schema, "purchaseUrls" 214 | ) 215 | 216 | def _add_token_identifiers_table_schema(self, schema: Dict[str, Any]) -> None: 217 | return self.__add_card_field_with_normalization( 218 | "tokenIdentifiers", schema, "identifiers" 219 | ) 220 | 221 | @staticmethod 222 | def _add_set_translation_table_schema(schema: Dict[str, Any]) -> None: 223 | schema["setTranslations"]["setCode"]["type"] = "VARCHAR(20)" 224 | schema["setTranslations"]["language"]["type"] = "TEXT" 225 | schema["setTranslations"]["translation"]["type"] = "TEXT" 226 | 227 | @staticmethod 228 | def _add_all_prices_schema(schema: Dict[str, Any]) -> None: 229 | schema["cardPrices"]["gameAvailability"]["type"] = "VARCHAR(15)" 230 | schema["cardPrices"]["priceProvider"]["type"] = "VARCHAR(20)" 231 | schema["cardPrices"]["providerListing"]["type"] = "VARCHAR(15)" 232 | schema["cardPrices"]["cardFinish"]["type"] = "VARCHAR(15)" 233 | schema["cardPrices"]["date"]["type"] = "DATE" 234 | schema["cardPrices"]["price"]["type"] = "FLOAT" 235 | schema["cardPrices"]["currency"]["type"] = "VARCHAR(10)" 236 | 
schema["cardPrices"]["uuid"]["type"] = "VARCHAR(36) NOT NULL" 237 | 238 | @staticmethod 239 | def _add_set_booster_contents_schema(schema: Dict[str, Any]) -> None: 240 | schema["setBoosterContents"]["setCode"]["type"] = "VARCHAR(20)" 241 | schema["setBoosterContents"]["boosterName"]["type"] = "VARCHAR(255)" 242 | schema["setBoosterContents"]["boosterIndex"]["type"] = "INTEGER" 243 | schema["setBoosterContents"]["sheetName"]["type"] = "VARCHAR(255)" 244 | schema["setBoosterContents"]["sheetPicks"]["type"] = "INTEGER" 245 | schema["setBoosterContents"]["unique_constraint"] = ["setCode", "sheetName", "boosterName", "boosterIndex"] 246 | 247 | @staticmethod 248 | def _add_set_booster_content_weights_schema(schema: Dict[str, Any]) -> None: 249 | schema["setBoosterContentWeights"]["setCode"]["type"] = "VARCHAR(20)" 250 | schema["setBoosterContentWeights"]["boosterName"]["type"] = "VARCHAR(255)" 251 | schema["setBoosterContentWeights"]["boosterIndex"]["type"] = "INTEGER" 252 | schema["setBoosterContentWeights"]["boosterWeight"]["type"] = "INTEGER" 253 | 254 | @staticmethod 255 | def _add_set_booster_sheets_schema(schema: Dict[str, Any]) -> None: 256 | schema["setBoosterSheets"]["setCode"]["type"] = "VARCHAR(20)" 257 | schema["setBoosterSheets"]["boosterName"]["type"] = "VARCHAR(255)" 258 | schema["setBoosterSheets"]["sheetName"]["type"] = "VARCHAR(255)" 259 | schema["setBoosterSheets"]["sheetIsFoil"]["type"] = "BOOLEAN" 260 | schema["setBoosterSheets"]["sheetHasBalanceColors"]["type"] = "BOOLEAN" 261 | schema["setBoosterSheets"]["unique_constraint"] = ["setCode", "sheetName", "boosterName"] 262 | 263 | @staticmethod 264 | def _add_set_booster_sheet_cards_schema(schema: Dict[str, Any]) -> None: 265 | schema["setBoosterSheetCards"]["setCode"]["type"] = "VARCHAR(20)" 266 | schema["setBoosterSheetCards"]["sheetName"]["type"] = "VARCHAR(255)" 267 | schema["setBoosterSheetCards"]["boosterName"]["type"] = "VARCHAR(255)" 268 | schema["setBoosterSheetCards"]["cardUuid"]["type"] = 
"VARCHAR(36) NOT NULL" 269 | schema["setBoosterSheetCards"]["cardWeight"]["type"] = "BIGINT" 270 | schema["setBoosterSheetCards"]["unique_constraint"] = ["setCode", "sheetName", "boosterName", "cardUuid"] 271 | 272 | @staticmethod 273 | def _convert_schema_dict_to_query( 274 | schema: Dict[str, Any], 275 | engine: str, 276 | primary_key_op: Optional[str], 277 | ) -> str: 278 | q = "" 279 | for table_name, table_data in schema.items(): 280 | q += f"CREATE TABLE {table_name} (\n" 281 | if primary_key_op: 282 | q += f"\tid {primary_key_op},\n" 283 | for attribute in sorted(table_data.keys()): 284 | if "unique_constraint" == attribute: 285 | continue 286 | q += f"\t{attribute} {table_data[attribute]['type']},\n" 287 | 288 | if "unique_constraint" in table_data.keys(): 289 | q += f"\tUNIQUE ({','.join(table_data['unique_constraint'])}),\n" 290 | 291 | q = f"{q[:-2]}\n){engine};\n\n" 292 | 293 | if "uuid" in table_data.keys(): 294 | q = f"{q[:-2]}\nCREATE INDEX {table_name}_uuid ON {table_name}(uuid);\n\n" 295 | 296 | return q[:-2] 297 | 298 | @staticmethod 299 | def _get_sql_type(mixed: Any) -> Optional[str]: 300 | if isinstance(mixed, (str, list, dict)): 301 | return "TEXT" 302 | if isinstance(mixed, bool): 303 | return "BOOLEAN" 304 | if isinstance(mixed, float): 305 | return "FLOAT" 306 | if isinstance(mixed, int): 307 | return "INTEGER" 308 | return None 309 | --------------------------------------------------------------------------------