├── .gitignore
├── .idea
│   ├── .gitignore
│   ├── ddl_compare.iml
│   ├── inspectionProfiles
│   │   ├── Project_Default.xml
│   │   └── profiles_settings.xml
│   ├── misc.xml
│   ├── modules.xml
│   └── vcs.xml
├── LICENSE
├── README.md
├── images
│   ├── cli_app_terminal.png
│   ├── compare_result.png
│   └── logo.png
├── requirement.txt
├── setup.py
├── sondesh
│   ├── __init__.py
│   ├── apps
│   │   ├── __init__.py
│   │   └── cli_app.py
│   ├── compare.py
│   ├── ddl_parser.py
│   ├── dialects
│   │   ├── __init__.py
│   │   ├── bigquery.py
│   │   ├── hql.py
│   │   ├── mssql.py
│   │   ├── mysql.py
│   │   ├── oracle.py
│   │   ├── redshift.py
│   │   ├── snowflake.py
│   │   ├── spark_sql.py
│   │   └── sql.py
│   ├── output
│   │   ├── __init__.py
│   │   ├── common.py
│   │   └── dialects.py
│   ├── parser.py
│   ├── parsetab.py
│   ├── tokens.py
│   └── utils.py
└── test
    ├── read_from_file.py
    ├── sql_files
    │   ├── one.sql
    │   ├── test_sql.sql
    │   └── two.sql
    ├── test_oracle.py
    └── test_redshift.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
app/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
#  Usually these files are written by a python script from a template
#  before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
.python-version

# pipenv
#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
#   However, in case of collaboration, if having platform-specific dependencies or dependencies
#   having no cross-platform support, pipenv may install dependencies that don't work, or not
#   install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

--------------------------------------------------------------------------------
/.idea/.gitignore:
--------------------------------------------------------------------------------
# Default ignored files
/shelf/
/workspace.xml

--------------------------------------------------------------------------------
/.idea/ddl_compare.iml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.idea/inspectionProfiles/profiles_settings.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.
      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "[]"
      replaced with your own identifying information. (Don't include
      the brackets!)  The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Project: Sondesh

[![Maintenance](https://img.shields.io/badge/Maintained%3F-yes-green.svg)](https://GitHub.com/Naereen/StrapDown.js/graphs/commit-activity)
![Maintainer](https://img.shields.io/badge/maintainer-Koushik-blue)
[![PyPI license](https://img.shields.io/pypi/l/ansicolortags.svg)](https://pypi.python.org/pypi/ansicolortags/)
[![made-with-python](https://img.shields.io/badge/Made%20with-Python-1f425f.svg)](https://www.python.org/)
[![Generic badge](https://img.shields.io/badge/release-1.0-green.svg)](https://shields.io/)

![logo.png](https://i.ibb.co/x596NHL/logo.png)

## Description

Sondesh is the name of my cat. I love him very much, so I've decided to name this project after him.

This project is all about a parser and a comparator. The question is: what does it parse?
- It parses SQL statements, but only DDL statements
- It supports many SQL dialects, for example Oracle, PostgreSQL, Spark SQL, and Hive
- There is a CLI app (`cli_app.py`) that compares two DDL statements and shows you the differences in the terminal

![compare_result.png](https://i.ibb.co/94VWWTy/compare-result.png)

### Dependencies

* Windows 10, Debian, and BSD are the supported platforms
* Python version >= 3.8

### Installing

* sondesh can be installed using pip

```
pip install sondesh
```

### Usage

```python
from sondesh import ddl_parser
import pprint

result = ddl_parser.parse_from_file('/home/koushik/sample_ddl.sql')
pprint.pprint(result)
```
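You can also drive the comparison from Python the same way the CLI app does. A minimal sketch, assuming two DDL files at hypothetical paths; `compare_df` takes the first parsed table from each file plus two display labels, exactly as `sondesh/apps/cli_app.py` calls it:

```python
from sondesh import compare
from sondesh.ddl_parser import parse_from_file

# parse_from_file returns a list with one dict per parsed table
first = parse_from_file('/home/koushik/one.sql')   # hypothetical path
second = parse_from_file('/home/koushik/two.sql')  # hypothetical path

# prints the table-level and column-level differences to the terminal
if first and second:
    compare.compare_df(first[0], second[0], 'one.sql', 'two.sql')
```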
Using the CLI app:

1. Just open the terminal
2. Type `sondesh`
3. Voilà!!!

![logo_terminal.png](https://i.ibb.co/F67hnjf/cli-app-terminal.png)

## What Next:

1. Integration with remote file systems, to load .sql files from there and parse them
2. Integration with data catalogues like the Spark catalogue or the Hive metastore, to compare DDLs

--------------------------------------------------------------------------------
/images/cli_app_terminal.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/images/cli_app_terminal.png

--------------------------------------------------------------------------------
/images/compare_result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/images/compare_result.png

--------------------------------------------------------------------------------
/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/images/logo.png

--------------------------------------------------------------------------------
/requirement.txt:
--------------------------------------------------------------------------------
colorama==0.4.6
commonmark==0.9.1
ply==3.11
pyfiglet==0.8.post1
Pygments==2.14.0
rich==13.0.1
tqdm==4.64.1

--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Note: To use the 'upload' functionality of this file, you must:
#   $ pipenv install twine --dev

import io
import os
import sys
from shutil import rmtree

from setuptools import find_packages, setup, Command

# Package meta-data.
NAME = 'sondesh'
DESCRIPTION = 'parse SQL, compare two .sql files, generate optimization hints for your SQL, and various other utilities'
URL = 'https://github.com/koustreak/Sondesh'
EMAIL = 'dot.py@yahoo.com'
AUTHOR = 'Koushik Dutta'
REQUIRES_PYTHON = '>=3.9.0'
VERSION = '1.0'

def parse_requirements(requirements):
    with open(requirements) as f:
        return [l.strip('\n') for l in f if l.strip('\n') and not l.startswith('#')]

# What packages are required for this module to be executed?
REQUIRED = parse_requirements('requirement.txt')


# The rest you shouldn't have to touch too much :)
# ------------------------------------------------
# Except, perhaps the License and Trove Classifiers!
# If you do change the License, remember to change the Trove Classifier for that!

here = os.path.abspath(os.path.dirname(__file__))

# Import the README and use it as the long-description.
# Note: this will only work if 'README.md' is present in your MANIFEST.in file!
try:
    with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f:
        long_description = '\n' + f.read()
except FileNotFoundError:
    long_description = DESCRIPTION

# Load the package's __version__.py module as a dictionary.
about = {}
if not VERSION:
    project_slug = NAME.lower().replace("-", "_").replace(" ", "_")
    with open(os.path.join(here, project_slug, '__version__.py')) as f:
        exec(f.read(), about)
else:
    about['__version__'] = VERSION


class UploadCommand(Command):
    """Support setup.py upload."""

    description = 'Build and publish the package.'
    user_options = []

    @staticmethod
    def status(s):
        """Prints things in bold."""
        print('\033[1m{0}\033[0m'.format(s))

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        try:
            self.status('Removing previous builds…')
            rmtree(os.path.join(here, 'dist'))
        except OSError:
            pass

        self.status('Building Source and Wheel (universal) distribution…')
        os.system('{0} setup.py sdist bdist_wheel --universal'.format(sys.executable))

        self.status('Uploading the package to PyPI via Twine…')
        os.system('twine upload dist/*')

        self.status('Pushing git tags…')
        os.system('git tag v{0}'.format(about['__version__']))
        os.system('git push --tags')

        sys.exit()


# Where the magic happens:
setup(
    name=NAME,
    version=about['__version__'],
    description=DESCRIPTION,
    long_description=long_description,
    long_description_content_type='text/markdown',
    author=AUTHOR,
    entry_points={
        'console_scripts': ['sondesh=sondesh.apps.cli_app:main_app'],
    },
    author_email=EMAIL,
    python_requires=REQUIRES_PYTHON,
    url=URL,
    packages=find_packages(exclude=["tests", "*.tests", "*.tests.*", "tests.*", "test", "test.*", "*.test.*", "*.test", "images", "images.*"]),
    # If your package is a single module, use this instead of 'packages':
    # py_modules=['mypackage'],

    # entry_points={
    #     'console_scripts': ['mycli=mymodule:cli'],
    # },
    install_requires=REQUIRED,
    include_package_data=True,
    license='Apache-2.0',
    classifiers=[
        # Trove classifiers
        # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers
        'License :: OSI Approved :: Apache Software License',
        'Programming Language :: Python',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy'
    ],
    # $ setup.py publish support.
    cmdclass={
        'upload': UploadCommand,
    },
)

--------------------------------------------------------------------------------
/sondesh/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/sondesh/__init__.py

--------------------------------------------------------------------------------
/sondesh/apps/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/sondesh/apps/__init__.py

--------------------------------------------------------------------------------
/sondesh/apps/cli_app.py:
--------------------------------------------------------------------------------
# koushik dutta
import pyfiglet
from colorama import Fore, init, Style
from time import sleep
from tqdm import tqdm
import sys
import os
import json
from rich.console import Console
from rich.table import Table

init()

print()
print()


def print_cli_table(df, context_name=None):
    if df:
        print(Fore.CYAN + 'visualizing ' + context_name + ' parse result ' + Style.RESET_ALL)
        for i in df:
            table = Table(title='column details for ' + i['table_name'])
            columns = ["column_name", "column_type", "size", "foreign_key", "refers_to",
                       "on_delete", "on_update", "unique", "nullable", "default", "check"]
            data = list()
            if i['columns']:
                for j in i['columns']:
                    refers_to, on_delete, on_update, is_foreign_key = None, None, None, None

                    if j.get('references'):
                        refers_to = str(j.get('references').get('table'))
                    if j.get('on_delete'):
                        on_delete = str(j.get('references').get('on_delete'))
                    if j.get('on_update'):
                        on_update = str(j.get('references').get('on_update'))
                    if j.get('references'):
                        is_foreign_key = 'yes'

                    data.append([str(j.get('name')), str(j.get('type')), str(j.get('size')),
                                 is_foreign_key, refers_to, on_delete, on_update,
                                 str(j.get('unique')), str(j.get('nullable')), str(j.get('default')),
                                 str(j.get('check'))])
            else:
                print(Fore.YELLOW + 'warning!! no columns could be found in ' + context_name + Style.RESET_ALL)

            for col in columns:
                table.add_column(col)
            for row in data:
                table.add_row(*row, style='bright_green')

            console = Console()
            print(Fore.BLUE + '*****************************************************************************************' + Style.RESET_ALL)
            console.print(table)
            print()

        for i in df:
            table = Table(title='table properties for ' + i['table_name'])
            columns = ['table property name', 'property value']
            data = [
                ['index', str(i.get('index'))],
                ['diststyle', str(i.get('diststyle'))],
                ['distkey', str(i.get('distkey'))],
                ['primary key', str(i.get('primary_key'))],
                ['sort key', str(i.get('sortkey'))],
                ['schema', str(i.get('schema'))],
                ['table space', str(i.get('tablespace'))]
            ]

            for col in columns:
                table.add_column(col)
            for row in data:
                table.add_row(*row, style='bright_green')

            console = Console()
            console.print(table)
            print(Fore.BLUE + '*****************************************************************************************' + Style.RESET_ALL)
            print()

    else:
        print(Fore.RED + 'Error occurred while parsing ' + context_name + ' , aborting ' + Style.RESET_ALL)


def main_app():
    '''
    This will be exposed as the CLI app in setup.py
    :return: None
    '''
    f = pyfiglet.Figlet(font='big')
    print(Fore.CYAN + f.renderText('Compare DDL') + Style.RESET_ALL)
    sleep(0.5)
    print(Fore.BLUE + '> author : koushik dutta ')
    sleep(0.5)
    print(Fore.BLUE + '> date : 28-Dec-2022 ')
    sleep(0.5)
    print(Fore.BLUE + '> purpose : compare two DDLs ')
    sleep(0.5)
    print(Fore.BLUE + '> version : 1.0.0 ')
    sleep(0.5)
    print(Fore.BLUE + '> OS : ubuntu 18.04 ')
    sleep(0.5)
    print(Fore.BLUE + '> python version : 3.8 ')
    sleep(0.5)
    print(Fore.BLUE + '> help : please give me a star on github ')
    sleep(0.6)
    print(Fore.BLUE + '> docs : the read-the-docs page is in progress ')
    sleep(0.6)
    print(Fore.BLUE + '> unit test : check the test directory ')
    sleep(0.6)
    print(Fore.BLUE + '> powered by : Lex and Yacc in python (PLY) ')
    sleep(0.6)
    print(Fore.BLUE + '> Supported DDL : Redshift , Oracle , MySQL , Spark SQL ( tested ) ' + Style.RESET_ALL)
    print()
    print(Fore.BLUE)

    with tqdm(total=100) as pbar:
        pbar.set_description('initiating process')
        pbar.update(3)
        sleep(0.5)
        try:
            from sondesh.dialects import redshift
            pbar.update(10)
            pbar.set_description('Loading Redshift Dialect')
        except ImportError:
            print(Fore.RED + 'No Redshift Dialect detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.dialects import oracle
            pbar.update(10)
            pbar.set_description('Loading Oracle Dialect')
        except ImportError:
            print(Fore.RED + 'No Oracle Dialect detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.dialects import spark_sql
            pbar.update(12)
            pbar.set_description('Loading spark sql Dialect')
        except ImportError:
            print(Fore.RED + 'No spark sql Dialect detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.dialects import sql
            pbar.update(25)
            pbar.set_description('Loading ansi sql Dialect')
        except ImportError:
            print(Fore.RED + 'No Ansi sql Dialect detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.dialects import mysql
            pbar.update(8)
            pbar.set_description('Loading mysql Dialect')
        except ImportError:
            print(Fore.RED + 'No mysql Dialect detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.dialects import hql
            pbar.update(7)
            pbar.set_description('Loading HiveQL Dialect')
        except ImportError:
            print(Fore.RED + 'No hiveQL Dialect detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.ddl_parser import parse_from_file
            pbar.update(10)
            pbar.set_description('Loading SQL file parser')
        except ImportError:
            print(Fore.RED + 'No .sql file parser detected , aborting . To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh.ddl_parser import parse_the_ddl
            pbar.update(7)
            pbar.set_description('Loading raw sql parser')
        except ImportError:
            print(Fore.RED + 'No raw sql parser detected , aborting . It is required to parse from user input .'
                             ' To fix it contact koushik')
            sys.exit()

        try:
            sleep(0.5)
            from sondesh import compare
            pbar.update(8)
            pbar.set_description('Loading comparator')
        except ImportError:
            print(Fore.RED + 'No comparator detected , aborting . It is required to compare the parse results .'
                             ' To fix it contact koushik')
            sys.exit()

        pbar.set_description('Everything is loaded')

    print()
    print(Fore.GREEN + "All dialects and parsers have been loaded successfully" + Style.RESET_ALL)
    print()
    print()

    if os.path.exists('profile.json') and os.path.getsize('profile.json'):
        print(Fore.BLUE + 'Profile already exists , proceeding with that . If you want to reset it , remove profile.json')
        print()
    else:
        while True:

            print(Fore.BLUE + 'There is no profile for you , let me set one up . Don\'t worry , I am not a spy , and this is one time only \n' + Style.RESET_ALL)
            name = input(Fore.BLUE + '> what should i call you : ' + Style.RESET_ALL)
            print(Fore.BLUE + ' >> hey ' + name + ' welcome to DDL Comparator ' + '\n')
            favourite_db = input(Fore.BLUE + '> which DB do you like most : ' + Style.RESET_ALL)
            purpose = input(Fore.BLUE + '> are you going to use it for commercial purposes : ' + Style.RESET_ALL)
            what_you_do = input(Fore.BLUE + '> what is your job role : ' + Style.RESET_ALL)
            default_outdir = input(Fore.BLUE + '> default output dir for reports (leave blank for current directory) : ' + Style.RESET_ALL)
            cloud_platform = input(Fore.BLUE + '> which cloud platform are you going to use : ' + Style.RESET_ALL)
            reporting_style = input(Fore.BLUE + '> Reporting style \n1.excel\n2.html (leave blank for excel): ' + Style.RESET_ALL)
            print()
            # use string keys so the profile can be re-read reliably
            profile = {'name': name, 'favourite_db': favourite_db,
                       'purpose': purpose, 'what_you_do': what_you_do,
                       'default_outdir': default_outdir,
                       'cloud_platform': cloud_platform, 'reporting_style': reporting_style}
            with open('profile.json', 'w') as fp:
                json.dump(profile, fp)
            print(Fore.CYAN + 'profile has been set up successfully \n' + Style.RESET_ALL)
            break

    if os.path.exists('validator.json') and os.path.getsize('validator.json'):
        print(Fore.BLUE + 'DDL Validator already exists , proceeding with that . If you want to reset it , remove validator.json')
        print()
    else:
        validator_err_ct = 0
        validator_payload = dict()
        while True:

            print(Fore.BLUE + 'There is no DDL Validator setup , let me set one , this is for the first time only \n' + Style.RESET_ALL)

            string_vs_varchar = input(Fore.BLUE + '> Should i highlight STRING vs VARCHAR diff (regardless of size) (Y/N): ' + Style.RESET_ALL)
            if string_vs_varchar.upper() not in ('Y', 'N'):
                print(Fore.RED + '\n Please enter either y/n' + Style.RESET_ALL)
                validator_err_ct += 1
                if validator_err_ct == 2:
                    print(Fore.RED + '\n Maximum limit reached . aborting' + Style.RESET_ALL)
                    sys.exit()
                continue
            validator_payload['string_vs_varchar'] = string_vs_varchar
            validator_err_ct = 0

            timezone_diff = input(Fore.BLUE + '> Should i highlight timezone diff (Y/N): ' + Style.RESET_ALL)
            if timezone_diff.upper() not in ('Y', 'N'):
                print(Fore.RED + '\n Please enter either y/n' + Style.RESET_ALL)
                validator_err_ct += 1
                if validator_err_ct == 2:
                    print(Fore.RED + '\n Maximum limit reached . aborting' + Style.RESET_ALL)
                    sys.exit()
                continue
            validator_payload['timezone_diff'] = timezone_diff
            validator_err_ct = 0

            encoding_diff = input(Fore.BLUE + '> Should i highlight encoding diff (Y/N): ' + Style.RESET_ALL)
            if encoding_diff.upper() not in ('Y', 'N'):
                print(Fore.RED + '\n Please enter either y/n' + Style.RESET_ALL)
                validator_err_ct += 1
                if validator_err_ct == 2:
                    print(Fore.RED + '\n Maximum limit reached . aborting' + Style.RESET_ALL)
                    sys.exit()
                continue
            validator_payload['encoding_diff'] = encoding_diff
            validator_err_ct = 0

            distyle_diff = input(Fore.BLUE + '> Should i highlight distyle diff (Y/N): ' + Style.RESET_ALL)
            if distyle_diff.upper() not in ('Y', 'N'):
                print(Fore.RED + '\n Please enter either y/n' + Style.RESET_ALL)
                validator_err_ct += 1
                if validator_err_ct == 2:
                    print(Fore.RED + '\n Maximum limit reached . aborting' + Style.RESET_ALL)
                    sys.exit()
                continue
            validator_payload['distyle_diff'] = distyle_diff
            validator_err_ct = 0

            with open('validator.json', 'w') as fp:
                json.dump(validator_payload, fp)
            print(Fore.CYAN + 'validator has been set up successfully \n' + Style.RESET_ALL)

            break

    # Validation profiler will be set up accordingly
    # if os.path.exists('validation.json') and os.path.getsize('profile.json'):
    error_ct = 0
    choice = 'none'
    while True:
        choice = input(Fore.CYAN + 'Do you want to compare files or provide SQL as user input (please type either file or raw) \n' + Style.RESET_ALL)
        if choice.upper() not in ('FILE', 'RAW'):
            print(Fore.RED + '\n Wrong input given , the answer should be either file or raw ' + Style.RESET_ALL)
            error_ct += 1
            if error_ct == 2:
                print(Fore.RED + '\n You have crossed the maximum limit of choices , aborting ' + Style.RESET_ALL)
                sys.exit()
            continue
        print(Fore.CYAN + '\n You have entered ' + choice + ' for this session ' + Style.RESET_ALL)
        break

    print()

    err_dialect = 0
    while True:
        dialect = input(Fore.BLUE + '> Which dialect do you want to use now , \n'
                                    '1.redshift\n2.oracle\n3.hql\n4.snowflake\n5.mysql\n' + Style.RESET_ALL)
        if dialect.upper() not in ['REDSHIFT', 'ORACLE', 'SNOWFLAKE', 'MYSQL', 'HQL']:
            print(Fore.RED + '\n Please enter a valid value ' + Style.RESET_ALL)
            err_dialect += 1
            if err_dialect == 2:
                print(Fore.RED + '\n Exceeded the maximum limit of providing input' + Style.RESET_ALL)
                sys.exit()
            continue
        break

    print()

    while True:
        if choice.upper() == 'FILE':
            first_file = input(Fore.BLUE + '> Your first .sql file ? ' + Style.RESET_ALL)
            second_file = input(Fore.BLUE + '> Your second .sql file ? ' + Style.RESET_ALL)
            print()

            if os.path.exists(first_file) and os.path.getsize(first_file):
                if os.path.splitext(first_file)[1].upper() != '.SQL':
                    print(Fore.YELLOW + ' WARNING !! your first input ' + first_file + ' is not a .sql file ' + Style.RESET_ALL)
                    print()
            else:
                print(Fore.RED + ' file not found ' + first_file + Style.RESET_ALL)
                print()
                sys.exit()

            if os.path.exists(second_file) and os.path.getsize(second_file):
                if os.path.splitext(second_file)[1].upper() != '.SQL':
                    print(Fore.YELLOW + ' WARNING !! your second input ' + second_file + ' is not a .sql file ' + Style.RESET_ALL)
                    print()
            else:
                print(Fore.RED + ' file not found ' + second_file + Style.RESET_ALL)
                print()
                sys.exit()

            print(Fore.CYAN + 'parsing ' + first_file + Style.RESET_ALL)
            first_file_parse_result = parse_from_file(first_file)
            print(Fore.CYAN + 'done!! ' + first_file + Style.RESET_ALL)

            print()

            print(Fore.CYAN + 'parsing ' + second_file + Style.RESET_ALL)
            second_file_parse_result = parse_from_file(second_file)
            print(Fore.CYAN + 'done!! ' + second_file + Style.RESET_ALL)

            print()

            print(Fore.CYAN + 'comparison engine initiated ' + Style.RESET_ALL)
            if first_file_parse_result and second_file_parse_result:
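                # each parse result is a list of table dicts; hand the first
                # table from each file to the comparator, labelled with the
                # originating file names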
                compare.compare_df(first_file_parse_result[0], second_file_parse_result[0], first_file, second_file)

            print()

            question_ = input(Fore.CYAN + 'Do you want to see the table parse result ? leave blank for NO , type YES otherwise ' + Style.RESET_ALL)
            if question_.upper() in ('YES', 'Y'):
                print_cli_table(first_file_parse_result, 'first_file_parse_result')
                print()
                print_cli_table(second_file_parse_result, 'second_file_parse_result')
                print()

        else:
            print(Fore.YELLOW + 'RAW Input Comparator has not been developed yet' + Style.RESET_ALL)
            '''
            first_sql_input = input(Fore.BLUE + 'Please enter your first sql ' + Style.RESET_ALL)
            second_sql_input = input(Fore.BLUE + 'Please enter your second sql ' + Style.RESET_ALL)

            if first_sql_input is None or second_sql_input is None:
                print(Fore.RED + 'Please provide both of the mandatory inputs' + Style.RESET_ALL)

            print(Fore.CYAN + 'parsing first_sql_input ' + Style.RESET_ALL)
            first_sql_parse_result = parse_from_file(first_sql_input)
            print(Fore.CYAN + 'done!!' + Style.RESET_ALL)

            print()

            print(Fore.CYAN + 'parsing second_sql_input ' + Style.RESET_ALL)
            second_sql_parse_result = parse_from_file(second_sql_input)
            print(Fore.CYAN + 'done!!' + Style.RESET_ALL)

            print()

            question_ = input('Do you want to see the table parse result ? leave blank for NO , type YES otherwise ')
            if question_:
                print_cli_table(first_sql_parse_result, 'first_sql_parse_result')
                print()
                print_cli_table(second_sql_parse_result, 'second_sql_parse_result')
                print()

            print(Fore.CYAN + 'comparison engine initiated ' + Style.RESET_ALL)
            if first_sql_parse_result and second_sql_parse_result:
                compare.compare_df(first_sql_parse_result, second_sql_parse_result, first_sql_input, second_sql_input)
            '''

        redo_choice = input(Fore.CYAN + '> Do you want to use the tool again ? N for No , press anything else for Yes ' + Style.RESET_ALL)
        if redo_choice.upper() == 'N':
            print(Fore.CYAN + 'Good Bye , have a good day\n' + Style.RESET_ALL)
            break
        else:
            continue

--------------------------------------------------------------------------------
/sondesh/compare.py:
--------------------------------------------------------------------------------
import json
import os
from colorama import Fore, init, Style
from rich.console import Console
from rich.table import Table

init()


def compare_df(query_one_df, query_two_df, context_one, context_two):
    validator = None
    string_vs_varchar = None
    timezone_diff = None
    encoding_diff = None
    distyle_diff = None

    if os.path.exists('validator.json') and os.path.getsize('validator.json'):
        with open('validator.json') as fp:
            validator = json.load(fp)
        if validator:
            string_vs_varchar = validator['string_vs_varchar']
            timezone_diff = validator['timezone_diff']
            encoding_diff = validator['encoding_diff']
            distyle_diff = validator['distyle_diff']

    if validator is None:
        print(Fore.YELLOW + ' WARNING !! User validator profile is blank ' + Style.RESET_ALL)
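    # validator.json is written by the CLI app (sondesh/apps/cli_app.py) and,
    # when present, looks like:
    #   {"string_vs_varchar": "Y", "timezone_diff": "N",
    #    "encoding_diff": "Y", "distyle_diff": "N"}
    # note: the flags are loaded above but not yet consulted by the diff logic below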

    if query_one_df and query_two_df:
        print(Fore.CYAN + 'visualizing compare result ' + Style.RESET_ALL)
        table = Table(title='comparing ' + context_one + ' vs ' + context_two + ' column level ')
        table_tab = Table(title='comparing ' + context_one + ' vs ' + context_two + ' table level ')

        difference_tab = []
        columns_tab = ["property name", "value in " + context_one, "value in " + context_two]

        if query_one_df.get('table_name') != query_two_df.get('table_name'):
            difference_tab.append(['table name found in sql', query_one_df.get('table_name'), query_two_df.get('table_name')])

        if query_one_df.get('tablespace') != query_two_df.get('tablespace'):
            difference_tab.append(['tablespace', query_one_df.get('tablespace'), query_two_df.get('tablespace')])

        if query_one_df.get('schema') != query_two_df.get('schema'):
            difference_tab.append(['schema', query_one_df.get('schema'), query_two_df.get('schema')])

        keys_one = None
        keys_two = None
        type_one = None
        type_two = None

        if query_one_df.get('sortkey'):
            keys_one = ','.join(query_one_df.get('sortkey').get('keys'))
            type_one = query_one_df.get('sortkey').get('type')

        if query_two_df.get('sortkey'):
            keys_two = ','.join(query_two_df.get('sortkey').get('keys'))
            type_two = query_two_df.get('sortkey').get('type')

        if (keys_one != keys_two) and (keys_one or keys_two):
            difference_tab.append(['sort keys', keys_one, keys_two])

        if (type_one != type_two) and (type_one or type_two):
            difference_tab.append(['sort type', type_one, type_two])

        # guard the joins with `or []` so a missing entry on one side
        # does not raise a TypeError
        if query_two_df.get('index') != query_one_df.get('index'):
            difference_tab.append(['index', ','.join(query_one_df.get('index') or []),
                                   ','.join(query_two_df.get('index') or [])])

        if query_two_df.get('partitioned_by') != query_one_df.get('partitioned_by'):
            difference_tab.append(['partition', ','.join(query_one_df.get('partitioned_by') or []),
                                   ','.join(query_two_df.get('partitioned_by') or [])])

        if query_two_df.get('diststyle') != query_one_df.get('diststyle'):
            difference_tab.append(['distribution style', query_one_df.get('diststyle'), query_two_df.get('diststyle')])

        if query_two_df.get('checks') != query_one_df.get('checks'):
            difference_tab.append(['checks constraints', ','.join(query_one_df.get('checks') or []),
                                   ','.join(query_two_df.get('checks') or [])])

        if difference_tab:
            for col in columns_tab:
                table_tab.add_column(col)
            for row in difference_tab:
                table_tab.add_row(*row, style='bright_green')
            # table level difference

            console = Console()
            print(Fore.BLUE + '*****************************************************************************************' + Style.RESET_ALL)
            console.print(table_tab)
            print()
        else:
            print()
            print(Fore.GREEN + 'No Table Level Difference could be found ' + Style.RESET_ALL)

        columns = ["column name", "property", "value in " + context_one, "value in " + context_two]

        query_one_cols = query_one_df['columns']
        query_two_cols = query_two_df['columns']

        difference = []

        for j in query_one_cols:
            refers_to_one, on_delete_one, on_update_one, is_foreign_key_one = None, None, None, None

            if j.get('references'):
                refers_to_one = str(j.get('references').get('table'))
            if j.get('on_delete'):
                on_delete_one = str(j.get('references').get('on_delete'))
            if j.get('on_update'):
                on_update_one = str(j.get('references').get('on_update'))
            if j.get('references'):
                is_foreign_key_one = 'yes'

            col_name_one = str(j.get('name'))
            col_type_one = str(j.get('type'))
            col_size_one = str(j.get('size'))
            isunique_one = str(j.get('unique'))
            isnull_one = str(j.get('nullable'))
            default_val_one = str(j.get('default'))
            check_val_one = str(j.get('check'))
            encode_one = str(j.get('encode'))

            temp_two = list(filter(lambda x: x['name'] == col_name_one, query_two_cols))
            col_name_two = None
            if temp_two:
                temp_two = temp_two[0]
                col_name_two = str(temp_two.get('name'))
                col_type_two = str(temp_two.get('type'))
                col_size_two = str(temp_two.get('size'))
                isunique_two = str(temp_two.get('unique'))
                isnull_two = str(temp_two.get('nullable'))
                default_val_two = str(temp_two.get('default'))
                check_val_two = str(temp_two.get('check'))
                encode_two = str(temp_two.get('encode'))

                if col_type_one != col_type_two:
                    difference.append([col_name_two, 'datatype', col_type_one, col_type_two])

                if col_size_two != col_size_one:
                    difference.append([col_name_two, 'size', col_size_one, col_size_two])

                if isunique_two != isunique_one:
                    difference.append([col_name_two, 'is unique', isunique_one, isunique_two])

                if isnull_one != isnull_two:
                    difference.append([col_name_two, 'nullable', isnull_one, isnull_two])

                if default_val_two != default_val_one:
                    difference.append([col_name_two, 'default value', default_val_one, default_val_two])

                if check_val_two != check_val_one:
                    difference.append([col_name_two, 'check constraint', check_val_one, check_val_two])

                if encode_one != encode_two:
                    difference.append([col_name_two, 'encode', encode_one, encode_two])

                refers_to_two, on_delete_two, on_update_two, is_foreign_key_two = None, None, None, None

                # read the second column's foreign-key details from temp_two,
                # not from j (the first file's column)
                if temp_two.get('references'):
                    refers_to_two = str(temp_two.get('references').get('table'))
                if temp_two.get('on_delete'):
                    on_delete_two = str(temp_two.get('references').get('on_delete'))
                if temp_two.get('on_update'):
                    on_update_two = str(temp_two.get('references').get('on_update'))
                if temp_two.get('references'):
                    is_foreign_key_two = 'yes'

                if is_foreign_key_two != is_foreign_key_one:
                    difference.append([col_name_two, 'foreign key', is_foreign_key_one, is_foreign_key_two])

                if refers_to_two != refers_to_one:
                    difference.append([col_name_two, 'foreign key reference', refers_to_one, refers_to_two])

                if on_delete_two != on_delete_one:
                    difference.append([col_name_two, 'on delete clause', on_delete_one, on_delete_two])

                if on_update_two != on_update_one:
                    difference.append([col_name_two, 'on update clause', on_update_one, on_update_two])

                query_two_cols = list(filter(lambda g: g['name'] != col_name_one, query_two_cols))

            else:
                difference.append([col_name_one, 'is_found', 'yes', 'no'])

        if query_two_cols:
            for k in query_two_cols:
                difference.append([k['name'], 'is_found', 'no', 'yes'])

        if difference:
            for col in columns:
                table.add_column(col)
            for row in difference:
                table.add_row(*row, style='bright_green')
            # column level difference

            console = Console()
            print(Fore.BLUE + '*****************************************************************************************' + Style.RESET_ALL)
            console.print(table)
            print()
        else:
            print()
            print(Fore.GREEN + 'No Column Level Difference could be found ' + Style.RESET_ALL)

--------------------------------------------------------------------------------
/sondesh/ddl_parser.py:
--------------------------------------------------------------------------------
from typing import Dict, List, Optional

from ply.lex import LexToken

from sondesh import tokens as tok
from sondesh.dialects.bigquery import BigQuery
from sondesh.dialects.hql import HQL
from sondesh.dialects.mssql import MSSQL
from sondesh.dialects.mysql import MySQL
from sondesh.dialects.oracle import Oracle
from sondesh.dialects.redshift import Redshift
from sondesh.dialects.snowflake import Snowflake
from sondesh.dialects.spark_sql import SparkSQL
from sondesh.dialects.sql import BaseSQL
from sondesh.parser import Parser


class parse_the_ddl_error(Exception):
    pass


class parse_the_ddl(
    Parser, SparkSQL, Snowflake, BaseSQL, HQL, MySQL, MSSQL, Oracle, Redshift, BigQuery
):

    tokens = tok.tokens
    t_ignore = "\t \r"

    def get_tag_symbol_value_and_increment(self, t: LexToken) -> LexToken:
        # todo: need to find a less hacky way to parse HQL structure types
        if "<" in t.value:
            t.type = "LT"
            self.lexer.lt_open += t.value.count("<")
        if ">" in t.value and not self.lexer.check:
            t.type = "RT"
            self.lexer.lt_open -= t.value.count(">")
        return t

    def after_columns_tokens(self, t: LexToken) -> LexToken:
        t.type = tok.after_columns_tokens.get(t.value.upper(), t.type)
        if t.type != "ID":
            self.lexer.after_columns = True
        elif self.lexer.columns_def:
            t.type = tok.columns_defenition.get(t.value.upper(), t.type)
        return t

    def process_body_tokens(self, t: LexToken) -> LexToken:
        if (
            self.lexer.last_par == "RP" and not self.lexer.lp_open
        ) or self.lexer.after_columns:
            t = self.after_columns_tokens(t)
        elif self.lexer.columns_def:
            t.type = tok.columns_defenition.get(t.value.upper(), t.type)
        elif self.lexer.sequence:
            t.type = tok.sequence_reserved.get(t.value.upper(), "ID")
        return t

    def parse_tags_symbols(self, t) -> Optional[LexToken]:
"""like symbols < >""" 60 | if not self.lexer.check: 61 | for key in tok.symbol_tokens_no_check: 62 | if key in t.value: 63 | return self.get_tag_symbol_value_and_increment(t) 64 | 65 | def tokens_not_columns_names(self, t: LexToken) -> LexToken: 66 | 67 | t_tag = self.parse_tags_symbols(t) 68 | if t_tag: 69 | return t_tag 70 | 71 | if "ARRAY" in t.value: 72 | t.type = "ARRAY" 73 | return t 74 | elif self.lexer.is_like: 75 | t.type = tok.after_columns_tokens.get(t.value.upper(), t.type) 76 | elif not self.lexer.is_table: 77 | # if is_table mean wi already met INDEX or TABLE statement and 78 | # the definition already done and this is a string 79 | t.type = tok.defenition_statements.get( 80 | t.value.upper(), t.type 81 | ) # Check for reserved word 82 | elif self.lexer.last_token != "COMMA": 83 | t.type = tok.common_statements.get(t.value.upper(), t.type) 84 | else: 85 | t.type = tok.first_liners.get(t.value.upper(), t.type) 86 | 87 | # get tokens from other token dicts 88 | t = self.process_body_tokens(t) 89 | 90 | self.set_lexer_tags(t) 91 | 92 | return t 93 | 94 | def set_lexer_tags(self, t: LexToken) -> None: 95 | if t.type == "SEQUENCE": 96 | self.lexer.sequence = True 97 | elif t.type == "CHECK": 98 | self.lexer.check = True 99 | 100 | def t_DOT(self, t: LexToken) -> LexToken: 101 | r"\." 102 | t.type = "DOT" 103 | return self.set_last_token(t) 104 | 105 | def t_STRING(self, t: LexToken) -> LexToken: 106 | r"((\')([a-zA-Z_,`0-9:><\=\-\+.\~\%$\!() {}\[\]\/\\\"\#\*&^|?;±§@~]*)(\')){1}" 107 | t.type = "STRING" 108 | return self.set_last_token(t) 109 | 110 | def t_DQ_STRING(self, t: LexToken) -> LexToken: 111 | r"((\")([a-zA-Z_,`0-9:><\=\-\+.\~\%$\!() {}'\[\]\/\\\\#\*&^|?;±§@~]*)(\")){1}" 112 | t.type = "DQ_STRING" 113 | return self.set_last_token(t) 114 | 115 | def is_token_column_name(self, t: LexToken) -> bool: 116 | """many of reserved words can be used as column name, 117 | to decide is it a column name or not we need do some checks""" 118 | skip_id_tokens = ["(", ")", ","] 119 | return ( 120 | t.value not in skip_id_tokens 121 | and self.lexer.is_table 122 | and self.lexer.lp_open 123 | and not self.lexer.is_like 124 | and (self.lexer.last_token == "COMMA" or self.lexer.last_token == "LP") 125 | and t.value.upper() not in tok.first_liners 126 | ) 127 | 128 | def is_creation_name(self, t: LexToken) -> bool: 129 | """many of reserved words can be used as column name, 130 | to decide is it a column name or not we need do some checks""" 131 | skip_id_tokens = ["(", ")", ","] 132 | exceptional_keys = [ 133 | "SCHEMA", 134 | "TABLE", 135 | "DATABASE", 136 | "TYPE", 137 | "DOMAIN", 138 | "TABLESPACE", 139 | "INDEX", 140 | "CONSTRAINT", 141 | "EXISTS", 142 | ] 143 | return ( 144 | t.value not in skip_id_tokens 145 | and t.value.upper() not in ["IF"] 146 | and self.lexer.last_token in exceptional_keys 147 | and not self.exceptional_cases(t.value.upper()) 148 | ) 149 | 150 | def exceptional_cases(self, value: str) -> bool: 151 | if value == "TABLESPACE" and self.lexer.last_token == "INDEX": 152 | return True 153 | return False 154 | 155 | def t_AUTOINCREMENT(self, t: LexToken): 156 | r"(AUTO_INCREMENT|AUTOINCREMENT)(?i)\b" 157 | t.type = "AUTOINCREMENT" 158 | return self.set_last_token(t) 159 | 160 | def t_ID(self, t: LexToken): 161 | r"([0-9]+[.][0-9]*([e][+-]?[0-9]+)?|[0-9]\.[0-9])\w|([a-zA-Z_,0-9:><\/\\\=\-\+\~\%$@#\|&?;*\()!{}\[\]\`\[\]]+)" 162 | t.type = tok.symbol_tokens.get(t.value, "ID") 163 | 164 | if t.type == "LP": 165 | self.lexer.lp_open += 1 166 | self.lexer.columns_def = True 
            self.lexer.last_token = "LP"
            return t
        elif self.is_token_column_name(t) or self.lexer.last_token == "DOT":
            t.type = "ID"
        elif t.type != "DQ_STRING" and self.is_creation_name(t):
            t.type = "ID"
        else:
            t = self.tokens_not_columns_names(t)

        self.capitalize_tokens(t)
        self.commat_type(t)

        self.set_lexx_tags(t)

        return self.set_last_token(t)

    def commat_type(self, t: LexToken):
        if t.type == "COMMA" and self.lexer.lt_open:
            t.type = "COMMAT"

    def capitalize_tokens(self, t: LexToken):
        if t.type != "ID" and t.type not in ["LT", "RT"]:
            t.value = t.value.upper()

    def set_parathesis_tokens(self, t: LexToken):
        if t.type in ["RP", "LP"]:
            if t.type == "RP" and self.lexer.lp_open:
                self.lexer.lp_open -= 1
            self.lexer.last_par = t.type

    def set_lexx_tags(self, t: LexToken):
        self.set_parathesis_tokens(t)

        if t.type == "ALTER":
            self.lexer.is_alter = True
        if t.type == "LIKE":
            self.lexer.is_like = True
        elif t.type in ["TYPE", "DOMAIN", "TABLESPACE"]:
            self.lexer.is_table = False
        elif t.type in ["TABLE", "INDEX"] and not self.lexer.is_alter:
            self.lexer.is_table = True

    def set_last_token(self, t: LexToken):
        self.lexer.last_token = t.type
        return t

    def p_id(self, p):
        """id : ID
        | DQ_STRING"""
        delimiters_to_start = ["`", '"', "["]
        delimiters_to_end = ["`", '"', "]"]
        p[0] = p[1]

        if self.normalize_names:
            for num, symbol in enumerate(delimiters_to_start):
                if p[0].startswith(symbol) and p[0].endswith(delimiters_to_end[num]):
                    p[0] = p[0][1:-1]

    def p_id_or_string(self, p):
        """id_or_string : id
        | STRING"""
        p[0] = p[1]

    def t_error(self, t: LexToken):
        raise parse_the_ddl_error("Unknown symbol %r" % (t.value[0],))

    def p_error(self, p):
        if not self.silent:
            raise parse_the_ddl_error(f"Unknown statement at {p}")


def parse_from_file(file_path: str, parser_settings: Optional[dict] = None, **kwargs) -> List[Dict]:
    """get useful data from ddl"""
    with open(file_path, "r") as df:
        return parse_the_ddl(df.read(), **(parser_settings or {})).run(file_path=file_path, **kwargs)

--------------------------------------------------------------------------------
/sondesh/dialects/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/koustreak/Sondesh/1fc5274b21ac3a69de56b6e56b1a67649b486ae1/sondesh/dialects/__init__.py

--------------------------------------------------------------------------------
/sondesh/dialects/bigquery.py:
--------------------------------------------------------------------------------
class BigQuery:
    def p_expression_options(self, p):
        """expr : expr multiple_options"""
        p[0] = p[1]
        p[1].update(p[2])

    def p_multiple_options(self, p):
        """multiple_options : options
        | multiple_options options
        """
        if len(p) > 2:
            p[1]["options"].extend(p[2]["options"])
            p[0] = p[1]
        else:
            p[0] = p[1]

    def p_options(self, p):
        """options : OPTIONS LP id_equals RP"""
        p_list = list(p)
        if not isinstance(p[1], dict):
            p[0] = {"options": p[3]}
        else:
            p[0] = p[1]
            if len(p) == 4:
                p[0]["options"].append(p_list[-1][0])

--------------------------------------------------------------------------------
/sondesh/dialects/hql.py:
--------------------------------------------------------------------------------
from sondesh.utils import check_spec, remove_par
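
# NOTE: with PLY, the docstring of each p_* method *is* the grammar production
# it implements, and productions refer to each other by those exact names.
# Rule names such as `multi_assigments` / `assigment` below are therefore kept
# as originally spelled (misspellings included); renaming one would require
# updating every production that references it.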
-------------------------------------------------------------------------------- /sondesh/dialects/hql.py: -------------------------------------------------------------------------------- 1 | from sondesh.utils import check_spec, remove_par 2 | 3 | 4 | class HQL: 5 | def p_expression_location(self, p): 6 | """expr : expr LOCATION STRING 7 | | expr LOCATION DQ_STRING""" 8 | p[0] = p[1] 9 | p_list = list(p) 10 | p[0]["location"] = p_list[-1] 11 | 12 | def p_expression_clustered(self, p): 13 | """expr : expr ID ON LP pid RP 14 | | expr ID BY LP pid RP""" 15 | p[0] = p[1] 16 | p_list = list(p) 17 | p[0][f"{p_list[2].lower()}_{p_list[3].lower()}"] = p_list[-2] 18 | 19 | def p_expression_into_buckets(self, p): 20 | """expr : expr INTO ID ID""" 21 | p[0] = p[1] 22 | p_list = list(p) 23 | p[0][f"{p_list[2].lower()}_{p_list[-1].lower()}"] = p_list[-2] 24 | 25 | def p_row_format(self, p): 26 | """row_format : ROW FORMAT SERDE 27 | | ROW FORMAT 28 | """ 29 | p_list = list(p) 30 | p[0] = {"serde": p_list[-1] == "SERDE"} 31 | 32 | def p_expression_row_format(self, p): 33 | """expr : expr row_format id 34 | | expr row_format STRING 35 | """ 36 | p[0] = p[1] 37 | p_list = list(p) 38 | if p[2]["serde"]: 39 | format = {"serde": True, "java_class": p_list[-1]} 40 | else: 41 | format = check_spec(p_list[-1]) 42 | 43 | p[0]["row_format"] = format 44 | 45 | def p_expression_with_serde(self, p): 46 | """expr : expr WITH SERDEPROPERTIES multi_assigments""" 47 | p[0] = p[1] 48 | p_list = list(p) 49 | 50 | row_format = p[0]["row_format"] 51 | row_format["properties"] = p_list[-1] 52 | p[0]["row_format"] = row_format 53 | 54 | def p_expression_tblproperties(self, p): 55 | """expr : expr TBLPROPERTIES multi_assigments""" 56 | p[0] = p[1] 57 | p[0]["tblproperties"] = list(p)[-1] 58 | 59 | def p_multi_assigments(self, p): 60 | """multi_assigments : LP assigment 61 | | multi_assigments RP 62 | | multi_assigments COMMA assigment""" 63 | p_list = remove_par(list(p)) 64 | p[0] = p_list[1] 65 | p[0].update(p_list[-1]) 66 | 67 | def p_assigment(self, p): 68 | """assigment : id id id 69 | | STRING id STRING 70 | | id id STRING 71 | | STRING id id 72 | | STRING id""" 73 | p_list = remove_par(list(p)) 74 | if "state" in self.lexer.__dict__: 75 | p[0] = {p[1]: self.lexer.state.get(p_list[-1])} 76 | else: 77 | if "=" in p_list[-1]: 78 | p_list[-1] = p_list[-1].split("=")[-1] 79 | p[0] = {p_list[1]: p_list[-1]} 80 | 81 | def p_expression_comment(self, p): 82 | """expr : expr COMMENT STRING""" 83 | p[0] = p[1] 84 | p_list = list(p) 85 | p[0]["comment"] = check_spec(p_list[-1]) 86 | 87 | def p_expression_terminated_by(self, p): 88 | """expr : expr id TERMINATED BY id 89 | | expr id TERMINATED BY STRING 90 | """ 91 | p[0] = p[1] 92 | p_list = list(p) 93 | p[0][f"{p[2].lower()}_terminated_by"] = check_spec(p_list[-1]) 94 | 95 | def p_expression_map_keys_terminated_by(self, p): 96 | """expr : expr MAP KEYS TERMINATED BY id 97 | | expr MAP KEYS TERMINATED BY STRING 98 | """ 99 | p[0] = p[1] 100 | p_list = list(p) 101 | p[0]["map_keys_terminated_by"] = check_spec(p_list[-1]) 102 | 103 | def p_expression_skewed_by(self, p): 104 | """expr : expr SKEWED BY LP id RP ON LP pid RP""" 105 | p[0] = p[1] 106 | p_list = remove_par(list(p)) 107 | p[0]["skewed_by"] = {"key": p_list[4], "on": p_list[-1]} 108 | 109 | def p_expression_collection_terminated_by(self, p): 110 | """expr : expr COLLECTION ITEMS TERMINATED BY id 111 | | expr COLLECTION ITEMS TERMINATED BY STRING 112 | """ 113 | p[0] = p[1] 114 | p_list = list(p) 115 | 
p[0]["collection_items_terminated_by"] = check_spec(p_list[-1]) 116 | 117 | def p_expression_stored_as(self, p): 118 | """expr : expr STORED AS id 119 | | expr STORED AS id STRING 120 | | expr STORED AS id STRING id STRING 121 | """ 122 | p[0] = p[1] 123 | p_list = list(p) 124 | if len(p_list) >= 6: 125 | # only input or output format 126 | p[0]["stored_as"] = {p_list[-2].lower(): p_list[-1]} 127 | if len(p_list) == 8: 128 | # both input & output 129 | p[0]["stored_as"].update({p_list[-4].lower(): p_list[-3]}) 130 | else: 131 | p[0]["stored_as"] = p_list[-1] 132 | 133 | def p_expression_partitioned_by_hql(self, p): 134 | """expr : expr PARTITIONED BY pid_with_type 135 | | expr PARTITIONED BY LP pid RP 136 | | expr PARTITIONED BY LP multiple_funct RP 137 | """ 138 | p[0] = p[1] 139 | p_list = remove_par(list(p)) 140 | p[0]["partitioned_by"] = p_list[-1] 141 | 142 | def p_pid_with_type(self, p): 143 | """pid_with_type : LP column 144 | | pid_with_type COMMA column 145 | | pid_with_type RP 146 | """ 147 | p_list = remove_par(list(p)) 148 | if not isinstance(p_list[1], list): 149 | p[0] = [p_list[1]] 150 | else: 151 | p[0] = p_list[1] 152 | if len(p_list) > 2: 153 | p[0].append(p_list[-1]) 154 | -------------------------------------------------------------------------------- /sondesh/dialects/mssql.py: -------------------------------------------------------------------------------- 1 | import sondesh # noqa: F401 weird issue with failed tests 2 | 3 | 4 | class MSSQL: 5 | def p_pkey_constraint(self, p): 6 | """pkey_constraint : constraint pkey_statement id LP index_pid RP 7 | | constraint pkey_statement LP index_pid RP 8 | | pkey_constraint with 9 | | pkey_constraint with ON id 10 | """ 11 | p_list = list(p) 12 | p[0] = p[1] 13 | if isinstance(p[2], dict) and "with" in p[2]: 14 | data = p_list[2] 15 | if "ON" in p_list: 16 | data["with"]["on"] = p_list[-1] 17 | elif len(p_list) == 7: 18 | data = {"primary_key": True, "columns": p_list[-2], p[3]: True} 19 | else: 20 | data = {"primary_key": True, "columns": p_list[-2]} 21 | 22 | p[0]["constraint"].update(data) 23 | 24 | def p_with(self, p): 25 | """with : WITH with_args""" 26 | p_list = list(p) 27 | p[0] = {"with": {"properties": [], "on": None}} 28 | if ")" not in p_list: 29 | p[0]["with"]["properties"] = p_list[-1]["properties"] 30 | 31 | def p_equals(self, p): 32 | """equals : id id id 33 | | id id ON 34 | | id id id DOT id 35 | """ 36 | p_list = list(p) 37 | if "." 
in p_list: 38 | p[0] = {"name": p_list[1], "value": f"{p_list[3]}.{p_list[5]}"} 39 | else: 40 | p[0] = {"name": p_list[-3], "value": p_list[-1]} 41 | 42 | def p_with_args(self, p): 43 | """with_args : LP equals 44 | | with_args COMMA equals 45 | | with_args with_args 46 | | with_args RP 47 | """ 48 | p_list = list(p) 49 | if isinstance(p[1], dict): 50 | p[0] = p[1] 51 | else: 52 | p[0] = {"properties": []} 53 | if ")" != p_list[2]: 54 | if ")" == p_list[-1]: 55 | p[0]["properties"].append(p_list[-1]) 56 | else: 57 | p[0]["properties"].append(p_list[-1]) 58 | 59 | def p_period_for(self, p): 60 | """period_for : id FOR id LP pid RP""" 61 | p[0] = {"period_for_system_time": p[5]} 62 | 63 | def p_expression_on_primary(self, p): 64 | """expr : expr ON id""" 65 | p[0] = p[1] 66 | p[0]["on"] = p[3] 67 | 68 | def p_expression_with(self, p): 69 | """expr : expr with""" 70 | p[0] = p[1] 71 | p[0].update(p[2]) 72 | 73 | def p_expression_text_image_on(self, p): 74 | """expr : expr TEXTIMAGE_ON id""" 75 | p[0] = p[1] 76 | p[0].update({"textimage_on": p[3]}) 77 | -------------------------------------------------------------------------------- /sondesh/dialects/mysql.py: -------------------------------------------------------------------------------- 1 | import sondesh # noqa: F401 weird issue with failed tests 2 | 3 | 4 | class MySQL: 5 | def p_on_update(self, p): 6 | """on_update : ON UPDATE id 7 | | ON UPDATE STRING 8 | | ON UPDATE f_call 9 | """ 10 | p_list = list(p) 11 | if not ")" == p_list[-1]: 12 | p[0] = {"on_update": p_list[-1]} 13 | else: 14 | p[0] = {"on_update": p_list[-2]} 15 | -------------------------------------------------------------------------------- /sondesh/dialects/oracle.py: -------------------------------------------------------------------------------- 1 | from sondesh.utils import remove_par 2 | 3 | 4 | class Oracle: 5 | def p_encrypt(self, p): 6 | """encrypt : ENCRYPT 7 | | encrypt NO SALT 8 | | encrypt SALT 9 | | encrypt USING STRING 10 | | encrypt STRING 11 | """ 12 | p_list = list(p) 13 | if isinstance(p[1], dict): 14 | p[0] = p[1] 15 | if "NO" in p_list: 16 | p[0]["encrypt"]["salt"] = False 17 | elif "USING" in p_list: 18 | p[0]["encrypt"]["encryption_algorithm"] = p_list[-1] 19 | elif "SALT" not in p_list: 20 | p[0]["encrypt"]["integrity_algorithm"] = p_list[-1] 21 | 22 | else: 23 | p[0] = { 24 | "encrypt": { 25 | "salt": True, 26 | "encryption_algorithm": "'AES192'", 27 | "integrity_algorithm": "SHA-1", 28 | } 29 | } 30 | 31 | def p_storage(self, p): 32 | """storage : STORAGE LP 33 | | storage id id 34 | | storage id id RP 35 | """ 36 | # Initial 5m Next 5m Maxextents Unlimited 37 | p_list = remove_par(list(p)) 38 | param = {} 39 | if len(p_list) == 4: 40 | param = {p_list[2].lower(): p_list[3]} 41 | if isinstance(p_list[1], dict): 42 | p[0] = p[1] 43 | else: 44 | p[0] = {} 45 | p[0].update(param) 46 | 47 | def p_expr_storage(self, p): 48 | """expr : expr storage""" 49 | p_list = list(p) 50 | p[0] = p[1] 51 | p[0]["storage"] = p_list[-1] 52 | 53 | def p_expr_index(self, p): 54 | """expr : expr ID INDEX""" 55 | p[0] = p[1] 56 | p[0][f"{p[2].lower()}_index"] = True 57 | -------------------------------------------------------------------------------- /sondesh/dialects/redshift.py: -------------------------------------------------------------------------------- 1 | class Redshift: 2 | def p_expression_distkey(self, p): 3 | """expr : expr id LP id RP""" 4 | p_list = list(p) 5 | p[1].update({"distkey": p_list[-2]}) 6 | p[0] = p[1] 7 | 8 | def p_encode(self, p): 9 | """encode 
: ENCODE id""" 10 | p_list = list(p) 11 | p[0] = {"encode": p_list[-1]} 12 | 13 | def p_expression_diststyle(self, p): 14 | """expr : expr id id 15 | | expr id KEY 16 | """ 17 | p_list = list(p) 18 | p[1].update({p_list[-2]: p_list[-1]}) 19 | p[0] = p[1] 20 | 21 | def p_expression_sortkey(self, p): 22 | """expr : expr id id LP pid RP""" 23 | p_list = list(p) 24 | p[1].update({"sortkey": {"type": p_list[2], "keys": p_list[-2]}}) 25 | p[0] = p[1] 26 | -------------------------------------------------------------------------------- /sondesh/dialects/snowflake.py: -------------------------------------------------------------------------------- 1 | from sondesh.utils import remove_par 2 | 3 | 4 | class Snowflake: 5 | def p_clone(self, p): 6 | """clone : CLONE id""" 7 | p_list = list(p) 8 | p[0] = {"clone": {"from": p_list[-1]}} 9 | 10 | def p_expression_cluster_by(self, p): 11 | """expr : expr CLUSTER BY LP pid RP 12 | | expr CLUSTER BY pid 13 | """ 14 | p[0] = p[1] 15 | p_list = remove_par(list(p)) 16 | p[0]["cluster_by"] = p_list[-1] 17 | 18 | def p_table_comment(self, p): 19 | """expr : expr option_comment 20 | """ 21 | p[0] = p[1] 22 | if p[2]: 23 | p[0].update(p[2]) 24 | 25 | def p_option_comment(self, p): 26 | """option_comment : ID STRING 27 | | ID DQ_STRING 28 | | COMMENT ID STRING 29 | | COMMENT ID DQ_STRING 30 | """ 31 | p_list = remove_par(list(p)) 32 | if "comment" in p[1].lower(): 33 | p[0] = {"comment": p_list[-1]} 34 | -------------------------------------------------------------------------------- /sondesh/dialects/spark_sql.py: -------------------------------------------------------------------------------- 1 | class SparkSQL: 2 | def p_expression_using(self, p): 3 | """expr : expr using""" 4 | p[0] = p[1] 5 | p[1].update(p[2]) 6 | 7 | def p_using(self, p): 8 | """using : USING id""" 9 | p_list = list(p) 10 | p[0] = {"using": p_list[-1]} 11 | -------------------------------------------------------------------------------- /sondesh/dialects/sql.py: -------------------------------------------------------------------------------- 1 | import re 2 | from collections import defaultdict 3 | from copy import deepcopy 4 | from typing import Any, Dict, List, Optional, Tuple, Union 5 | 6 | from sondesh.utils import check_spec, remove_par 7 | 8 | auth = "AUTHORIZATION" 9 | 10 | 11 | class AfterColumns: 12 | def p_expression_partition_by(self, p: List) -> None: 13 | """expr : expr PARTITION BY LP pid RP 14 | | expr PARTITION BY id LP pid RP 15 | | expr PARTITION BY pid 16 | | expr PARTITION BY id pid""" 17 | p[0] = p[1] 18 | p_list = list(p) 19 | _type = None 20 | if isinstance(p[4], list): 21 | columns = p[4] 22 | else: 23 | columns = p_list[-2] 24 | if isinstance(p[4], str) and p[4].lower() != "(": 25 | _type = p[4] 26 | p[0]["partition_by"] = {"columns": columns, "type": _type} 27 | 28 | 29 | class Database: 30 | def p_expression_create_database(self, p: List) -> None: 31 | """expr : expr database_base""" 32 | p[0] = p[1] 33 | p_list = list(p) 34 | p[0].update(p_list[-1]) 35 | 36 | def p_database_base(self, p: List) -> None: 37 | """database_base : CREATE DATABASE id 38 | | CREATE ID DATABASE id 39 | | database_base clone 40 | """ 41 | if isinstance(p[1], dict): 42 | p[0] = p[1] 43 | else: 44 | p[0] = {} 45 | p_list = list(p) 46 | if isinstance(p_list[-1], dict): 47 | p[0].update(p_list[-1]) 48 | else: 49 | p[0]["database_name"] = p_list[-1] 50 | if len(p_list) == 5: 51 | p[0][p[2].lower()] = True 52 | 53 | 54 | class TableSpaces: 55 | @staticmethod 56 | def get_tablespace_data(p_list): 
57 | if p_list[1] == "TABLESPACE": 58 | _type = None 59 | temp = False 60 | else: 61 | if p_list[1].upper() == "TEMPORARY": 62 | _type = None 63 | temp = True 64 | else: 65 | _type = p_list[1] 66 | if p_list[2].upper() == "TEMPORARY": 67 | temp = True 68 | else: 69 | temp = False 70 | if isinstance(p_list[-1], dict): 71 | properties = p_list[-1] 72 | tablespace_name = p_list[-2] 73 | else: 74 | properties = None 75 | tablespace_name = p_list[-1] 76 | result = { 77 | "tablespace_name": tablespace_name, 78 | "properties": properties, 79 | "type": _type, 80 | "temporary": temp, 81 | } 82 | return result 83 | 84 | def p_expression_create_tablespace(self, p: List) -> None: 85 | """expr : CREATE TABLESPACE id properties 86 | | CREATE id TABLESPACE id properties 87 | | CREATE id TABLESPACE id 88 | | CREATE TABLESPACE id 89 | | CREATE id id TABLESPACE id 90 | | CREATE id id TABLESPACE id properties 91 | """ 92 | p_list = list(p) 93 | p[0] = self.get_tablespace_data(p_list[1:]) 94 | 95 | def p_properties(self, p: List) -> None: 96 | """properties : property 97 | | properties property""" 98 | p_list = list(p) 99 | if len(p_list) == 3: 100 | p[0] = p[1] 101 | p[0].update(p[2]) 102 | else: 103 | p[0] = p[1] 104 | 105 | def p_property(self, p: List) -> None: 106 | """property : id id 107 | | id STRING 108 | | id ON 109 | | id STORAGE 110 | | id ROW 111 | """ 112 | p[0] = {p[1]: p[2]} 113 | 114 | 115 | class Table: 116 | @staticmethod 117 | def add_if_not_exists(data: Dict, p_list: List): 118 | if "EXISTS" in p_list: 119 | data["if_not_exists"] = True 120 | return data 121 | 122 | def p_create_table(self, p: List): 123 | """create_table : CREATE TABLE IF NOT EXISTS 124 | | CREATE TABLE 125 | | CREATE OR REPLACE TABLE IF NOT EXISTS 126 | | CREATE OR REPLACE TABLE 127 | | CREATE id TABLE IF NOT EXISTS 128 | | CREATE id TABLE 129 | | CREATE OR REPLACE id TABLE IF NOT EXISTS 130 | | CREATE OR REPLACE id TABLE 131 | 132 | """ 133 | # id - for EXTERNAL, TRANSIENT, TEMPORARY 134 | # get schema & table name 135 | p[0] = {} 136 | p_list = list(p) 137 | self.add_if_not_exists(p[0], p_list) 138 | 139 | if 'REPLACE' in p_list: 140 | p[0]["replace"] = True 141 | 142 | id_key = p_list[4] if 'REPLACE' in p_list else p_list[2] 143 | id_key = id_key.upper() 144 | 145 | if id_key in ["EXTERNAL", "TRANSIENT"]: 146 | p[0][id_key.lower()] = True 147 | elif id_key in ["TEMP", "TEMPORARY"]: 148 | p[0]["temp"] = True 149 | 150 | 151 | class Column: 152 | def p_column_property(self, p: List): 153 | """c_property : id id""" 154 | p_list = list(p) 155 | if p[1].lower() == "auto": 156 | p[0] = {"increment": True} 157 | else: 158 | p[0] = {"property": {p_list[1]: p_list[-1]}} 159 | 160 | def set_base_column_propery(self, p: List) -> Dict: 161 | 162 | if "." 
in list(p): 163 | type_str = f"{p[2]}.{p[4]}" 164 | else: 165 | type_str = p[2] 166 | if isinstance(p[1], dict): 167 | p[0] = p[1] 168 | else: 169 | size = None 170 | p[0] = {"name": p[1], "type": type_str, "size": size} 171 | return p[0] 172 | 173 | @staticmethod 174 | def parse_complex_type(p_list: List[str]) -> str: 175 | # for complex <> types 176 | start_index = 1 177 | _type = "" 178 | if isinstance(p_list[1], dict): 179 | _type = p_list[1]["type"] 180 | start_index = 2 181 | for elem in p_list[start_index:]: 182 | if isinstance(elem, list): 183 | for _elem in elem: 184 | _type += f" {_elem.rstrip()}" 185 | elif "ARRAY" in elem and elem != "ARRAY": 186 | _type += elem 187 | else: 188 | _type += f" {elem}" 189 | return _type 190 | 191 | def p_c_type(self, p: List) -> None: 192 | """c_type : id 193 | | id id 194 | | id id id id 195 | | id id id 196 | | id DOT id 197 | | tid 198 | | ARRAY 199 | | c_type ARRAY 200 | | c_type tid 201 | """ 202 | p[0] = {} 203 | p_list = remove_par(list(p)) 204 | _type = None 205 | 206 | if len(p_list) == 2: 207 | _type = p_list[-1] 208 | elif isinstance(p[1], str) and p[1].lower() == "encode": 209 | p[0] = {"property": {"encode": p[2]}} 210 | else: 211 | _type = self.parse_complex_type(p_list) 212 | if _type: 213 | _type = self.process_type(_type, p_list, p) 214 | p[0]["type"] = _type 215 | 216 | def process_type(self, _type: Union[str, List], p_list: List, p: List) -> str: 217 | 218 | if isinstance(_type, list): 219 | _type = _type[0] 220 | 221 | elif isinstance(p_list[-1], str) and p_list[-1].lower() == "distkey": 222 | p[0] = {"property": {"distkey": True}} 223 | _type = _type.split("distkey")[0] 224 | 225 | _type = _type.strip().replace('" . "', '"."') 226 | 227 | _type = self.process_array_types(_type, p_list) 228 | return _type 229 | 230 | @staticmethod 231 | def process_array_types(_type: str, p_list: List) -> str: 232 | if "<" not in _type and "ARRAY" in _type: 233 | if "[" not in p_list[-1]: 234 | _type = _type.replace(" ARRAY", "[]").replace("ARRAY", "[]") 235 | else: 236 | _type = _type.replace("ARRAY", "") 237 | elif "<" in _type and "[]" in _type: 238 | _type = _type.replace("[]", "ARRAY") 239 | return _type 240 | 241 | @staticmethod 242 | def get_size(p_list: List): 243 | if p_list[-1].isnumeric(): 244 | size = int(p_list[-1]) 245 | else: 246 | size = p_list[-1] 247 | if len(p_list) != 3: 248 | if p_list[-3] != "*": 249 | # oracle can contain * in column size 250 | try: 251 | value_0 = int(p_list[-3]) 252 | except ValueError: 253 | # we have column like p Geometry(MultiPolygon, 26918) 254 | value_0 = p_list[-3] 255 | else: 256 | value_0 = p_list[-3] 257 | size = (value_0, int(p_list[-1])) 258 | return size 259 | 260 | @staticmethod 261 | def get_column_details(p_list: List, p: List): 262 | if p_list[-1].get("type"): 263 | p[0]["type"] += f"{p_list[-1]['type'].strip()}" 264 | elif p_list[-1].get("comment"): 265 | p[0].update(p_list[-1]) 266 | elif p_list[-1].get("property"): 267 | for key, value in p_list[-1]["property"].items(): 268 | p[0][key] = value 269 | p_list.pop(-1) 270 | 271 | @staticmethod 272 | def check_type_parameter(size: Union[tuple, int]) -> bool: 273 | if isinstance(size, tuple) and not ( 274 | isinstance(size[0], str) and size[0].strip() == '*') and not ( 275 | isinstance(size[0], int) or isinstance(size[0], float)): 276 | return True 277 | return False 278 | 279 | @staticmethod 280 | def process_oracle_type_size(p_list): 281 | if p_list[-1] == ')' and p_list[-4] == '(': 282 | # for Oracle sizes like 30 CHAR 283 | p_list[-3] 
+= f" {p_list[-2]}" 284 | del p_list[-2] 285 | return p_list 286 | 287 | def p_column(self, p: List) -> None: 288 | """column : id c_type 289 | | column comment 290 | | column LP id RP 291 | | column LP id id RP 292 | | column LP id RP c_type 293 | | column LP id COMMA id RP 294 | | column LP id COMMA id RP c_type 295 | """ 296 | p[0] = self.set_base_column_propery(p) 297 | p_list = list(p) 298 | 299 | p_list = self.process_oracle_type_size(p_list) 300 | 301 | p_list = remove_par(p_list) 302 | 303 | if isinstance(p_list[-1], dict) and "type" in p_list[-1] and len(p_list) <= 3: 304 | p[0]["type"] = p_list[-1]["type"] 305 | if p_list[-1].get("property"): 306 | for key, value in p_list[-1]["property"].items(): 307 | p[0][key] = value 308 | elif isinstance(p_list[-1], dict): 309 | self.get_column_details(p_list, p) 310 | self.set_column_size(p_list, p) 311 | 312 | def set_column_size(self, p_list: List, p: List): 313 | if ( 314 | not isinstance(p_list[-1], dict) 315 | and bool(re.match(r"[0-9]+", p_list[-1])) 316 | or p_list[-1] == "max" 317 | ): 318 | size = self.get_size(p_list) 319 | if self.check_type_parameter(size): 320 | p[0]["type_parameters"] = size 321 | else: 322 | p[0]["size"] = size 323 | 324 | @staticmethod 325 | def set_property(p: List) -> List: 326 | for item in p[1:]: 327 | if isinstance(item, dict): 328 | if "property" in item: 329 | for key, value in item["property"].items(): 330 | p[0][key] = value 331 | del item["property"] 332 | p[0].update(item) 333 | return p 334 | 335 | @staticmethod 336 | def get_column_properties(p_list: List) -> Tuple: 337 | pk = False 338 | nullable = True 339 | default = None 340 | unique = False 341 | references = None 342 | if isinstance(p_list[-1], str): 343 | if p_list[-1].upper() == "KEY": 344 | pk = True 345 | nullable = False 346 | elif p_list[-1].upper() == "UNIQUE": 347 | unique = True 348 | elif isinstance(p_list[-1], dict) and "references" in p_list[-1]: 349 | p_list[-1]["references"]["column"] = p_list[-1]["references"]["columns"][0] 350 | del p_list[-1]["references"]["columns"] 351 | references = p_list[-1]["references"] 352 | return pk, default, unique, references, nullable 353 | 354 | def p_autoincrement(self, p: List) -> None: 355 | """ autoincrement : AUTOINCREMENT""" 356 | p[0] = {"autoincrement": True} 357 | 358 | def p_defcolumn(self, p: List) -> None: 359 | """defcolumn : column 360 | | defcolumn comment 361 | | defcolumn null 362 | | defcolumn encode 363 | | defcolumn PRIMARY KEY 364 | | defcolumn UNIQUE KEY 365 | | defcolumn UNIQUE 366 | | defcolumn check_ex 367 | | defcolumn default 368 | | defcolumn collate 369 | | defcolumn enforced 370 | | defcolumn ref 371 | | defcolumn foreign ref 372 | | defcolumn encrypt 373 | | defcolumn generated 374 | | defcolumn c_property 375 | | defcolumn on_update 376 | | defcolumn options 377 | | defcolumn autoincrement 378 | """ 379 | p[0] = p[1] 380 | p_list = list(p) 381 | 382 | pk, default, unique, references, nullable = self.get_column_properties(p_list) 383 | 384 | self.set_property(p) 385 | 386 | p[0]["references"] = p[0].get("references", references) 387 | p[0]["unique"] = unique or p[0].get("unique", unique) 388 | p[0]["primary_key"] = pk or p[0].get("primary_key", pk) 389 | p[0]["nullable"] = ( 390 | nullable if nullable is not True else p[0].get("nullable", nullable) 391 | ) 392 | p[0]["default"] = p[0].get("default", default) 393 | p[0]["check"] = p[0].get("check", None) 394 | if isinstance(p_list[-1], dict) and p_list[-1].get("encode"): 395 | p[0]["encode"] = p[0].get("encode", 
p_list[-1]["encode"]) 396 | p[0]["check"] = self.set_check_in_columm(p[0].get("check")) 397 | 398 | @staticmethod 399 | def set_check_in_columm(check: Optional[List]) -> Optional[str]: 400 | if check: 401 | check_statement = "" 402 | for n, item in enumerate(check): 403 | if isinstance(item, list): 404 | in_clause = ", ".join(item) 405 | check_statement += f" ({in_clause})" 406 | else: 407 | check_statement += f" {item}" if n > 0 else f"{item}" 408 | 409 | return check_statement 410 | 411 | def p_check_ex(self, p: List) -> None: 412 | """check_ex : check_st 413 | | constraint check_st 414 | """ 415 | name = None 416 | if isinstance(p[1], dict): 417 | if "constraint" in p[1]: 418 | p[0] = { 419 | "check": { 420 | "constraint_name": p[1]["constraint"]["name"], 421 | "statement": " ".join(p[2]["check"]), 422 | } 423 | } 424 | elif "check" in p[1]: 425 | p[0] = p[1] 426 | if isinstance(p[1], list): 427 | p[0] = { 428 | "check": {"constraint_name": name, "statement": p[1]["check"]} 429 | } 430 | if len(p) >= 3: 431 | for item in list(p)[2:]: 432 | p[0]["check"]["statement"].append(item) 433 | else: 434 | p[0] = {"check": {"statement": [p[2]], "constraint_name": name}} 435 | 436 | 437 | class Schema: 438 | def p_expression_schema(self, p: List) -> None: 439 | """expr : create_schema 440 | | create_database 441 | | expr id 442 | | expr clone 443 | """ 444 | p[0] = p[1] 445 | p_list = list(p) 446 | 447 | if isinstance(p_list[-1], dict): 448 | p[0].update(p_list[-1]) 449 | elif len(p) > 2: 450 | p[0]["authorization"] = p[2] 451 | 452 | def set_properties_for_schema_and_database(self, p: List, p_list: List) -> None: 453 | if not p[0].get("properties"): 454 | if len(p_list) == 3: 455 | properties = p_list[-1] 456 | elif len(p_list) > 3: 457 | properties = {p_list[-3]: p_list[-1]} 458 | else: 459 | properties = {} 460 | if properties: 461 | p[0]["properties"] = properties 462 | else: 463 | p[0]["properties"].update({p_list[-3]: p_list[-1]}) 464 | 465 | def set_auth_property_in_schema(self, p: List, p_list: List) -> None: 466 | if p_list[2] == auth: 467 | p[0] = {"schema_name": p_list[3], auth.lower(): p_list[3]} 468 | else: 469 | p[0] = {"schema_name": p_list[2], auth.lower(): p_list[-1]} 470 | 471 | def p_c_schema(self, p: List) -> None: 472 | """c_schema : CREATE SCHEMA 473 | | CREATE ID SCHEMA""" 474 | 475 | if len(p) == 4: 476 | p[0] = {"remote": True} 477 | 478 | def p_create_schema(self, p: List) -> None: 479 | """create_schema : c_schema id id 480 | | c_schema id id id 481 | | c_schema id 482 | | c_schema id DOT id 483 | | c_schema id option_comment 484 | | c_schema id DOT id option_comment 485 | | c_schema IF NOT EXISTS id 486 | | c_schema IF NOT EXISTS id DOT id 487 | | create_schema id id id 488 | | create_schema id id STRING 489 | | create_schema options 490 | """ 491 | p_list = list(p) 492 | 493 | p[0] = {} 494 | auth_index = None 495 | 496 | if "comment" in p_list[-1]: 497 | p[0].update(p_list[-1]) 498 | del p_list[-1] 499 | 500 | self.add_if_not_exists(p[0], p_list) 501 | if isinstance(p_list[1], dict): 502 | p[0] = p_list[1] 503 | self.set_properties_for_schema_and_database(p, p_list) 504 | elif auth in p_list: 505 | auth_index = p_list.index(auth) 506 | self.set_auth_property_in_schema(p, p_list) 507 | 508 | if isinstance(p_list[-1], str): 509 | if auth_index: 510 | schema_name = p_list[auth_index - 1] 511 | if schema_name is None: 512 | schema_name = p_list[auth_index + 1] 513 | else: 514 | schema_name = p_list[-1] 515 | p[0]["schema_name"] = schema_name.replace("`", "") 516 | 517 | 
p[0] = self.set_project_in_schema(p[0], p_list, auth_index) 518 | 519 | @staticmethod 520 | def set_project_in_schema(data: Dict, p_list: List, auth_index: int) -> Dict: 521 | if len(p_list) > 4 and not auth_index and "." in p_list: 522 | data["project"] = p_list[-3].replace("`", "") 523 | return data 524 | 525 | def p_create_database(self, p: List) -> None: 526 | """create_database : database_base 527 | | create_database id id id 528 | | create_database id id STRING 529 | | create_database options 530 | """ 531 | p_list = list(p) 532 | 533 | if isinstance(p_list[1], dict): 534 | p[0] = p_list[1] 535 | self.set_properties_for_schema_and_database(p, p_list) 536 | else: 537 | p[0] = {f"{p[2].lower()}_name": p_list[-1]} 538 | 539 | 540 | class Drop: 541 | def p_expression_drop_table(self, p: List) -> None: 542 | """expr : DROP TABLE id 543 | | DROP TABLE id DOT id 544 | """ 545 | # get schema & table name 546 | p_list = list(p) 547 | schema = None 548 | if len(p) > 4: 549 | if "." in p: 550 | schema = p_list[-3] 551 | table_name = p_list[-1] 552 | else: 553 | table_name = p_list[-1] 554 | p[0] = {"schema": schema, "table_name": table_name} 555 | 556 | 557 | class Type: 558 | def p_multiple_column_names(self, p: List) -> None: 559 | """multiple_column_names : column 560 | | multiple_column_names COMMA 561 | | multiple_column_names column 562 | """ 563 | p_list = list(p) 564 | if isinstance(p[1], dict): 565 | p[0] = [p[1]] 566 | else: 567 | p[0] = p[1] 568 | if p_list[-1] != ",": 569 | p[0].append(p_list[-1]) 570 | 571 | @staticmethod 572 | def add_columns_property_for_type(data: Dict, p_list: List) -> Dict: 573 | if "TABLE" in p_list or isinstance(p_list[-1], dict) and p_list[-1].get("name"): 574 | if not data["properties"].get("columns"): 575 | data["properties"]["columns"] = [] 576 | data["properties"]["columns"].append(p_list[-1]) 577 | return data 578 | 579 | @staticmethod 580 | def set_base_type(data: Dict, p_list: List) -> Dict: 581 | if len(p_list) > 3: 582 | data["base_type"] = p_list[2] 583 | else: 584 | data["base_type"] = None 585 | return data 586 | 587 | @staticmethod 588 | def process_str_base_type(data: Dict, p_list: List) -> Dict: 589 | base_type = data["base_type"].upper() 590 | if base_type == "ENUM": 591 | data["properties"]["values"] = p_list[3] 592 | elif data["base_type"] == "OBJECT": 593 | if "type" in p_list[3][0]: 594 | data["properties"]["attributes"] = p_list[3] 595 | return data 596 | 597 | def p_type_definition(self, p: List) -> None: # noqa: C901 598 | """type_definition : type_name id LP pid RP 599 | | type_name id LP multiple_column_names RP 600 | | type_name LP id_equals RP 601 | | type_name TABLE LP defcolumn 602 | | type_definition COMMA defcolumn 603 | | type_definition RP 604 | """ 605 | p_list = remove_par(list(p)) 606 | p[0] = p[1] 607 | if not p[0].get("properties"): 608 | p[0]["properties"] = {} 609 | 610 | p[0] = self.add_columns_property_for_type(p[0], p_list) 611 | 612 | p[0] = self.set_base_type(p[0], p_list) 613 | 614 | if isinstance(p[0]["base_type"], str): 615 | p[0] = self.process_str_base_type(p[0], p_list) 616 | elif isinstance(p_list[-1], list): 617 | for item in p_list[-1]: 618 | p[0]["properties"].update(item) 619 | 620 | def p_expression_type_as(self, p: List) -> None: 621 | """expr : type_definition""" 622 | p[0] = p[1] 623 | 624 | def p_type_name(self, p: List) -> None: 625 | """type_name : type_create id AS 626 | | type_create id DOT id AS 627 | | type_create id DOT id 628 | | type_create id 629 | """ 630 | p_list = list(p) 631 | p[0] = 
{} 632 | if "." not in p_list: 633 | p[0]["schema"] = None 634 | p[0]["type_name"] = p_list[2] 635 | else: 636 | p[0]["schema"] = p[2] 637 | p[0]["type_name"] = p_list[4] 638 | 639 | def p_type_create(self, p: List) -> None: 640 | """type_create : CREATE TYPE 641 | | CREATE OR REPLACE TYPE 642 | """ 643 | p[0] = None 644 | 645 | 646 | class Domain: 647 | def p_expression_domain_as(self, p: List) -> None: 648 | """expr : domain_name id LP pid RP""" 649 | p_list = list(p) 650 | p[0] = p[1] 651 | p[0]["base_type"] = p[2] 652 | p[0]["properties"] = {} 653 | if p[0]["base_type"] == "ENUM": 654 | p[0]["properties"]["values"] = p_list[4] 655 | 656 | def p_domain_name(self, p: List) -> None: 657 | """domain_name : CREATE DOMAIN id AS 658 | | CREATE DOMAIN id DOT id AS 659 | | CREATE DOMAIN id DOT id 660 | | CREATE DOMAIN id 661 | """ 662 | p_list = list(p) 663 | p[0] = {} 664 | if "." not in p_list: 665 | p[0]["schema"] = None 666 | else: 667 | p[0]["schema"] = p[3] 668 | p[0]["domain_name"] = p_list[-2] 669 | 670 | 671 | class BaseSQL( 672 | Database, Table, Drop, Domain, Column, AfterColumns, Type, Schema, TableSpaces 673 | ): 674 | def clean_up_id_list_in_equal(self, p_list: List) -> List: # noqa R701 675 | if isinstance(p_list[1], str) and p_list[1].endswith("="): 676 | p_list[1] = p_list[1][:-1] 677 | elif "," in p_list: 678 | if len(p_list) == 4: 679 | p_list = p_list[-1].split("=") 680 | elif len(p_list) == 5 and p_list[-2].endswith("="): 681 | p_list[-2] = p_list[-2][:-1] 682 | elif "=" == p_list[-2]: 683 | p_list.pop(-2) 684 | return p_list 685 | 686 | def get_property(self, p_list: List) -> Dict: 687 | _property = None 688 | if not isinstance(p_list[-2], list): 689 | _value = True 690 | value = None 691 | if p_list[-2]: 692 | if not p_list[-2] == "=": 693 | key = p_list[-2] 694 | else: 695 | key = p_list[-3] 696 | 697 | else: 698 | _value = False 699 | key = p_list[-1] 700 | if "=" in key: 701 | key = key.split("=") 702 | if _value: 703 | value = f"{key[1]} {p_list[-1]}" 704 | key = key[0] 705 | else: 706 | value = p_list[-1] 707 | _property = {key: value} 708 | else: 709 | _property = p_list[-2][0] 710 | return _property 711 | 712 | def p_id_equals(self, p: List) -> None: 713 | """id_equals : id id id_or_string 714 | | id id_or_string 715 | | id_equals COMMA 716 | | id_equals COMMA id id id_or_string 717 | | id 718 | | id_equals LP pid RP 719 | | id_equals LP pid RP id 720 | | id_equals COMMA id id 721 | | id_equals COMMA id 722 | """ 723 | p_list = remove_par(list(p)) 724 | if p_list[-1] == "]": 725 | p_list = p_list[:-1] 726 | if isinstance(p_list[-1], list): 727 | p[0] = p[1] 728 | p[0][-1][list(p[0][-1].keys())[0]] = p_list[-1] 729 | else: 730 | p_list = self.clean_up_id_list_in_equal(p_list) 731 | _property = self.get_property(p_list) 732 | 733 | if _property: 734 | if not isinstance(p[1], list): 735 | p[0] = [_property] 736 | else: 737 | p[0] = p[1] 738 | if not p_list[-1] == ",": 739 | p[0].append(_property) 740 | 741 | def p_expression_index(self, p: List) -> None: 742 | """expr : index_table_name LP index_pid RP""" 743 | p_list = remove_par(list(p)) 744 | p[0] = p[1] 745 | for item in ["detailed_columns", "columns"]: 746 | if item not in p[0]: 747 | p[0][item] = p_list[-1][item] 748 | else: 749 | p[0][item].extend(p_list[-1][item]) 750 | 751 | def p_index_table_name(self, p: List) -> None: 752 | """index_table_name : create_index ON id 753 | | create_index ON id DOT id 754 | """ 755 | p[0] = p[1] 756 | p_list = list(p) 757 | schema = None 758 | if "." 
in p_list: 759 | schema = p_list[-3] 760 | table_name = p_list[-1] 761 | else: 762 | table_name = p_list[-1] 763 | p[0].update({"schema": schema, "table_name": table_name}) 764 | 765 | def p_create_index(self, p: List) -> None: 766 | """create_index : CREATE INDEX id 767 | | CREATE UNIQUE INDEX id 768 | | create_index ON id 769 | | CREATE CLUSTERED INDEX id 770 | """ 771 | p_list = list(p) 772 | if "CLUSTERED" in p_list: 773 | clustered = True 774 | else: 775 | clustered = False 776 | if isinstance(p[1], dict): 777 | p[0] = p[1] 778 | else: 779 | p[0] = { 780 | "schema": None, 781 | "index_name": p_list[-1], 782 | "unique": "UNIQUE" in p_list, 783 | "clustered": clustered, 784 | } 785 | 786 | def extract_check_data(self, p, p_list): 787 | if isinstance(p_list[-1]["check"], list): 788 | check = " ".join(p_list[-1]["check"]) 789 | if isinstance(check, str): 790 | check = {"constraint_name": None, "statement": check} 791 | else: 792 | check = p_list[-1]["check"] 793 | p[0] = self.set_constraint(p[0], "checks", check, check["constraint_name"]) 794 | if not p[0].get("checks"): 795 | p[0]["checks"] = [] 796 | p[0]["checks"].append(check) 797 | return p[0] 798 | 799 | def p_expression_table(self, p: List) -> None: # noqa R701 800 | """expr : table_name defcolumn 801 | | table_name LP defcolumn 802 | | table_name 803 | | expr COMMA defcolumn 804 | | expr COMMA 805 | | expr COMMA constraint 806 | | expr COMMA check_ex 807 | | expr COMMA foreign 808 | | expr COMMA pkey 809 | | expr COMMA uniq 810 | | expr COMMA statem_by_id 811 | | expr COMMA constraint uniq 812 | | expr COMMA period_for 813 | | expr COMMA pkey_constraint 814 | | expr COMMA constraint pkey 815 | | expr COMMA constraint pkey enforced 816 | | expr COMMA constraint foreign ref 817 | | expr COMMA foreign ref 818 | | expr encode 819 | | expr DEFAULT id id id 820 | | expr RP 821 | """ 822 | p[0] = p[1] or defaultdict(list) 823 | p_list = remove_par(list(p)) 824 | if p_list[-1] != "," and p_list[-1] is not None: 825 | if "type" in p_list[-1] and "name" in p_list[-1]: 826 | if not p[0].get("columns"): 827 | p[0]["columns"] = [] 828 | p[0]["columns"].append(p_list[-1]) 829 | elif "check" in p_list[-1]: 830 | p[0] = self.extract_check_data(p, p_list) 831 | elif "enforced" in p_list[-1]: 832 | p_list[-2].update(p_list[-1]) 833 | p[0].update({"primary_key_enforced": p_list[-1]["enforced"]}) 834 | elif 'DEFAULT' in p_list: 835 | p[0].update({"default_charset": p_list[-1]}) 836 | elif isinstance(p_list[-1], dict): 837 | p[0].update(p_list[-1]) 838 | 839 | if isinstance(p_list[-1], dict): 840 | p[0] = self.process_constraints_and_refs(p[0], p_list) 841 | 842 | def process_unique_and_primary_constraint(self, data: Dict, p_list: List) -> Dict: 843 | if p_list[-1].get("unique_statement"): 844 | data = self.set_constraint( 845 | data, 846 | "uniques", 847 | {"columns": p_list[-1]["unique_statement"]}, 848 | p_list[-2]["constraint"]["name"], 849 | ) 850 | else: 851 | data = self.set_constraint( 852 | data, 853 | "primary_keys", 854 | {"columns": p_list[-1]["primary_key"]}, 855 | p_list[-2]["constraint"]["name"], 856 | ) 857 | return data 858 | 859 | def process_constraints_and_refs(self, data: Dict, p_list: List) -> Dict: 860 | 861 | if "constraint" in p_list[-2]: 862 | data = self.process_unique_and_primary_constraint(data, p_list) 863 | elif ( 864 | len(p_list) >= 4 865 | and isinstance(p_list[3], dict) 866 | and p_list[3].get("constraint") 867 | and p_list[3]["constraint"].get("primary_key") 868 | ): 869 | del p_list[3]["constraint"]["primary_key"] 
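                # the "primary_key" marker has served its purpose; the remaining
                # constraint data is filed under constraints["primary_keys"]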
870 | data = self.set_constraint( 871 | target_dict=data, 872 | _type="primary_keys", 873 | constraint=p_list[3]["constraint"], 874 | constraint_name=p_list[3]["constraint"]["name"], 875 | ) 876 | del data["constraint"] 877 | elif p_list[-1].get("references"): 878 | data = self.add_ref_information_to_table(data, p_list) 879 | return data 880 | 881 | def add_ref_information_to_table(self, data, p_list): 882 | if len(p_list) > 4 and "constraint" in p_list[3]: 883 | data = self.set_constraint( 884 | data, 885 | "references", 886 | p_list[-1]["references"], 887 | p_list[3]["constraint"]["name"], 888 | ) 889 | elif isinstance(p_list[-2], list): 890 | if "ref_columns" not in data: 891 | data["ref_columns"] = [] 892 | 893 | for num, column in enumerate(p_list[-2]): 894 | ref = deepcopy(p_list[-1]["references"]) 895 | ref["column"] = ref["columns"][num] 896 | del ref["columns"] 897 | ref["name"] = column 898 | data["ref_columns"].append(ref) 899 | return data 900 | 901 | @staticmethod 902 | def set_constraint( 903 | target_dict: Dict, _type: str, constraint: Dict, constraint_name: str 904 | ) -> Dict: 905 | if not target_dict.get("constraints"): 906 | target_dict["constraints"] = {} 907 | if not target_dict["constraints"].get(_type): 908 | target_dict["constraints"][_type] = [] 909 | if "name" in constraint: 910 | del constraint["name"] 911 | constraint.update({"constraint_name": constraint_name}) 912 | target_dict["constraints"][_type].append(constraint) 913 | return target_dict 914 | 915 | def p_likke(self, p: List) -> None: 916 | """likke : LIKE 917 | | CLONE 918 | """ 919 | p[0] = None 920 | 921 | def p_expression_like_table(self, p: List) -> None: 922 | """expr : table_name likke id 923 | | table_name likke id DOT id 924 | | table_name LP likke id DOT id RP 925 | | table_name LP likke id RP 926 | """ 927 | # get schema & table name 928 | p_list = remove_par(list(p)) 929 | if len(p_list) > 4: 930 | if "." in p: 931 | schema = p_list[-3] 932 | table_name = p_list[-1] 933 | else: 934 | table_name = p_list[-1] 935 | schema = None 936 | p[0] = p[1] 937 | p[0].update({"like": {"schema": schema, "table_name": table_name}}) 938 | 939 | def p_t_name(self, p: List) -> None: 940 | """t_name : id DOT id 941 | | id 942 | | id DOT id DOT id 943 | """ 944 | p_list = list(p) 945 | 946 | project = None 947 | 948 | if len(p) > 3: 949 | if "." 
in p: 950 | schema = p_list[-3] 951 | table_name = p_list[-1] 952 | if len(p) == 6: 953 | project = p_list[1] 954 | else: 955 | table_name = p_list[-1] 956 | schema = None 957 | 958 | p[0] = {"schema": schema, "table_name": table_name, "columns": [], "checks": []} 959 | 960 | if project: 961 | p[0]["project"] = project 962 | 963 | def p_table_name(self, p: List) -> None: 964 | """table_name : create_table t_name 965 | | table_name likke id 966 | """ 967 | # can contain additional properties like 'external for HQL 968 | p[0] = p[1] 969 | 970 | p[0].update(list(p)[-1]) 971 | 972 | def p_expression_seq(self, p: List) -> None: 973 | """expr : seq_name 974 | | expr INCREMENT id 975 | | expr INCREMENT id id 976 | | expr START id 977 | | expr START id id 978 | | expr MINVALUE id 979 | | expr NO MINVALUE 980 | | expr NO MAXVALUE 981 | | expr MAXVALUE id 982 | | expr CACHE id 983 | | expr CACHE 984 | """ 985 | # get schema & table name 986 | p_list = list(p) 987 | p[0] = p[1] 988 | value = None 989 | if len(p) == 4: 990 | if p[2] == "NO": 991 | value = {p_list[-1].lower(): False} 992 | else: 993 | value = {p[2].lower(): int(p_list[-1])} 994 | elif len(p) == 3: 995 | value = {p[2].lower(): True} 996 | elif len(p) == 5: 997 | value = {f"{p[2].lower()}_{p[3].lower()}": int(p_list[-1])} 998 | if value: 999 | p[0].update(value) 1000 | 1001 | def p_seq_name(self, p: List) -> None: 1002 | """seq_name : create_seq id DOT id 1003 | | create_seq id 1004 | """ 1005 | # get schema & table name 1006 | p_list = list(p) 1007 | schema = None 1008 | if len(p) > 4: 1009 | if "." in p: 1010 | schema = p_list[-3] 1011 | seq_name = p_list[-1] 1012 | else: 1013 | seq_name = p_list[-1] 1014 | p[0] = {"schema": schema, "sequence_name": seq_name} 1015 | 1016 | def p_create_seq(self, p: List) -> None: 1017 | """create_seq : CREATE SEQUENCE IF NOT EXISTS 1018 | | CREATE SEQUENCE 1019 | 1020 | """ 1021 | # get schema & table name 1022 | 1023 | self.add_if_not_exists(p[0], list(p)) 1024 | 1025 | def p_tid(self, p: List) -> None: 1026 | """tid : LT id 1027 | | LT 1028 | | tid LT 1029 | | tid id 1030 | | tid COMMAT 1031 | | tid RT 1032 | """ 1033 | if not isinstance(p[1], list): 1034 | p[0] = [p[1]] 1035 | else: 1036 | p[0] = p[1] 1037 | 1038 | for i in list(p)[2:]: 1039 | if not i == "[]" and not i == ",": 1040 | p[0][0] += f" {i}" 1041 | else: 1042 | p[0][0] += f"{i}" 1043 | 1044 | @staticmethod 1045 | def get_complex_type(p, p_list): 1046 | if len(p_list) == 4: 1047 | p[0]["type"] = f"{p[2]} {p[3][0]}" 1048 | elif p[0]["type"]: 1049 | if len(p[0]["type"]) == 1 and isinstance(p[0]["type"], list): 1050 | p[0]["type"] = p[0]["type"][0] 1051 | p[0]["type"] = f'{p[0]["type"]} {p_list[-1][0]}' 1052 | else: 1053 | p[0]["type"] = p_list[-1][0] 1054 | return p[0] 1055 | 1056 | def extract_references(self, table_data: Dict): 1057 | ref = { 1058 | "table": table_data["table_name"], 1059 | "columns": [None], 1060 | "schema": table_data["schema"], 1061 | "on_delete": None, 1062 | "on_update": None, 1063 | "deferrable_initially": None, 1064 | } 1065 | 1066 | if table_data.get("project"): 1067 | ref["project"] = table_data["project"] 1068 | 1069 | return ref 1070 | 1071 | def p_null(self, p: List) -> None: 1072 | """null : NULL 1073 | | NOT NULL 1074 | """ 1075 | nullable = True 1076 | if "NULL" in p or "null" in p: 1077 | if "NOT" in p or "not" in p: 1078 | nullable = False 1079 | p[0] = {"nullable": nullable} 1080 | 1081 | def p_f_call(self, p: List) -> None: 1082 | """f_call : id LP RP 1083 | | id LP f_call RP 1084 | | id LP multi_id RP 
1085 | | id LP pid RP 1086 | """ 1087 | p_list = list(p) 1088 | if isinstance(p[1], list): 1089 | p[0] = p[1] 1090 | p[0].append(p_list[-1]) 1091 | else: 1092 | value = "" 1093 | for elem in p_list[1:]: 1094 | if isinstance(elem, list): 1095 | elem = ",".join(elem) 1096 | value += elem 1097 | p[0] = value 1098 | 1099 | def p_multi_id(self, p: List) -> None: 1100 | """multi_id : id 1101 | | multi_id id 1102 | | f_call 1103 | | multi_id f_call 1104 | """ 1105 | p_list = list(p) 1106 | if isinstance(p[1], list): 1107 | p[0] = p[1] 1108 | p[0].append(p_list[-1]) 1109 | else: 1110 | value = " ".join(p_list[1:]) 1111 | p[0] = value 1112 | 1113 | def p_funct_args(self, p: List) -> None: 1114 | """funct_args : LP multi_id RP""" 1115 | p[0] = {"args": f"({p[2]})"} 1116 | 1117 | def p_funct(self, p: List) -> None: 1118 | """funct : id LP multi_id RP""" 1119 | p[0] = {"func_name": p[1], "args": f"({p[3]})"} 1120 | 1121 | def p_multiple_funct(self, p: List) -> None: 1122 | """multiple_funct : funct 1123 | | multiple_funct COMMA funct 1124 | | multiple_funct COMMA 1125 | """ 1126 | if not isinstance(p[1], list): 1127 | p[0] = [p[1]] 1128 | else: 1129 | p[0] = p[1] 1130 | p[0].append(p[-1]) 1131 | 1132 | def p_funct_expr(self, p: List) -> None: 1133 | """funct_expr : LP multi_id RP 1134 | | multi_id 1135 | """ 1136 | if len(p) > 2: 1137 | p[0] = p[2] 1138 | else: 1139 | p[0] = p[1] 1140 | 1141 | def p_dot_id(self, p: List) -> None: 1142 | """dot_id : id DOT id""" 1143 | p[0] = f"{p[1]}.{p[3]}" 1144 | 1145 | def p_default(self, p: List) -> None: 1146 | """default : DEFAULT id 1147 | | DEFAULT STRING 1148 | | DEFAULT NULL 1149 | | default FOR dot_id 1150 | | DEFAULT funct_expr 1151 | | DEFAULT LP pid RP 1152 | | DEFAULT LP funct_expr pid RP 1153 | | default id 1154 | | default LP RP 1155 | """ 1156 | p_list = remove_par(list(p)) 1157 | 1158 | default = self.pre_process_default(p_list) 1159 | 1160 | if isinstance(p_list[-1], list): 1161 | p_list[-1] = " ".join(p_list[-1]) 1162 | default = " ".join(p_list[1:]) 1163 | elif not isinstance(default, dict) and default.isnumeric(): 1164 | default = int(default) 1165 | 1166 | if isinstance(p[1], dict): 1167 | p[0] = self.process_dict_default_value(p_list, default) 1168 | else: 1169 | p[0] = {"default": default} 1170 | 1171 | @staticmethod 1172 | def pre_process_default(p_list: List) -> Any: 1173 | if len(p_list) == 5 and isinstance(p_list[3], list): 1174 | default = p_list[3][0] 1175 | elif "DEFAULT" in p_list and len(p_list) == 4: 1176 | default = f"{p_list[2]} {p_list[3]}" 1177 | else: 1178 | default = p_list[2] 1179 | return default 1180 | 1181 | @staticmethod 1182 | def process_dict_default_value(p_list: List, default: Any) -> Dict: 1183 | data = p_list[1] 1184 | if "FOR" in default: 1185 | data["default"] = {"next_value_for": p_list[-1]} 1186 | else: 1187 | for i in p_list[2:]: 1188 | if isinstance(p_list[2], str): 1189 | p_list[2] = p_list[2].replace("\\'", "'") 1190 | if i == ")" or i == "(": 1191 | data["default"] = str(data["default"]) + f"{i}" 1192 | else: 1193 | data["default"] = str(data["default"]) + f" {i}" 1194 | data["default"] = data["default"].replace("))", ")") 1195 | return data 1196 | 1197 | def p_enforced(self, p: List) -> None: 1198 | """enforced : ENFORCED 1199 | | NOT ENFORCED 1200 | """ 1201 | p_list = list(p) 1202 | p[0] = {"enforced": len(p_list) == 1} 1203 | 1204 | def p_collate(self, p: List) -> None: 1205 | """collate : COLLATE id 1206 | | COLLATE STRING 1207 | """ 1208 | p_list = list(p) 1209 | p[0] = {"collate": p_list[-1]} 1210 | 
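    # illustrative outputs of the adjacent rules (sample clauses are hypothetical,
    # not taken from the repository's test files):
    #   "... COLLATE utf8_bin"    -> {"collate": "utf8_bin"}
    #   "CONSTRAINT pk_users ..." -> {"constraint": {"name": "pk_users"}}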
1211 | def p_constraint(self, p: List) -> None: 1212 | """ 1213 | constraint : CONSTRAINT id 1214 | """ 1215 | 1216 | p_list = list(p) 1217 | 1218 | p[0] = {"constraint": {"name": p_list[-1]}} 1219 | 1220 | def p_generated(self, p: List) -> None: 1221 | """ 1222 | generated : gen_always funct_expr 1223 | | gen_always funct_expr id 1224 | | gen_always LP multi_id RP 1225 | | gen_always f_call 1226 | """ 1227 | p_list = list(p) 1228 | stored = False 1229 | if len(p) > 3 and p_list[-1].lower() == "stored": 1230 | stored = True 1231 | _as = p[2] 1232 | p[0] = {"generated": {"always": True, "as": _as, "stored": stored}} 1233 | 1234 | def p_gen_always(self, p: List) -> None: 1235 | """ 1236 | gen_always : GENERATED id AS 1237 | """ 1238 | p[0] = {"generated": {"always": True}} 1239 | 1240 | def p_check_st(self, p: List) -> None: 1241 | """check_st : CHECK LP id 1242 | | check_st id 1243 | | check_st STRING 1244 | | check_st id STRING 1245 | | check_st id RP 1246 | | check_st STRING RP 1247 | | check_st funct_args 1248 | | check_st LP pid RP 1249 | """ 1250 | p_list = remove_par(list(p)) 1251 | if isinstance(p[1], dict): 1252 | p[0] = p[1] 1253 | else: 1254 | p[0] = {"check": []} 1255 | for item in p_list[2:]: 1256 | if isinstance(p_list[-1], dict) and p_list[-1].get("args"): 1257 | p[0]["check"][-1] += p_list[-1]["args"] 1258 | elif isinstance(item, list): 1259 | p[0]["check"].append(f"({','.join(item)})") 1260 | else: 1261 | p[0]["check"].append(item) 1262 | 1263 | def p_using_tablespace(self, p: List) -> None: 1264 | """using_tablespace : USING INDEX tablespace""" 1265 | p_list = list(p) 1266 | p[0] = {"using": {"tablespace": p_list[-1], "index": True}} 1267 | 1268 | def p_expression_alter(self, p: List) -> None: 1269 | """expr : alter_foreign ref 1270 | | alter_check 1271 | | alter_unique 1272 | | alter_default 1273 | | alter_primary_key 1274 | | alter_primary_key using_tablespace 1275 | """ 1276 | p[0] = p[1] 1277 | if len(p) == 3: 1278 | p[0].update(p[2]) 1279 | 1280 | def p_alter_primary_key(self, p: List) -> None: 1281 | """alter_primary_key : alt_table PRIMARY KEY LP pid RP 1282 | | alt_table constraint PRIMARY KEY LP pid RP 1283 | """ 1284 | 1285 | p_list = remove_par(list(p)) 1286 | p[0] = p[1] 1287 | p[0]["primary_key"] = {"constraint_name": None, "columns": p_list[-1]} 1288 | if "constraint" in p[2]: 1289 | p[0]["primary_key"]["constraint_name"] = p[2]["constraint"]["name"] 1290 | 1291 | def p_alter_unique(self, p: List) -> None: 1292 | """alter_unique : alt_table UNIQUE LP pid RP 1293 | | alt_table constraint UNIQUE LP pid RP 1294 | """ 1295 | 1296 | p_list = remove_par(list(p)) 1297 | p[0] = p[1] 1298 | p[0]["unique"] = {"constraint_name": None, "columns": p_list[-1]} 1299 | if "constraint" in p[2]: 1300 | p[0]["unique"]["constraint_name"] = p[2]["constraint"]["name"] 1301 | 1302 | @staticmethod 1303 | def get_column_and_value_from_alter(p: List) -> Tuple: 1304 | 1305 | p_list = remove_par(list(p)) 1306 | 1307 | column = None 1308 | value = None 1309 | 1310 | if isinstance(p_list[2], str) and "FOR" == p_list[2].upper(): 1311 | column = p_list[-1] 1312 | elif p[0].get("default") and p[0]["default"].get("value"): 1313 | value = p[0]["default"]["value"] + " " + p_list[-1] 1314 | else: 1315 | value = p_list[-1] 1316 | return column, value 1317 | 1318 | def p_alter_default(self, p: List) -> None: 1319 | """alter_default : alt_table id id 1320 | | alt_table constraint id id 1321 | | alt_table id STRING 1322 | | alt_table constraint id STRING 1323 | | alter_default id 1324 | | 
alter_default FOR pid 1325 | """ 1326 | 1327 | p[0] = p[1] 1328 | column, value = self.get_column_and_value_from_alter(p) 1329 | 1330 | if "default" not in p[0]: 1331 | 1332 | p[0]["default"] = { 1333 | "constraint_name": None, 1334 | "columns": column, 1335 | "value": value, 1336 | } 1337 | else: 1338 | p[0]["default"].update( 1339 | { 1340 | "columns": p[0]["default"].get("column") or column, 1341 | "value": value or p[0]["default"].get("value"), 1342 | } 1343 | ) 1344 | if "constraint" in p[2]: 1345 | p[0]["default"]["constraint_name"] = p[2]["constraint"]["name"] 1346 | 1347 | def p_pid(self, p: List) -> None: 1348 | """pid : id 1349 | | STRING 1350 | | pid id 1351 | | pid STRING 1352 | | STRING LP RP 1353 | | id LP RP 1354 | | pid COMMA id 1355 | | pid COMMA STRING 1356 | """ 1357 | p_list = list(p) 1358 | 1359 | if len(p_list) == 4 and isinstance(p[1], str): 1360 | p[0] = ["".join(p[1:])] 1361 | elif not isinstance(p_list[1], list): 1362 | p[0] = [p_list[1]] 1363 | else: 1364 | p[0] = p_list[1] 1365 | p[0].append(p_list[-1]) 1366 | 1367 | def p_alter_check(self, p: List) -> None: 1368 | """alter_check : alt_table check_st 1369 | | alt_table constraint check_st 1370 | """ 1371 | p_list = remove_par(list(p)) 1372 | p[0] = p[1] 1373 | if isinstance(p[1], dict): 1374 | p[0] = p[1] 1375 | if not p[0].get("check"): 1376 | p[0]["check"] = {"constraint_name": None, "statement": []} 1377 | if isinstance(p[2], dict) and "constraint" in p[2]: 1378 | p[0]["check"]["constraint_name"] = p[2]["constraint"]["name"] 1379 | p[0]["check"]["statement"] = p_list[-1]["check"] 1380 | 1381 | def p_index_pid(self, p: List) -> None: 1382 | """index_pid : id 1383 | | index_pid id 1384 | | index_pid COMMA index_pid 1385 | """ 1386 | p_list = list(p) 1387 | if len(p_list) == 2: 1388 | detailed_column = {"name": p_list[1], "order": "ASC", "nulls": "LAST"} 1389 | column = p_list[1] 1390 | p[0] = {"detailed_columns": [detailed_column], "columns": [column]} 1391 | else: 1392 | p[0] = p[1] 1393 | if len(p) == 3: 1394 | if p_list[-1] in ["DESC", "ASC"]: 1395 | p[0]["detailed_columns"][0]["order"] = p_list[-1] 1396 | else: 1397 | p[0]["detailed_columns"][0]["nulls"] = p_list[-1] 1398 | 1399 | column = p_list[2] 1400 | elif isinstance(p_list[-1], dict): 1401 | for i in p_list[-1]["columns"]: 1402 | p[0]["columns"].append(i) 1403 | for i in p_list[-1]["detailed_columns"]: 1404 | p[0]["detailed_columns"].append(i) 1405 | 1406 | def p_alter_foreign(self, p: List) -> None: 1407 | """alter_foreign : alt_table foreign 1408 | | alt_table constraint foreign 1409 | """ 1410 | 1411 | p_list = list(p) 1412 | 1413 | p[0] = p[1] 1414 | if isinstance(p_list[-1], list): 1415 | p[0]["columns"] = [{"name": i} for i in p_list[-1]] 1416 | else: 1417 | column = p_list[-1] 1418 | 1419 | if not p[0].get("columns"): 1420 | p[0]["columns"] = [] 1421 | p[0]["columns"].append(column) 1422 | 1423 | for column in p[0]["columns"]: 1424 | if isinstance(p_list[2], dict) and "constraint" in p_list[2]: 1425 | column.update({"constraint_name": p_list[2]["constraint"]["name"]}) 1426 | 1427 | def p_alt_table_name(self, p: List) -> None: 1428 | """alt_table : ALTER TABLE t_name ADD 1429 | | ALTER TABLE IF EXISTS t_name ADD 1430 | | ALTER TABLE ID t_name ADD""" 1431 | p_list = list(p) 1432 | table_data = p_list[-2] 1433 | p[0] = { 1434 | "alter_table_name": table_data["table_name"], 1435 | "schema": table_data["schema"], 1436 | } 1437 | if "IF" in p_list: 1438 | p[0]["if_exists"] = True 1439 | if len(p_list) == 6: 1440 | p[0]["only"] = True 1441 | if 
table_data.get("project"): 1442 | p[0]["project"] = table_data["project"] 1443 | 1444 | def p_foreign(self, p): 1445 | # todo: need to redone id lists 1446 | """foreign : FOREIGN KEY LP pid RP 1447 | | FOREIGN KEY""" 1448 | p_list = remove_par(list(p)) 1449 | if len(p_list) == 4: 1450 | columns = p_list[-1] 1451 | p[0] = columns 1452 | 1453 | def p_ref(self, p: List) -> None: 1454 | """ref : REFERENCES t_name 1455 | | ref LP pid RP 1456 | | ref ON DELETE id 1457 | | ref ON UPDATE id 1458 | | ref DEFERRABLE INITIALLY id 1459 | | ref NOT DEFERRABLE 1460 | """ 1461 | p_list = remove_par(list(p)) 1462 | if isinstance(p[1], dict): 1463 | p[0] = p[1] 1464 | if "ON" not in p_list and "DEFERRABLE" not in p_list: 1465 | p[0]["references"]["columns"] = p_list[-1] 1466 | else: 1467 | p[0]["references"]["columns"] = p[0]["references"].get( 1468 | "columns", [None] 1469 | ) 1470 | else: 1471 | data = {"references": self.extract_references(p_list[-1])} 1472 | p[0] = data 1473 | p[0] = self.process_references_with_properties(p[0], p_list) 1474 | 1475 | @staticmethod 1476 | def process_references_with_properties(data: Dict, p_list: List) -> Dict: 1477 | if "ON" in p_list: 1478 | if "DELETE" in p_list: 1479 | data["references"]["on_delete"] = p_list[-1] 1480 | elif "UPDATE" in p_list: 1481 | data["references"]["on_update"] = p_list[-1] 1482 | elif "DEFERRABLE" in p_list: 1483 | if "NOT" not in p_list: 1484 | data["references"]["deferrable_initially"] = p_list[-1] 1485 | else: 1486 | data["references"]["deferrable_initially"] = "NOT" 1487 | return data 1488 | 1489 | def p_expression_primary_key(self, p): 1490 | "expr : pkey" 1491 | p[0] = p[1] 1492 | 1493 | def p_uniq(self, p: List) -> None: 1494 | """uniq : UNIQUE LP pid RP""" 1495 | p_list = remove_par(list(p)) 1496 | p[0] = {"unique_statement": p_list[-1]} 1497 | 1498 | def p_statem_by_id(self, p: List) -> None: 1499 | """statem_by_id : id LP pid RP 1500 | | id KEY LP pid RP 1501 | """ 1502 | p_list = remove_par(list(p)) 1503 | if p[1].upper() == "UNIQUE": 1504 | p[0] = {"unique_statement": p_list[-1]} 1505 | elif p[1].upper() == "CHECK": 1506 | p[0] = {"check": p_list[-1]} 1507 | elif p[1].upper() == "PRIMARY": 1508 | p[0] = {"primary_key": p_list[-1]} 1509 | 1510 | def p_pkey(self, p: List) -> None: 1511 | """pkey : pkey_statement LP pid RP 1512 | | pkey_statement ID LP pid RP 1513 | """ 1514 | p_list = remove_par(list(p)) 1515 | 1516 | columns = [] 1517 | 1518 | p[0] = {} 1519 | 1520 | if isinstance(p_list[2], str) and "CLUSTERED" == p_list[2]: 1521 | order = None 1522 | column = None 1523 | for item in p_list[-1]: 1524 | if item not in ["ASC", "DESC"]: 1525 | column = item 1526 | else: 1527 | order = item 1528 | if column and order: 1529 | columns.append({"column": column, "order": order}) 1530 | column = None 1531 | order = None 1532 | p[0]["clustered_primary_key"] = columns 1533 | 1534 | p[0] = self.process_order_in_pk(p[0], p_list) 1535 | 1536 | @staticmethod 1537 | def process_order_in_pk(data: Dict, p_list: List) -> Dict: 1538 | columns = [] 1539 | for item in p_list[-1]: 1540 | if item not in ["ASC", "DESC"]: 1541 | columns.append(item) 1542 | data["primary_key"] = columns 1543 | return data 1544 | 1545 | def p_pkey_statement(self, p: List) -> None: 1546 | """pkey_statement : PRIMARY KEY""" 1547 | p[0] = {"primary_key": None} 1548 | 1549 | def p_comment(self, p: List) -> None: 1550 | """comment : COMMENT STRING""" 1551 | p_list = remove_par(list(p)) 1552 | p[0] = {"comment": check_spec(p_list[-1])} 1553 | 1554 | def p_tablespace(self, p: List) 
1554 | def p_tablespace(self, p: List) -> None: 1555 | """tablespace : TABLESPACE id 1556 | | TABLESPACE id properties 1557 | """ 1558 | # Initial 5m Next 5m Maxextents Unlimited 1559 | p[0] = self.get_tablespace_data(list(p)) 1560 | 1561 | def p_expr_tablespace(self, p: List) -> None: 1562 | """expr : expr tablespace""" 1563 | p_list = list(p) 1564 | p[0] = p[1] 1565 | p[0]["tablespace"] = p_list[-1] 1566 |
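Editorial aside: the two rules above close out the grammar - p_tablespace reads a TABLESPACE clause and p_expr_tablespace attaches it to the table dict under the "tablespace" key (init_table_data in sondesh/output/common.py defaults it to None). A minimal usage sketch, assuming the parse_the_ddl entry point that the bundled tests import from sondesh.ddl_parser:

import pprint

from sondesh.ddl_parser import parse_the_ddl

# the TABLESPACE clause should be picked up by the p_tablespace rule and
# surface as the table's "tablespace" value in the parsed output
ddl = "CREATE TABLE employee (employee_id number(10)) TABLESPACE users;"
pprint.pprint(parse_the_ddl(ddl).run(group_by_type=True))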
target_table["alter"].get("checks"): 86 | target_table["alter"]["checks"] = [] 87 | statement["check"]["statement"] = " ".join(statement["check"]["statement"]) 88 | target_table["alter"]["checks"].append(statement["check"]) 89 | elif "unique" in statement: 90 | target_table = set_alter_to_table_data("unique", statement, target_table) 91 | target_table = set_unique_columns_from_alter(statement, target_table) 92 | elif "default" in statement: 93 | target_table = set_alter_to_table_data("default", statement, target_table) 94 | target_table = set_default_columns_from_alter(statement, target_table) 95 | elif "primary_key" in statement: 96 | target_table = set_alter_to_table_data("primary_key", statement, target_table) 97 | return tables_dict 98 | 99 | 100 | def set_default_columns_from_alter(statement: Dict, target_table: Dict) -> Dict: 101 | for column in target_table["columns"]: 102 | if statement["default"]["columns"]: 103 | for column_name in statement["default"]["columns"]: 104 | if column["name"] == column_name: 105 | column["default"] = statement["default"]["value"] 106 | return target_table 107 | 108 | 109 | def set_unique_columns_from_alter(statement: Dict, target_table: Dict) -> Dict: 110 | for column in target_table["columns"]: 111 | for column_name in statement["unique"]["columns"]: 112 | if column["name"] == column_name: 113 | column["unique"] = True 114 | return target_table 115 | 116 | 117 | def set_alter_to_table_data(key: str, statement: Dict, target_table: Dict) -> Dict: 118 | if not target_table["alter"].get(key + "s"): 119 | target_table["alter"][key + "s"] = [] 120 | if "using" in statement: 121 | statement[key]["using"] = statement["using"] 122 | target_table["alter"][key + "s"].append(statement[key]) 123 | return target_table 124 | 125 | 126 | def init_table_data() -> Dict: 127 | return { 128 | "columns": [], 129 | "primary_key": None, 130 | "alter": {}, 131 | "checks": [], 132 | "index": [], 133 | "partitioned_by": [], 134 | "tablespace": None, 135 | } 136 | 137 | 138 | def process_alter_and_index_result( 139 | tables_dict: Dict, table: Dict, output_mode: str 140 | ) -> Dict: 141 | if table.get("index_name"): 142 | tables_dict = add_index_to_table(tables_dict, table, output_mode) 143 | 144 | elif table.get("alter_table_name"): 145 | tables_dict = add_alter_to_table(tables_dict, table) 146 | 147 | return tables_dict 148 | 149 | 150 | def process_entities(tables_dict: Dict, table: Dict, output_mode: str) -> Dict: 151 | """process tables, types, sequence and etc. 
data""" 152 | is_it_table = True 153 | 154 | if table.get("table_name"): 155 | table_data = init_table_data() 156 | table_data = d.populate_dialects_table_data(output_mode, table_data) 157 | table_data.update(table) 158 | table_data = set_unique_columns(table_data) 159 | else: 160 | table_data = table 161 | is_it_table = False 162 | 163 | if is_it_table: 164 | table_data = process_is_it_table_item(table_data, tables_dict) 165 | 166 | table_data = normalize_ref_columns_in_final_output(table_data) 167 | 168 | d.dialects_clean_up(output_mode, table_data) 169 | return table_data 170 | 171 | 172 | def result_format( 173 | result: List[Dict], output_mode: str, group_by_type: bool 174 | ) -> List[Dict]: 175 | """method to format final output after parser""" 176 | final_result = [] 177 | tables_dict = {} 178 | for table in result: 179 | # process each item in parser output 180 | if "index_name" in table or "alter_table_name" in table: 181 | tables_dict = process_alter_and_index_result( 182 | tables_dict, table, output_mode 183 | ) 184 | else: 185 | # process tables, types, sequence and etc. data 186 | table_data = process_entities(tables_dict, table, output_mode) 187 | final_result.append(table_data) 188 | if group_by_type: 189 | final_result = group_by_type_result(final_result) 190 | return final_result 191 | 192 | 193 | def process_is_it_table_item(table_data: Dict, tables_dict: Dict) -> Dict: 194 | if table_data.get("table_name"): 195 | tables_dict[(table_data["table_name"], table_data["schema"])] = table_data 196 | else: 197 | logger.error( 198 | "\n Something goes wrong. Possible you try to parse unsupported statement \n " 199 | ) 200 | if not table_data.get("primary_key"): 201 | table_data = check_pk_in_columns_and_constraints(table_data) 202 | else: 203 | table_data = remove_pk_from_columns(table_data) 204 | 205 | if table_data.get("unique"): 206 | table_data = add_unique_columns(table_data) 207 | 208 | for column in table_data["columns"]: 209 | if column["name"] in table_data["primary_key"]: 210 | column["nullable"] = False 211 | return table_data 212 | 213 | 214 | def normalize_ref_columns_in_final_output(table_data: Dict) -> Dict: 215 | # todo: this is hack, need to remove it 216 | if "references" in table_data: 217 | del table_data["references"] 218 | if "ref_columns" in table_data: 219 | for col_ref in table_data["ref_columns"]: 220 | name = col_ref["name"] 221 | for column in table_data["columns"]: 222 | if name == column["name"]: 223 | del col_ref["name"] 224 | column["references"] = col_ref 225 | del table_data["ref_columns"] 226 | return table_data 227 | 228 | 229 | def set_column_unique_param(table_data: Dict, key: str) -> Dict: 230 | for column in table_data["columns"]: 231 | if key == "constraints": 232 | unique = table_data[key].get("unique", []) 233 | if unique: 234 | check_in = unique["columns"] 235 | else: 236 | check_in = [] 237 | else: 238 | check_in = table_data[key] 239 | if column["name"] in check_in: 240 | column["unique"] = True 241 | return table_data 242 | 243 | 244 | def set_unique_columns(table_data: Dict) -> Dict: 245 | 246 | unique_keys = ["unique_statement", "constraints"] 247 | 248 | for key in unique_keys: 249 | if table_data.get(key, None): 250 | # get column names from unique constraints & statements 251 | table_data = set_column_unique_param(table_data, key) 252 | if "unique_statement" in table_data: 253 | del table_data["unique_statement"] 254 | return table_data 255 | 256 | 257 | def group_by_type_result(final_result: List[Dict]) -> Dict[str, List]: 258 
| result_as_dict = { 259 | "tables": [], 260 | "types": [], 261 | "sequences": [], 262 | "domains": [], 263 | "schemas": [], 264 | "ddl_properties": [], 265 | "comments": [], 266 | } 267 | keys_map = { 268 | "table_name": "tables", 269 | "sequence_name": "sequences", 270 | "type_name": "types", 271 | "domain_name": "domains", 272 | "schema_name": "schemas", 273 | "tablespace_name": "tablespaces", 274 | "database_name": "databases", 275 | "value": "ddl_properties", 276 | "comments": "comments", 277 | } 278 | for item in final_result: 279 | for key in keys_map: 280 | if key in item: 281 | _type = result_as_dict.get(keys_map.get(key)) 282 | if _type is None: 283 | result_as_dict[keys_map.get(key)] = [] 284 | _type = result_as_dict[keys_map.get(key)] 285 | if key != "comments": 286 | _type.append(item) 287 | else: 288 | _type.extend(item["comments"]) 289 | break 290 | if result_as_dict["comments"] == []: 291 | del result_as_dict["comments"] 292 | return result_as_dict 293 | 294 | 295 | def add_unique_columns(table_data: Dict) -> Dict: 296 | for column in table_data["columns"]: 297 | if column["name"] in table_data["unique"]: 298 | column["unique"] = True 299 | del table_data["unique"] 300 | return table_data 301 | 302 | 303 | def remove_pk_from_columns(table_data: Dict) -> Dict: 304 | for column in table_data["columns"]: 305 | del column["primary_key"] 306 | return table_data 307 | 308 | 309 | def check_pk_in_columns_and_constraints(table_data: Dict) -> Dict: 310 | pk = [] 311 | for column in table_data["columns"]: 312 | if column["primary_key"]: 313 | pk.append(column["name"]) 314 | del column["primary_key"] 315 | if table_data.get("constraints") and table_data["constraints"].get("primary_keys"): 316 | for key_constraints in table_data["constraints"]["primary_keys"]: 317 | pk.extend(key_constraints["columns"]) 318 | table_data["primary_key"] = pk 319 | return table_data 320 | 321 | 322 | def dump_data_to_file(table_name: str, dump_path: str, data: List[Dict]) -> None: 323 | """dump the parsed schema to a json file""" 324 | if not os.path.isdir(dump_path): 325 | os.makedirs(dump_path, exist_ok=True) 326 | with open("{}/{}_schema.json".format(dump_path, table_name), "w+") as schema_file: 327 | json.dump(data, schema_file, indent=1) 328 | -------------------------------------------------------------------------------- /sondesh/output/dialects.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Tuple 2 | 3 | hql_clean_up_list = ["deferrable_initially"] 4 | 5 | 6 | sql_clean_up_list = [ 7 | "external", 8 | 9 | "stored_as", 10 | "row_format", 11 | "lines_terminated_by", 12 | "fields_terminated_by", 13 | "collection_items_terminated_by", 14 | "map_keys_terminated_by", 15 | ] 16 | 17 | 18 | def add_additional_hql_keys(table_data: Dict) -> Dict: 19 | table_data.update( 20 | { 21 | "stored_as": None, 22 | "location": None, 23 | "comment": None, 24 | "row_format": None, 25 | "fields_terminated_by": None, 26 | "lines_terminated_by": None, 27 | 28 | "map_keys_terminated_by": None, 29 | "collection_items_terminated_by": None, 30 | "external": table_data.get("external", False), 31 | } 32 | ) 33 | return table_data 34 | 35 | 36 | def add_additional_oracle_keys(table_data: Dict) -> Dict: 37 | table_data.update( 38 | { 39 | "constraints": {"uniques": None, "checks": None, "references": None}, 40 | "storage": None, 41 | } 42 | ) 43 | return table_data 44 | 45 |
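# Editorial sketch (not part of the original source): each add_additional_*_keys
# helper above only merges dialect-specific defaults into the table dict, e.g.
#     table = {"table_name": "employee", "columns": []}
#     add_additional_oracle_keys(table)
# leaves table["storage"] is None and
# table["constraints"] == {"uniques": None, "checks": None, "references": None}.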
46 | def update_bigquery_output(table_data: Dict) -> Dict: 47 | if table_data.get("schema"): 48 | table_data["dataset"] = table_data["schema"] 49 | del table_data["schema"] 50 | return table_data 51 | 52 | 53 | def add_additional_redshift_keys(table_data: Dict) -> Dict: 54 | table_data.update( 55 | { 56 | "diststyle": None, 57 | "distkey": None, 58 | "sortkey": {"type": None, "keys": []}, 59 | "encode": None, 60 | "temp": False, 61 | } 62 | ) 63 | return table_data 64 | 65 | 66 | def add_additional_snowflake_keys(table_data: Dict) -> Dict: 67 | table_data.update({"clone": None, "primary_key_enforced": None}) 68 | return table_data 69 | 70 | 71 | def add_additional_oracle_keys_in_column(column_data: Dict) -> Dict: 72 | column_data.update({"encrypt": None}) 73 | return column_data 74 | 75 | 76 | def add_additional_snowflake_keys_in_column(column_data: Dict) -> Dict: 77 | return column_data 78 | 79 | 80 | def add_additional_redshift_keys_in_column(column_data: Dict, table_data: Dict) -> Tuple[Dict, Dict]: 81 | column_data["encode"] = column_data.get("encode", None) 82 | if column_data.get("distkey"): 83 | table_data["distkey"] = column_data["name"] 84 | del column_data["distkey"] 85 | return column_data, table_data 86 | 87 | 88 | def add_additional_mssql_keys(table_data: Dict) -> Dict: 89 | table_data.update( 90 | { 91 | "constraints": {"uniques": None, "checks": None, "references": None}, 92 | } 93 | ) 94 | return table_data 95 | 96 | 97 | def clean_up_output(table_data: Dict, key_list: List[str]) -> Dict: 98 | for key in key_list: 99 | if key in table_data: 100 | del table_data[key] 101 | return table_data 102 | 103 | 104 | def populate_dialects_table_data(output_mode: str, table_data: Dict) -> Dict: 105 | 106 | method_mapper = { 107 | "hql": add_additional_hql_keys, 108 | "mssql": add_additional_mssql_keys, 109 | "mysql": add_additional_mssql_keys, 110 | "oracle": add_additional_oracle_keys, 111 | "redshift": add_additional_redshift_keys, 112 | "snowflake": add_additional_snowflake_keys, 113 | } 114 | 115 | method = method_mapper.get(output_mode) 116 | 117 | if method: 118 | table_data = method(table_data) 119 | 120 | return table_data 121 | 122 | 123 | def key_cleaning(table_data: Dict, output_mode: str) -> Dict: 124 | if output_mode != "hql": 125 | table_data = clean_up_output(table_data, sql_clean_up_list) 126 | else: 127 | table_data = clean_up_output(table_data, hql_clean_up_list) 128 | # todo: need to figure out how to work around this properly 129 | if "_ddl_parser_comma_only_str" == table_data.get("fields_terminated_by"): 130 | table_data["fields_terminated_by"] = "','" 131 | return table_data 132 | 133 | 134 | def process_redshift_dialect(table_data: Dict) -> Dict: 135 | for column in table_data.get("columns", []): 136 | column, table_data = add_additional_redshift_keys_in_column(column, table_data) 137 | if table_data.get("encode"): 138 | column["encode"] = column["encode"] or table_data.get("encode") 139 | return table_data 140 | 141 |
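# Editorial sketch (not part of the original source): process_redshift_dialect
# hoists a column-level distkey up to the table and backfills a table-wide
# encode onto each column, e.g.
#     table = {"columns": [{"name": "listid", "distkey": True}], "encode": "az64"}
#     process_redshift_dialect(table)
# leaves table["distkey"] == "listid" and table["columns"][0]["encode"] == "az64".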
142 | def dialects_clean_up(output_mode: str, table_data: Dict) -> Dict: 143 | key_cleaning(table_data, output_mode) 144 | update_mappers_for_table_properties = {"bigquery": update_bigquery_output} 145 | update_table_prop = update_mappers_for_table_properties.get(output_mode) 146 | if update_table_prop: 147 | table_data = update_table_prop(table_data) 148 | 149 | if output_mode == "oracle": 150 | for column in table_data.get("columns", []): 151 | column = add_additional_oracle_keys_in_column(column) 152 | elif output_mode == "snowflake": 153 | # there may be no columns if it is a create database or create schema statement 154 | for column in table_data.get("columns", []): 155 | column = add_additional_snowflake_keys_in_column(column) 156 | 157 | elif output_mode == "redshift": 158 | table_data = process_redshift_dialect(table_data) 159 | return table_data 160 | -------------------------------------------------------------------------------- /sondesh/parser.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import re 5 | from typing import Dict, List, Optional, Tuple, Union 6 | 7 | from ply import lex, yacc 8 | 9 | from sondesh.output.common import dump_data_to_file, result_format 10 | from sondesh.utils import find_first_unpair_closed_par 11 | 12 | # open comment 13 | OP_COM = "/*" 14 | # close comment 15 | CL_COM = "*/" 16 | 17 | IN_COM = "--" 18 | MYSQL_COM = "#" 19 | 20 | 21 | def set_logging_config( 22 | log_level: Union[str, int], 23 | log_file: Optional[str] = None) -> None: 24 | 25 | if log_file: 26 | logging.basicConfig( 27 | level=log_level, 28 | filename=log_file, 29 | filemode="w", 30 | format="%(filename)10s:%(lineno)4d:%(message)s", 31 | ) 32 | else: 33 | logging.basicConfig( 34 | level=log_level, 35 | format="%(filename)10s:%(lineno)4d:%(message)s", 36 | ) 37 | 38 | 39 | class Parser: 40 | """ 41 | Base class for a lexer/parser that has the rules defined as methods 42 | 43 | It cannot be loaded or called without a subclass, 44 | 45 | for example: DDLParser 46 | 47 | The subclass must include the tokens and rules for the parser 48 | 49 | This class contains the logic for pre-processing lines before passing them to the lex & yacc parser: 50 | 51 | - clean up 52 | - catch comments 53 | - catch statements like 'SET' (they are not parsed by the parser) 54 | - etc 55 | """ 56 | 57 | def __init__( 58 | self, 59 | content: str, 60 | silent: bool = True, 61 | debug: bool = False, 62 | normalize_names: bool = False, 63 | log_file: Optional[str] = None, 64 | log_level: Union[str, int] = logging.DEBUG, 65 | ) -> None: 66 | """ 67 | content: the file content to process 68 | silent: if True - will not raise errors, just return empty output 69 | debug: if True - the parser will produce a huge token tree & a parser.out file, normally you don't want this enabled 70 | normalize_names: if the flag is True (default 'False') then all identifiers will be returned without 71 | '[', '"' and other delimiters that are used in different SQL dialects to separate custom names 72 | from reserved words & statements. 73 | For example, if the flag is set to 'True' and you pass this input: 74 | 75 | CREATE TABLE [dbo].[TO_Requests]( 76 | [Request_ID] [int] IDENTITY(1,1) NOT NULL, 77 | [user_id] [int] 78 | 79 | In the output you will have names like 'dbo' and 'TO_Requests', not '[dbo]' and '[TO_Requests]'.
80 | log_file: path to a file for logging 81 | log_level: set the logging level for the parser 82 | """ 83 | self.tables = [] 84 | self.silent = not debug if debug else silent 85 | self.data = content.encode("unicode_escape") 86 | self.paren_count = 0 87 | self.normalize_names = normalize_names 88 | set_logging_config(log_level, log_file) 89 | log = logging.getLogger() 90 | self.lexer = lex.lex(object=self, debug=False, debuglog=log) 91 | self.yacc = yacc.yacc(module=self, debug=False, debuglog=log) 92 | self.columns_closed = False 93 | self.statement = None 94 | self.block_comments = [] 95 | self.comments = [] 96 | 97 | def catch_comment_or_process_line(self, code_line: str) -> str: 98 | if self.multi_line_comment: 99 | self.comments.append(self.line) 100 | if CL_COM in self.line: 101 | self.multi_line_comment = False 102 | return '' 103 | 104 | elif not ( 105 | self.line.strip().startswith(MYSQL_COM) 106 | or self.line.strip().startswith(IN_COM) 107 | ): 108 | return self.process_inline_comments(code_line) 109 | return code_line 110 | 111 | def pre_process_line(self) -> None: 112 | code_line = "" 113 | comma_only_str = r"((\')|(' ))+(,)((\')|( '))+\B" 114 | self.line = re.sub(comma_only_str, "_ddl_parser_comma_only_str", self.line) 115 | code_line = self.catch_comment_or_process_line(code_line) 116 | if self.line.startswith(OP_COM) and CL_COM not in self.line: 117 | self.multi_line_comment = True 118 | elif self.line.startswith(CL_COM): 119 | self.multi_line_comment = False 120 | self.line = code_line 121 | 122 | def process_in_comment(self, line: str) -> str: 123 | if re.search(r"((\")|(\'))+(.)*(--)+(.)*((\")|(\'))+", line): 124 | code_line = line 125 | else: 126 | splitted_line = line.split(IN_COM) 127 | code_line = splitted_line[0] 128 | self.comments.append(splitted_line[1]) 129 | return code_line 130 | 131 | def process_line_before_comment(self) -> str: 132 | """get the useful code line - remove the comment""" 133 | code_line = "" 134 | if IN_COM in self.line: 135 | code_line = self.process_in_comment(self.line) 136 | elif CL_COM not in self.line and OP_COM not in self.line: 137 | code_line = self.line 138 | return code_line 139 | 140 | def process_inline_comments(self, code_line: str) -> str: 141 | """this method catches comments like "create table ( # some comment" that are inlined in a statement""" 142 | comment = None 143 | code_line = self.process_line_before_comment() 144 | if OP_COM in self.line: 145 | splitted_line = self.line.split(OP_COM) 146 | code_line += splitted_line[0] 147 | comment = splitted_line[1] 148 | self.block_comments.append(OP_COM) 149 | if CL_COM in code_line and self.block_comments: 150 | splitted_line = self.line.split(CL_COM) 151 | self.block_comments.pop(-1) 152 | code_line += splitted_line[1] 153 | comment = splitted_line[0] 154 | 155 | if comment: 156 | self.comments.append(comment) 157 | return code_line 158 | 159 | def process_regex_input(self, data): 160 | regex = data.split('"input.regex"')[1].split("=")[1] 161 | index = find_first_unpair_closed_par(regex) 162 | regex = regex[:index] 163 | data = data.replace(regex, " lexer_state_regex ") 164 | data = data.replace('"input.regex"', "parse_m_input_regex") 165 | self.lexer.state = {"lexer_state_regex": regex} 166 | return data 167 |
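# Editorial sketch (not part of the original source): pre_process_data below
# pads commas and parentheses with spaces so LP/RP/COMMA tokenize cleanly, e.g.
#     "create table t(a decimal(8,2))"
# becomes, up to extra whitespace,
#     "create table t ( a decimal ( 8 , 2 ) )"
# before the line ever reaches the lexer.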
.replace("(", " ( ") 177 | .replace(")", " ) ") 178 | .replace("\\x", "\\0") 179 | .replace("‘", "'") 180 | .replace("’", "'") 181 | .replace("\\u2018", "'") 182 | .replace("\\u2019", "'") 183 | .replace("'\\t'", "'pars_m_t'") 184 | .replace("'\\n'", "'pars_m_n'") 185 | .replace("\\'", "pars_m_single") 186 | .replace("\\t", " ") 187 | ) 188 | return data 189 | 190 | def process_set(self) -> None: 191 | self.set_line = self.set_line.split() 192 | if self.set_line[-2] == "=": 193 | name = self.set_line[1] 194 | else: 195 | name = self.set_line[-2] 196 | value = self.set_line[-1].replace(";", "") 197 | self.tables.append({"name": name, "value": value}) 198 | 199 | def parse_set_statement(self): 200 | if re.match(r"SET ", self.line.upper()): 201 | self.set_was_in_line = True 202 | if not self.set_line: 203 | self.set_line = self.line 204 | else: 205 | self.process_set() 206 | self.set_line = self.line 207 | elif (self.set_line and len(self.set_line.split()) == 3) or ( 208 | self.set_line and self.set_was_in_line 209 | ): 210 | self.process_set() 211 | self.set_line = None 212 | self.set_was_in_line = False 213 | 214 | def check_new_statement_start(self, line: str) -> bool: 215 | self.new_statement = False 216 | if self.statement and self.statement.count("(") == self.statement.count(")"): 217 | new_statements_tokens = ["ALTER ", "CREATE ", "DROP ", "SET "] 218 | for key in new_statements_tokens: 219 | if line.upper().startswith(key): 220 | self.new_statement = True 221 | return self.new_statement 222 | 223 | def check_line_on_skip_words(self) -> bool: 224 | skip_regex = r"^(GO|USE|INSERT)\b" 225 | 226 | self.skip = False 227 | 228 | if re.match(skip_regex, self.line.upper()): 229 | self.skip = True 230 | return self.skip 231 | 232 | def add_line_to_statement(self) -> str: 233 | 234 | if ( 235 | self.line 236 | and not self.skip 237 | and not self.set_was_in_line 238 | and not self.new_statement 239 | ): 240 | if self.statement is None: 241 | self.statement = self.line 242 | else: 243 | self.statement += f" {self.line}" 244 | 245 | def parse_data(self) -> List[Dict]: 246 | self.tables: List[Dict] = [] 247 | data = self.pre_process_data(self.data) 248 | lines = data.replace("\\t", "").split("\\n") 249 | 250 | self.set_line: Optional[str] = None 251 | 252 | self.set_was_in_line: bool = False 253 | 254 | self.multi_line_comment = False 255 | 256 | for num, self.line in enumerate(lines): 257 | self.process_line(num != len(lines) - 1) 258 | if self.comments: 259 | self.tables.append({"comments": self.comments}) 260 | return self.tables 261 | 262 | def process_line( 263 | self, 264 | last_line: bool, 265 | ) -> Tuple[Optional[str], bool]: 266 | self.pre_process_line() 267 | 268 | self.line = self.line.strip().replace("\n", "").replace("\t", "") 269 | self.skip = self.check_line_on_skip_words() 270 | 271 | self.parse_set_statement() 272 | # to avoid issues when comma or parath are glued to column name 273 | self.check_new_statement_start(self.line) 274 | 275 | final_line = self.line.endswith(";") and not self.set_was_in_line 276 | self.add_line_to_statement() 277 | 278 | if (final_line or self.new_statement) and self.statement: 279 | # end of sql operation, remove ; from end of line 280 | self.statement = self.statement[:-1] 281 | elif last_line and not self.skip: 282 | # continue combine lines in one massive 283 | return 284 | 285 | self.set_default_flags_in_lexer() 286 | 287 | self.process_statement() 288 | 289 | def process_statement(self) -> None: 290 | 291 | if not self.set_line and 
self.statement: 292 | self.parse_statement() 293 | if self.new_statement: 294 | self.statement = self.line 295 | else: 296 | self.statement = None 297 | 298 | def parse_statement(self) -> None: 299 | 300 | _parse_result = yacc.parse(self.statement) 301 | if _parse_result: 302 | self.tables.append(_parse_result) 303 | 304 | def set_default_flags_in_lexer(self) -> None: 305 | attrs = [ 306 | "is_table", 307 | "sequence", 308 | "last_token", 309 | "columns_def", 310 | "after_columns", 311 | "check", 312 | 313 | "last_par", 314 | "lp_open", 315 | "is_alter", 316 | "is_like", 317 | ] 318 | for attr in attrs: 319 | setattr(self.lexer, attr, False) 320 | self.lexer.lt_open = 0 321 | 322 | def run( 323 | self, 324 | *, 325 | dump: bool = False, 326 | dump_path="schemas", 327 | file_path: Optional[str] = None, 328 | output_mode: str = "sql", 329 | group_by_type: bool = False, 330 | json_dump=False, 331 | ) -> List[Dict]: 332 | """ 333 | dump: provide 'True' if you need to dump the output into a file 334 | dump_path: folder where you want to store the result dump files 335 | file_path: pass the full path to the ddl file if you want to use this 336 | file name as the name for the target output file 337 | output_mode: change the output mode to get information relative to a specific dialect, 338 | for example, with output_mode='hql' you will also see in self.tables such information as 339 | 'external', 'stored_as', etc. Possible variants: ["mssql", "mysql", "oracle", "hql", "sql", "snowflake", "redshift", "bigquery"] 340 | group_by_type: if you set True, the output will be formed as a Dict with keys ['tables', 341 | 'sequences', 'types', 'domains'] 342 | and each key will contain a list of parsed entities. Without it the output is a List of Dicts where each 343 | Dict == one entity from the ddl - one table, sequence or type.
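json_dump: if set to True, the output is returned as a JSON string (json.dumps is applied to the parsed tables) instead of Python objects.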
344 | """ 345 | self.tables = self.parse_data() 346 | self.tables = result_format(self.tables, output_mode, group_by_type) 347 | if dump: 348 | if file_path: 349 | # if we run parse from one file - save same way to one file 350 | dump_data_to_file( 351 | os.path.basename(file_path).split(".")[0], dump_path, self.tables 352 | ) 353 | else: 354 | for table in self.tables: 355 | dump_data_to_file(table["table_name"], dump_path, table) 356 | if json_dump: 357 | self.tables = json.dumps(self.tables) 358 | return self.tables 359 | -------------------------------------------------------------------------------- /sondesh/tokens.py: -------------------------------------------------------------------------------- 1 | # statements that used at the start of defenition or in statements without columns 2 | defenition_statements = { 3 | "DROP": "DROP", 4 | "CREATE": "CREATE", 5 | "TABLE": "TABLE", 6 | "DATABASE": "DATABASE", 7 | "SCHEMA": "SCHEMA", 8 | "ALTER": "ALTER", 9 | "TYPE": "TYPE", 10 | "DOMAIN": "DOMAIN", 11 | "REPLACE": "REPLACE", 12 | "OR": "OR", 13 | "CLUSTERED": "CLUSTERED", 14 | "SEQUENCE": "SEQUENCE", 15 | "TABLESPACE": "TABLESPACE", 16 | } 17 | common_statements = { 18 | "INDEX": "INDEX", 19 | "REFERENCES": "REFERENCES", 20 | "KEY": "KEY", 21 | "ADD": "ADD", 22 | "AS": "AS", 23 | "CLONE": "CLONE", 24 | "DEFERRABLE": "DEFERRABLE", 25 | "INITIALLY": "INITIALLY", 26 | "IF": "IF", 27 | "NOT": "NOT", 28 | "EXISTS": "EXISTS", 29 | "ON": "ON", 30 | "FOR": "FOR", 31 | "ENCRYPT": "ENCRYPT", 32 | "SALT": "SALT", 33 | "NO": "NO", 34 | "USING": "USING", 35 | # bigquery 36 | "OPTIONS": "OPTIONS", 37 | } 38 | 39 | columns_defenition = { 40 | "DELETE": "DELETE", 41 | "UPDATE": "UPDATE", 42 | "NULL": "NULL", 43 | "ARRAY": "ARRAY", 44 | ",": "COMMA", 45 | "DEFAULT": "DEFAULT", 46 | "COLLATE": "COLLATE", 47 | "ENFORCED": "ENFORCED", 48 | "ENCODE": "ENCODE", 49 | "GENERATED": "GENERATED", 50 | "COMMENT": "COMMENT" 51 | } 52 | first_liners = { 53 | "LIKE": "LIKE", 54 | "CONSTRAINT": "CONSTRAINT", 55 | "FOREIGN": "FOREIGN", 56 | "PRIMARY": "PRIMARY", 57 | "UNIQUE": "UNIQUE", 58 | "CHECK": "CHECK", 59 | "WITH": "WITH", 60 | } 61 | 62 | common_statements.update(first_liners) 63 | defenition_statements.update(common_statements) 64 | after_columns_tokens = { 65 | "PARTITIONED": "PARTITIONED", 66 | "PARTITION": "PARTITION", 67 | "BY": "BY", 68 | # hql 69 | "INTO": "INTO", 70 | "STORED": "STORED", 71 | "LOCATION": "LOCATION", 72 | "ROW": "ROW", 73 | "FORMAT": "FORMAT", 74 | "TERMINATED": "TERMINATED", 75 | "COLLECTION": "COLLECTION", 76 | "ITEMS": "ITEMS", 77 | "MAP": "MAP", 78 | "KEYS": "KEYS", 79 | "SERDE": "SERDE", 80 | "CLUSTER": "CLUSTER", 81 | "SERDEPROPERTIES": "SERDEPROPERTIES", 82 | "TBLPROPERTIES": "TBLPROPERTIES", 83 | "USING": "USING", 84 | "SKEWED": "SKEWED", 85 | # oracle 86 | "STORAGE": "STORAGE", 87 | "TABLESPACE": "TABLESPACE", 88 | # mssql 89 | "TEXTIMAGE_ON": "TEXTIMAGE_ON", 90 | } 91 | sequence_reserved = { 92 | "INCREMENT": "INCREMENT", 93 | "START": "START", 94 | "MINVALUE": "MINVALUE", 95 | "MAXVALUE": "MAXVALUE", 96 | "CACHE": "CACHE", 97 | "NO": "NO", 98 | } 99 | 100 | 101 | tokens = tuple( 102 | set( 103 | ["ID", "DOT", "STRING", "DQ_STRING", "LP", "RP", "LT", "RT", "COMMAT", "AUTOINCREMENT"] 104 | + list(defenition_statements.values()) 105 | + list(common_statements.values()) 106 | + list(columns_defenition.values()) 107 | + list(sequence_reserved.values()) 108 | + list(after_columns_tokens.values()) 109 | ) 110 | ) 111 | 112 | symbol_tokens = { 113 | ")": "RP", 114 | "(": "LP", 115 | } 
116 | 117 | symbol_tokens_no_check = {"<": "LT", ">": "RT"} 118 | -------------------------------------------------------------------------------- /sondesh/utils.py: -------------------------------------------------------------------------------- 1 | from typing import List 2 | 3 | 4 | def remove_par(p_list: List[str]) -> List[str]: 5 | remove_list = ["(", ")"] 6 | for symbol in remove_list: 7 | while symbol in p_list: 8 | p_list.remove(symbol) 9 | return p_list 10 | 11 | 12 | spec_mapper = { 13 | "'pars_m_t'": "'\t'", 14 | "'pars_m_n'": "'\n'", 15 | "'pars_m_dq'": '"', 16 | "pars_m_single": "'", 17 | } 18 | 19 | 20 | def check_spec(value: str) -> str: 21 | replace_value = spec_mapper.get(value) 22 | if not replace_value: 23 | for item in spec_mapper: 24 | if item in value: 25 | replace_value = value.replace(item, spec_mapper[item]) 26 | break 27 | else: 28 | replace_value = value 29 | return replace_value 30 | 31 | 32 | def find_first_unpair_closed_par(str_: str) -> int: 33 | stack = [] 34 | n = -1 35 | for i in str_: 36 | n += 1 37 | if i == ")": 38 | if not stack: 39 | return n 40 | else: 41 | stack.pop(-1) 42 | elif i == "(": 43 | stack.append(i) 44 | -------------------------------------------------------------------------------- /test/read_from_file.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | from sondesh.ddl_parser import parse_from_file 3 | 4 | result = parse_from_file('sql_files/one.sql') 5 | pprint.pprint(result) 6 | -------------------------------------------------------------------------------- /test/sql_files/one.sql: -------------------------------------------------------------------------------- 1 | create table sales( 2 | salesid integer not null, 3 | listid integer not null, 4 | sellerid integer not null, 5 | buyerid integer not null encode auto, 6 | eventid integer not null encode mostly16, 7 | dateid smallint, 8 | qtysold smallint not null encode mostly8, 9 | pricepaid decimal(8,2) encode delta32k, 10 | commission decimal(8,2) encode delta32k, 11 | saletime timestamp, 12 | test_col varchar(160), 13 | test_col2 varchar(130), 14 | primary key(salesid), 15 | foreign key(listid) references listing(listid), 16 | foreign key(sellerid) references users(userid), 17 | foreign key(buyerid) references users(userid), 18 | foreign key(dateid) references date(dateid) 19 | ) 20 | diststyle auto1 21 | compound sortkey(salesid,sellerid); -------------------------------------------------------------------------------- /test/sql_files/test_sql.sql: -------------------------------------------------------------------------------- 1 | create table sales( 2 | salesid integer not null, 3 | listid integer not null, 4 | sellerid integer not null, 5 | buyerid integer not null encode auto, 6 | eventid integer not null encode mostly16, 7 | dateid smallint, 8 | qtysold smallint not null encode mostly8, 9 | pricepaid decimal(8,2) encode delta32k, 10 | commission decimal(8,2) encode delta32k, 11 | saletime timestamp without time zone encode az64, 12 | test_col varchar(100), 13 | primary key(salesid), 14 | foreign key(listid) references listing(listid), 15 | foreign key(sellerid) references users(userid), 16 | foreign key(buyerid) references users(userid), 17 | foreign key(dateid) references date(dateid) 18 | ) 19 | diststyle auto1 20 | compound sortkey(salesid,sellerid); -------------------------------------------------------------------------------- /test/sql_files/two.sql: 
-------------------------------------------------------------------------------- 1 | create table sales( 2 | salesid integer not null, 3 | listid integer not null, 4 | sellerid varchar not null, 5 | buyerid integer not null encode auto, 6 | eventid integer not null encode mostly16, 7 | dateid smallint not null, 8 | qtysold smallint not null encode mostly8, 9 | pricepaid decimal(8,2) encode delta32k, 10 | commission decimal(8,2) encode delta32k, 11 | saletime timestamp without time zone encode az64, 12 | test_col varchar(120), 13 | primary key(salesid), 14 | foreign key(listid) references listing(listid), 15 | foreign key(sellerid) references users(userid), 16 | foreign key(buyerid) references users(userid), 17 | foreign key(dateid) references date(dateid) 18 | ) 19 | diststyle auto 20 | compound sortkey(listid,sellerid); -------------------------------------------------------------------------------- /test/test_oracle.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | 3 | from sondesh.ddl_parser import parse_the_ddl 4 | 5 | def test_oracle_ddl(): 6 | 7 | ddl = ''' 8 | CREATE TABLE employee ( 9 | employee_id number(100), 10 | first_name VARCHAR2(128) NOT NULL, 11 | last_name VARCHAR2(128) NOT NULL, 12 | salary NUMBER(6) ENCRYPT USING 'SHA256', 13 | emp_photo Blob, 14 | dept_id NUMBER(10), 15 | car_vin_no NUMBER(*,10), 16 | include_exclude_ind CHAR(1) DEFAULT 'Y', 17 | TEXT2_ NVARCHAR2(2000), 18 | CONSTRAINT check_employee_name CHECK (first_name = upper(first_name)), 19 | CONSTRAINT dept_fk FOREIGN KEY(dept_id) REFERENCES department(dept_id), 20 | CONSTRAINT employees_pk PRIMARY KEY (employee_id) 21 | ) 22 | PARTITION BY REFERENCE(dept_fk) 23 | Storage ( Initial 5m Next 5m Maxextents Unlimited ) 24 | ; 25 | ''' 26 | 27 | result = parse_the_ddl(ddl).run(group_by_type=True) 28 | pprint.pprint(result) 29 | 30 | expected = ''' 31 | {'ddl_properties': [], 32 | 'domains': [], 33 | 'schemas': [], 34 | 'sequences': [], 35 | 'tables': [{'alter': {}, 36 | 'checks': [{'constraint_name': 'check_employee_name', 37 | 'statement': 'first_name = upper(first_name)'}], 38 | 'columns': [{'check': None, 39 | 'default': None, 40 | 'name': 'employee_id', 41 | 'nullable': False, 42 | 'references': None, 43 | 'size': 100, 44 | 'type': 'number', 45 | 'unique': False}, 46 | {'check': None, 47 | 'default': None, 48 | 'name': 'first_name', 49 | 'nullable': False, 50 | 'references': None, 51 | 'size': 128, 52 | 'type': 'VARCHAR2', 53 | 'unique': False}, 54 | {'check': None, 55 | 'default': None, 56 | 'name': 'last_name', 57 | 'nullable': False, 58 | 'references': None, 59 | 'size': 128, 60 | 'type': 'VARCHAR2', 61 | 'unique': False}, 62 | {'check': None, 63 | 'default': None, 64 | 'encrypt': {'encryption_algorithm': "'SHA256'", 65 | 'integrity_algorithm': 'SHA-1', 66 | 'salt': True}, 67 | 'name': 'salary', 68 | 'nullable': True, 69 | 'references': None, 70 | 'size': 6, 71 | 'type': 'NUMBER', 72 | 'unique': False}, 73 | {'check': None, 74 | 'default': None, 75 | 'name': 'emp_photo', 76 | 'nullable': True, 77 | 'references': None, 78 | 'size': None, 79 | 'type': 'Blob', 80 | 'unique': False}, 81 | {'check': None, 82 | 'default': None, 83 | 'name': 'dept_id', 84 | 'nullable': True, 85 | 'references': None, 86 | 'size': 10, 87 | 'type': 'NUMBER', 88 | 'unique': False}, 89 | {'check': None, 90 | 'default': None, 91 | 'name': 'car_vin_no', 92 | 'nullable': True, 93 | 'references': None, 94 | 'size': ('*', 10), 95 | 'type': 'NUMBER', 96 | 'unique': False}, 97 | {'check': 
None, 98 | 'default': "'Y'", 99 | 'name': 'include_exclude_ind', 100 | 'nullable': True, 101 | 'references': None, 102 | 'size': 1, 103 | 'type': 'CHAR', 104 | 'unique': False}, 105 | {'check': None, 106 | 'default': None, 107 | 'name': 'TEXT2_', 108 | 'nullable': True, 109 | 'references': None, 110 | 'size': 2000, 111 | 'type': 'NVARCHAR2', 112 | 'unique': False}], 113 | 'constraints': {'checks': [{'constraint_name': 'check_employee_name', 114 | 'statement': 'first_name = ' 115 | 'upper(first_name)'}], 116 | 'primary_keys': [{'columns': ['employee_id'], 117 | 'constraint_name': 'employees_pk'}], 118 | 'references': [{'columns': ['dept_id'], 119 | 'constraint_name': 'dept_fk', 120 | 'deferrable_initially': None, 121 | 'on_delete': None, 122 | 'on_update': None, 123 | 'schema': None, 124 | 'table': 'department'}]}, 125 | 'index': [], 126 | 'partition_by': {'columns': ['dept_fk'], 'type': 'REFERENCE'}, 127 | 'partitioned_by': [], 128 | 'primary_key': ['employee_id'], 129 | 'schema': None, 130 | 'storage': {'initial': '5m', 131 | 'maxextents': 'Unlimited', 132 | 'next': '5m'}, 133 | 'table_name': 'employee', 134 | 'tablespace': None}], 135 | 'types': []} 136 | ''' 137 | #assert expected == result 138 | pprint.pprint(result['tables'][0]['checks']) 139 | 140 | test_oracle_ddl() -------------------------------------------------------------------------------- /test/test_redshift.py: -------------------------------------------------------------------------------- 1 | import pprint 2 | 3 | from sondesh.ddl_parser import parse_the_ddl 4 | 5 | def test_redshift(): 6 | 7 | ddl = ''' 8 | create table sales( 9 | salesid integer not null, 10 | listid integer not null, 11 | sellerid integer not null, 12 | buyerid integer not null encode auto, 13 | eventid integer not null encode mostly16, 14 | dateid smallint not null, 15 | qtysold smallint not null encode mostly8, 16 | pricepaid decimal(8,2) encode delta32k, 17 | commission decimal(8,2) encode delta32k, 18 | saletime timestamp, 19 | primary key(salesid), 20 | foreign key(listid) references listing(listid), 21 | foreign key(sellerid) references users(userid), 22 | foreign key(buyerid) references users(userid), 23 | foreign key(dateid) references date(dateid) 24 | ) 25 | distkey(listid) 26 | compound sortkey(listid,sellerid) 27 | ''' 28 | result = parse_the_ddl(ddl).run(group_by_type=True, output_mode="redshift") 29 | pprint.pprint(result) 30 | 31 | test_redshift() --------------------------------------------------------------------------------
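Editorial closing note: the two test modules above show the intended entry point end to end - build a parser with parse_the_ddl and call run() with an output_mode and group_by_type (parse_from_file in test/read_from_file.py does the same for files). A minimal usage sketch along the same lines; the DDL text is illustrative:

import pprint

from sondesh.ddl_parser import parse_the_ddl

ddl = '''
create table sales(
    salesid integer not null,
    pricepaid decimal(8,2) encode delta32k,
    primary key(salesid)
)
'''

# group_by_type=True returns a dict keyed by entity kind ('tables', 'types', ...);
# output_mode="redshift" adds keys such as 'diststyle', 'distkey' and 'encode'
result = parse_the_ddl(ddl).run(group_by_type=True, output_mode="redshift")
pprint.pprint(result)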